Bucket Sort

Bucket Sort

Bucket sort is useful when input is uniformly divided over a range.

This sort is a sorting algorithm that separate the elements into multiple groups known to be buckets. Elements in bucketsort are first uniformly distributed into groups called buckets, and then they are sorting by other sorting algorithm. Then, elements are collected in a sorted manner.

 

 

The procedure of performing the bucket sort as follows –

  • Firstly, partition the range into a fixed number of buckets.
  • After that, toss each element into its appropriate bucket.
  • Then, each bucket sorted ividually by applying a sorting algorithm.
  • And lastly, sequence all the sorted buckets.

 

Algorithm

1.  Let B[0....n-1] be a new array  
2.  n=length[A]  
3.  for i=0 to n-1  
4.  make B[i] an empty list  
5.  for i=1 to n  
6.  do insert A[i] into list B[n a[i]]  
7.  for i=0 to n-1  
8.  do sort list B[i] with insertion-sort  
9.  Concatenate lists B[0], B[1],........, B[n-1] together in order  
End   

Scatter-gather approach

This sort algorithm based on scatter-gather approach. Here, the  elements are firstly scattered into buckets. After the scattering, elements in each bucket are sorted using a sorting algorithm. At last, the sorted elements will be collected in order.

Suppose,  the elements of array are –

 

 

bucket sort

 

 

Now firstly, create buckets with a range from 0 to 25. Buckets range are 0-5, 5-10, 10-15, 15-20, 20-25. Elements are inserted in to the buckets by its bucket range. Let the value of an item is 9, so it will be inserted in the bucket with the range 5-10. Similarly, every item of the array will insert like this.

This phase is called as the scattering of array elements.

 

 

bucket sort

 

Now, sorting the each bucket individually. The elements of each bucket is sorted by using any sorting algorithms.

 

 

bucket sort

 

At last, collect the sorted elements from each bucket in order

 

bucket sort

 

Now, the array is sorted.

 

 

Program of bucketsort in C

#include <stdio.h>
#include <stdlib.h>

#define NARRAY 7   // Array Size
#define NBUCKET 6  // Number of buckets
#define INTERVAL 10  // bucket capacity

struct Node {
  int data;
  struct Node *next;
};

void BucketSort(int arr[]);
struct Node *InsertionSort(struct Node *list);
void print(int arr[]);
void printBuckets(struct Node *list);
int getBucketIndex(int value);

//  function
void BucketSort(int arr[]) {
  int i, j;
  struct Node **buckets;

  // Create buckets 
  buckets = (struct Node **)malloc(sizeof(struct Node *) * NBUCKET);

  // Initialize buckets
  for (int i = 0; i < NBUCKET; ++i) {
    buckets[i] = NULL;
  }

  for (int i = 0; i < NARRAY; ++i) {
    struct Node *current;
    int pos = getBucketIndex(arr[i]);
    current = (struct Node *)malloc(sizeof(struct Node));
    current->data = arr[i];
    current->next = buckets[pos];
    buckets[pos] = current;
  }

  // Printing buckets 
  for (int i = 0; i < NBUCKET; i++) {
    printf("Bucket[%d]: ", i);
    printBuckets(buckets[i]);
    printf("\n");
  }

  // Sort elements 
  for (int i = 0; i < NBUCKET; ++i) {
    buckets[i] = InsertionSort(buckets[i]);
  }

  printf("-------------\n");
  printf("Bucktets after sorting\n");
  for (int i = 0; i < NBUCKET; i++) {
    printf("Bucket[%d]: ", i);
    printBuckets(buckets[i]);
    printf("\n");
  }

  // sorted elements
  for (int j = 0, i = 0; i < NBUCKET; ++i) {
    struct Node *node;
    node = buckets[i];
    while (node) {
      arr[j++] = node->data;
      node = node->next;
    }
  }

  return;
}

// Function 
struct Node *InsertionSort(struct Node *list) {
  struct Node *k, *nodeList;
  if (list == 0 || list->next == 0) {
    return list;
  }

  nodeList = list;
  k = list->next;
  nodeList->next = 0;
  while (k != 0) {
    struct Node *ptr;
    if (nodeList->data > k->data) {
      struct Node *tmp;
      tmp = k;
      k = k->next;
      tmp->next = nodeList;
      nodeList = tmp;
      continue;
    }

    for (ptr = nodeList; ptr->next != 0; ptr = ptr->next) {
      if (ptr->next->data > k->data)
        break;
    }

    if (ptr->next != 0) {
      struct Node *tmp;
      tmp = k;
      k = k->next;
      tmp->next = ptr->next;
      ptr->next = tmp;
      continue;
    } else {
      ptr->next = k;
      k = k->next;
      ptr->next->next = 0;
      continue;
    }
  }
  return nodeList;
}

int getBucketIndex(int value) {
  return value / INTERVAL;
}

void print(int ar[]) {
  int i;
  for (i = 0; i < NARRAY; ++i) {
    printf("%d ", ar[i]);
  }
  printf("\n");
}

// Print buckets
void printBuckets(struct Node *list) {
  struct Node *cur = list;
  while (cur) {
    printf("%d ", cur->data);
    cur = cur->next;
  }
}

int main(void) {
  int array[NARRAY] = {42, 32, 33, 52, 37, 47, 51};

  printf("Initial array: ");
  print(array);
  printf("-------------\n");

  BucketSort(array);
  printf("-------------\n");
  printf("Sorted array: ");
  print(array);
  return 0;
}

Output

Initial array: 42 32 33 52 37 47 51 
-------------
Bucket[0]: 
Bucket[1]: 
Bucket[2]: 
Bucket[3]: 37 33 32 
Bucket[4]: 47 42 
Bucket[5]: 51 52 
-------------
Bucktets after sorting
Bucket[0]: 
Bucket[1]: 
Bucket[2]: 
Bucket[3]: 32 33 37 
Bucket[4]: 42 47 
Bucket[5]: 51 52 
-------------
Sorted array: 32 33 37 42 47 51 52

 

Program of bucketsort in C++

#include <iomanip>
#include <iostream>
using namespace std;

#define NARRAY 7   // Array size 
#define NBUCKET 6  //Number of buckets
#define INTERVAL 10  //bucket capacity

struct Node {
  int data;
  struct Node *next;
};

void BucketSort(int arr[]);
struct Node *InsertionSort(struct Node *list);
void print(int arr[]);
void printBuckets(struct Node *list);
int getBucketIndex(int value);

// Sorting function
void BucketSort(int arr[]) {
  int i, j;
  struct Node **buckets;

  // Create buckets 
  buckets = (struct Node **)malloc(sizeof(struct Node *) * NBUCKET);

  // Initialize buckets
  for (int i = 0; i < NBUCKET; ++i) {
    buckets[i] = NULL;
  }

  for (int i = 0; i < NARRAY; ++i) {
    struct Node *current;
    int pos = getBucketIndex(arr[i]);
    current = (struct Node *)malloc(sizeof(struct Node));
    current->data = arr[i];
    current->next = buckets[pos];
    buckets[pos] = current;
  }

  // Printing buckets
  for (int i = 0; i < NBUCKET; i++) {
    cout << "Bucket[" << i << "] : ";
    printBuckets(buckets[i]);
    cout << endl;
  }

  // Sorting elements
  for (i = 0; i < NBUCKET; ++i) {
    buckets[i] = InsertionSort(buckets[i]);
  }

  cout << "-------------" << endl;
  cout << "Bucktets after sorted" << endl;
  for (int i = 0; i < NBUCKET; i++) {
    cout << "Bucket[" << i << "] : ";
    printBuckets(buckets[i]);
    cout << endl;
  }

  for (int j = 0, i = 0; i < NBUCKET; ++i) {
    struct Node *node;
    node = buckets[i];
    while (node) {
      arr[j++] = node->data;
      node = node->next;
    }
  }

  for (int i = 0; i < NBUCKET; ++i) {
    struct Node *node;
    node = buckets[i];
    while (node) {
      struct Node *tmp;
      tmp = node;
      node = node->next;
      free(tmp);
    }
  }
  free(buckets);
  return;
}

// Function
struct Node *InsertionSort(struct Node *list) {
  struct Node *k, *nodeList;
  if (list == 0 || list->next == 0) {
    return list;
  }

  nodeList = list;
  k = list->next;
  nodeList->next = 0;
  while (k != 0) {
    struct Node *ptr;
    if (nodeList->data > k->data) {
      struct Node *tmp;
      tmp = k;
      k = k->next;
      tmp->next = nodeList;
      nodeList = tmp;
      continue;
    }

    for (ptr = nodeList; ptr->next != 0; ptr = ptr->next) {
      if (ptr->next->data > k->data)
        break;
    }

    if (ptr->next != 0) {
      struct Node *tmp;
      tmp = k;
      k = k->next;
      tmp->next = ptr->next;
      ptr->next = tmp;
      continue;
    } else {
      ptr->next = k;
      k = k->next;
      ptr->next->next = 0;
      continue;
    }
  }
  return nodeList;
}

int getBucketIndex(int value) {
  return value / INTERVAL;
}

// Print buckets
void print(int ar[]) {
  int i;
  for (i = 0; i < NARRAY; ++i) {
    cout << setw(3) << ar[i];
  }
  cout << endl;
}

void printBuckets(struct Node *list) {
  struct Node *cur = list;
  while (cur) {
    cout << setw(3) << cur->data;
    cur = cur->next;
  }
}

int main(void) {
  int array[NARRAY] = {42, 32, 33, 52, 37, 47, 51};

  cout << "Initial array: " << endl;
  print(array);
  cout << "-------------" << endl;

  BucketSort(array);
  cout << "-------------" << endl;
  cout << "Sorted array: " << endl;
  print(array);
}

Output

Initial array: 
 42 32 33 52 37 47 51
-------------
Bucket[0] : 
Bucket[1] : 
Bucket[2] : 
Bucket[3] :  37 33 32
Bucket[4] :  47 42
Bucket[5] :  51 52
-------------
Bucktets after sorted
Bucket[0] : 
Bucket[1] : 
Bucket[2] : 
Bucket[3] :  32 33 37
Bucket[4] :  42 47
Bucket[5] :  51 52
-------------
Sorted array: 
 32 33 37 42 47 51 52

Program of bucket sort in Python

def bucketSort(array):
    bucket = []

    # Creating buckets
    for i in range(len(array)):
        bucket.append([])

    # Inserting the elements
    for j in array:
        index_b = int(10 * j)
        bucket[index_b].append(j)

    # Sorting elements
    for i in range(len(array)):
        bucket[i] = sorted(bucket[i])

    k = 0
    for i in range(len(array)):
        for j in range(len(bucket[i])):
            array[k] = bucket[i][j]
            k += 1
    return array


array = [.42, .32, .33, .52, .37, .47, .51]
print("Sorted Array in descending order is")
print(bucketSort(array))

Output

Sorted Array in descending order is
[0.32, 0.33, 0.37, 0.42, 0.47, 0.51, 0.52]

Program of bucket sort in Java

import java.util.ArrayList;
import java.util.Collections;

public class BucketSort {
  public void bucketSort(float[] arr, int n) {
    if (n <= 0)
      return;
    @SuppressWarnings("unchecked")
    ArrayList<Float>[] bucket = new ArrayList[n];

    // Creating buckets
    for (int i = 0; i < n; i++)
      bucket[i] = new ArrayList<Float>();

    // Adding elements 
    for (int i = 0; i < n; i++) {
      int bucketIndex = (int) arr[i] * n;
      bucket[bucketIndex].add(arr[i]);
    }

    // Sorting elements
    for (int i = 0; i < n; i++) {
      Collections.sort((bucket[i]));
    }

    // sorted array
    int index = 0;
    for (int i = 0; i < n; i++) {
      for (int j = 0, size = bucket[i].size(); j < size; j++) {
        arr[index++] = bucket[i].get(j);
      }
    }
  }

  public static void main(String[] args) {
    BucketSort b = new BucketSort();
    float[] arr = { (float) 0.42, (float) 0.32, (float) 0.33, (float) 0.52, (float) 0.37, (float) 0.47,
        (float) 0.51 };
    b.bucketSort(arr, 7);

    for (float i : arr)
      System.out.print(i + "  ");
  }
}

Output

0.32  0.33  0.37  0.42  0.47  0.51  0.52

 

Bucket Sort Complexity

Time Complexity
Best O(n+k)
Worst O(n2)
Average O(n)
Space Complexity O(n+k)
Stability Yes

 

The best case complexity is O(n + k) and average-case complexity is O(n + k),

The worst-case complexity is O(n2), where n is the items numbers.

 

 

Advantages of bucket sort

 

  • This sort reduces the number of comparisons.
  • It is asymptotically fast because their are  uniform distribution of elements.

The limitations of bucket sort

 

  • This sort is may or may not be a stable sorting algorithm.
  • It is not useful if we have a large number of array because it increases the cost.
  • It is not an inplace sorting algorithm, because some extra space is required to sort the buckets.

 

 

Bucket Sort Applications

 

  • This sort is used when input is uniformly distributed over a range.
  •  And there are floating point values

 

Read more, Radix Sort

Share this post

Leave a Reply

Your email address will not be published. Required fields are marked *