home
site map
Computer Science
November 2004
email

Histogram sort for integers (fast)

This is a histogram sort for signed integers. It is about three times as fast as qsort.




/*
 * Copyright (c) 2004 by Richard Harter.
 * This software may be freely used and modified without
 * restriction provided that this notice is preserved unaltered.
 *
 */

/*
 * This function sorts an array of integers in place using a math
 * sort based histogram sort.  2*len locations of auxilliary space
 * are used, where len is the data length.
 */
 
#include <stdlib.h>

#define MIN_SORTABLE_LENGTH 128
#define PROCESSED -1

int
int_msort(int * data, int len)
{
   int      data_min;               /* Minimum value in data array */
   int      data_max;               /* Maximum value in data array */
   int      index_min;              /* Index of minimum value      */
   int      i;                      /* Major loop index            */
   int      j;                      /* Minor loop index            */
   int      ifac;                   /* Integer scaling factor      */
   int      temp;                   /* Temp loc for data moves     */

   int      *space;                 /* Ptr to allocated space      */
   int      *cmp_index;             /* Ptr to computed indices     */
   int      *cum;                   /* Ptr to cumulative distrib.  */
   int      *hist;                  /* Ptr to cumulative distrib.  */
   int      *sorted;                /* Ptr to almost sorted data   */

   if (len <= 1) return 1;
   if (len == 2) {
      if (data[0] > data[1]) {
         temp = data[0];
         data[0] = data[1];
         data[1] = temp;
      }
      return 1;
   }
   data_min  = data[0];
   data_max  = data[0];
   index_min = 0;
   for (i=len; --i>0;) {
      if (data[i] < data_min) {
         data_min  = data[i];
         index_min = i;
      }
   }

   if (index_min != 0) {
      data[index_min] = data[0];
      data[0]         = data_min;
   }
   if (len >= MIN_SORTABLE_LENGTH) {
      for (i=len; --i>0;) {
         if (data[i] > data_max) data_max  = data[i];
      }

      /* Compute interpolation function */

      ifac = (data_max - data_min)/(len -1);
      if (ifac<=0) ifac=1;	  
      else while (((data_max-data_min)/ifac)>(len-1)) ifac++;

      /* allocate index and histogram space */

      space = malloc((2*len+1)*sizeof(int));
      if (!space) return 0;
      cmp_index = space;
      cum       = space + len;
      hist      = cum + 1;
      sorted    = hist;
      memset(cum,0,(len+1)*sizeof(int));

      /* Compute raw interpolation indices */

      for (i=len; --i>=0;) {
         hist[cmp_index[i] = (data[i] - data_min)/ifac] += 1;
      }
      for (i=1;i<len;i++) cum[i] += cum[i-1];  
      for (i=0;i<len;i++) cmp_index[i] = cum[cmp_index[i]]++;

      /* Math sort proper */

      for (i=len; --i >= 0;) sorted[cmp_index[i]] = data[i];
      memcpy(data, sorted, len*sizeof(int));
      free (space);
   }

   /* End of math sort section, begin insertion sort section */

   {
      for (i=1; i<len; i++) {
         if (data[i] >= data[i-1]) continue;
         temp = data[i];
         data[i] = data[i-1];
         for (j = i-2; temp < data[j]; j--) data[j+1] = data[j];
         data[j+1] = temp;
      }
   }
   return 1;
}

This page was last updated January 15, 2005.

home
site map
Computer Science
November 2004
email