Example C code. It uses fewer local variables than the example linked to in the original post, allowing a C compiler to use registers for those variables. This program takes less than 0.5 second to sort 20 million (20*1024*1024 = 20971520) 64 bit unsigned integers by the upper 32 bits on my system (Intel 3770K 3.5ghz cpu, Windows 7 Pro 64 bit).
/* radix sort via upper 32 bits of 64 bit unsigned integers */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
typedef unsigned long long uint64_t;
void RadixSort(uint64_t * pData, uint64_t * pTemp, size_t count)
{
size_t mIndex[4][256] = { 0 }; /* index matrix */
size_t * pmIndex; /* ptr to row of matrix */
size_t i, j, m, n;
uint64_t u;
for (i = 0; i < count; i++) { /* generate histograms */
u = pData[i];
mIndex[3][(u >> 32) & 0xff]++;
mIndex[2][(u >> 40) & 0xff]++;
mIndex[1][(u >> 48) & 0xff]++;
mIndex[0][(u >> 56) & 0xff]++;
}
for (j = 0; j < 4; j++) { /* convert to indices */
pmIndex = mIndex[j];
n = 0;
for (i = 0; i < 256; i++) {
m = pmIndex[i];
pmIndex[i] = n;
n += m;
}
}
for (i = 0; i < count; i++) { /* radix sort */
u = pData[i];
pTemp[mIndex[3][(u >> 32) & 0xff]++] = u;
}
for (i = 0; i < count; i++) {
u = pTemp[i];
pData[mIndex[2][(u >> 40) & 0xff]++] = u;
}
for (i = 0; i < count; i++) {
u = pData[i];
pTemp[mIndex[1][(u >> 48) & 0xff]++] = u;
}
for (i = 0; i < count; i++) {
u = pTemp[i];
pData[mIndex[0][(u >> 56) & 0xff]++] = u;
}
}
#define COUNT (20*1024*1024) /* number of elements */
static clock_t dwTimeStart; /* clock values */
static clock_t dwTimeStop;
int main( )
{
uint64_t * pData;
uint64_t * pTemp;
uint64_t r;
size_t i;
/* allocate memory */
pData = (uint64_t *)malloc(COUNT*sizeof(uint64_t));
if(pData == NULL){
return 0;
}
pTemp = (uint64_t *)malloc(COUNT*sizeof(uint64_t));
if(pTemp == NULL){
free(pData);
return 0;
}
for(i = 0; i < COUNT; i++){ /* generate test data */
r = (((uint64_t)((rand()>>4) & 0xff))<< 0);
r |= (((uint64_t)((rand()>>4) & 0xff))<< 8);
r |= (((uint64_t)((rand()>>4) & 0xff))<<16);
r |= (((uint64_t)((rand()>>4) & 0xff))<<24);
r |= (((uint64_t)((rand()>>4) & 0xff))<<32);
r |= (((uint64_t)((rand()>>4) & 0xff))<<40);
r |= (((uint64_t)((rand()>>4) & 0xff))<<48);
r |= (((uint64_t)((rand()>>4) & 0xff))<<56);
pData[i] = r;
}
dwTimeStart = clock();
RadixSort(pData, pTemp, COUNT); /* sort array */
dwTimeStop = clock();
printf("Number of ticks %d
", dwTimeStop-dwTimeStart);
for(i = 1; i < COUNT; i++){ /* check sort */
if((pData[i-1]>>32) > (pData[i]>>32)){
break;
}
}
if(i != COUNT)
printf("sort error
");
free(pData);
return(0);
}
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…