#include <stdlib.h>
#include <stdio.h>
#include <time.h>


#define NUM 1000000
#define LEN 15

/* This cannot be inlined by MSVC compiler */
/* static int */
/* compare_stringC(const void *s1, const void *s2) */
/* { */
/*   return strncmp(s1, s2, LEN); */
/* } */

static int
compare_stringC(const void *s1, const void *s2)
{
    const unsigned char *c1 = (unsigned char *)s1;
    const unsigned char *c2 = (unsigned char *)s2;
    size_t i;

    for(i = 0; i < LEN; ++i) {
        if (c1[i] != c2[i]) {
            return (c1[i] > c2[i]) ? 1 : -1;
        }
	if (c1[i] == 0) return 0;
    }
    return 0;
}


static int
compare_stringP(const void *s1, const void *s2)
{
    const unsigned char *c1 = (unsigned char *)s1;
    const unsigned char *c2 = (unsigned char *)s2;
    size_t i;

    for(i = 0; i < LEN; ++i) {
        if (c1[i] != c2[i]) {
            return (c1[i] > c2[i]) ? 1 : -1;
        }
    }
    return 0;
}



#define PYA_QS_STACK 100
#define SMALL_QUICKSORT 15

static int
compare_string(char *s1, char *s2, size_t len)
{
    const unsigned char *c1 = (unsigned char *)s1;
    const unsigned char *c2 = (unsigned char *)s2;
    size_t i;

    for(i = 0; i < len; ++i) {
        if (c1[i] != c2[i]) {
            return (c1[i] > c2[i]) ? 1 : -1;
        }
    }
    return 0;
}

#define STRING_LT(pa, pb, len) (compare_string(pa, pb, len) < 0)

static void
swap_string(char *s1, char *s2, size_t len)
{
    while(len--) {
        const char t = *s1;
        *s1++ = *s2;
        *s2++ = t;
    }
}


#define opt_memcpy(a,b,n) memcpy((a),(b),(n))

static void
copy_string(char *s1, char *s2, size_t len)
{
    while(len--) {
        *s1++ = *s2++;
    }
}

/* This seems to work faster on my laptop, but YMMV */
static void
opt_memcpy2(void *a, const void *b, size_t n) {
  size_t i;
  if (n<16)
	for (i=0; i<n; i++)
	  ((char *)a)[i] = ((char *)b)[i];
  else
	memcpy(a, b, n);
}


static int
newqsort(char *start, size_t num, size_t len)
{
    char *vp = malloc(len);
    char *pl = start;
    char *pr = start + (num - 1)*len;
    char *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk;

    for(;;) {
        while ((pr - pl) > 5*len) {
            /* quicksort partition */
            pm = pl + (((pr - pl)/len) >> 1)*len;
            if (STRING_LT(pm, pl, len)) swap_string(pm, pl, len);
            if (STRING_LT(pr, pm, len)) swap_string(pr, pm, len);
            if (STRING_LT(pm, pl, len)) swap_string(pm, pl, len);
            opt_memcpy(vp, pm, len);
            pi = pl;
            pj = pr - len;
            swap_string(pm, pj, len);
            for(;;) {
                do pi += len; while (STRING_LT(pi, vp, len));
                do pj -= len; while (STRING_LT(vp, pj, len));
                if (pi >= pj)  break;
                swap_string(pi, pj, len);
            }
            pk = pr - len;
            swap_string(pi, pk, len);
            /* push largest partition on stack */
            if (pi - pl < pr - pi) {
                *sptr++ = pi + len;
                *sptr++ = pr;
                pr = pi - len;
            }
            else {
                *sptr++ = pl;
                *sptr++ = pi - len;
                pl = pi + len;
            }
        }

        /* insertion sort */
        for(pi = pl + len; pi <= pr; pi += len) {
            opt_memcpy(vp, pi, len);
            pj = pi;
            pk = pi - len;
            while(pj > pl && STRING_LT(vp, pk, len)) {
                opt_memcpy(pj, pk, len);
                pj -= len;
                pk -= len;
            }
            opt_memcpy(pj, vp, len);
        }
        if (sptr == stack) break;
        pr = *(--sptr);
        pl = *(--sptr);
    }

    free(vp);
    return 0;
}


int main(){
  size_t index;
  char *l = malloc(NUM*LEN);
  char *lcpy = malloc(NUM*LEN);
  clock_t last, current;

  printf("Benchmark with %d strings of size %d\n", NUM, LEN);

/*   srand(time(NULL)); */
  srand(1);

  for(index = 0; index < NUM; ++index){
	sprintf(lcpy+index*LEN, "%d%c", rand(), '\0');
  }

  memcpy(l, lcpy, NUM*LEN);

  last = clock();
  qsort(l, NUM, LEN, compare_stringC);
  current = clock();

/*   for(index = 0; index < NUM; ++index){ */
/* 	printf("0-->%s", l+index*LEN); */
/*   } */
/*   printf("\n"); */

  printf("C qsort with C style compare: %f\n", (current-last)/(float)CLOCKS_PER_SEC);

  memcpy(l, lcpy, NUM*LEN);

  last = clock();
  qsort(l, NUM, LEN, compare_stringP);
  current = clock();

/*   for(index = 0; index < NUM; ++index){ */
/* 	printf("1-->%s", l+index*LEN); */
/*   } */
/*   printf("\n"); */

  printf("C qsort with Python style compare: %f\n", (current-last)/(float)CLOCKS_PER_SEC);

  memcpy(l, lcpy, NUM*LEN);

  last = clock();
  newqsort(l, NUM, LEN);
  current = clock();

/*   for(index = 0; index < NUM; ++index){ */
/* 	printf("2-->%s", l+index*LEN); */
/*   } */
/*   printf("\n"); */

  printf("NumPy newqsort: %f\n", (current-last)/(float)CLOCKS_PER_SEC);

  free(l);
  free(lcpy);
  return 0;
}

