Quick Sort, quicker, stable and anti-degeneration
Author |
Message |
Akira Wad #1 / 2
|
 Quick Sort, quicker, stable and anti-degeneration
/* Quick Sort, quicker, stable and anti-degeneration (qsortv.c). Please test and evaluate on the various platforms ! Thanks. e-mail : Akira Wada
/*============================================================================*/ /* a-wada/qsortlib/ccc/qsortv.c Aug. 2, 1998 */ /* Copyright (c) 1998 Akira Wada
/* <<< Example of QSORTBASE to generate sort subroutine Quote: >>> */
/* Quick sort compatible with "qsort in stdlib.h" (3) */ /* for efficiency about 1/3 sorting time by pointer_sort etc, */ /* stability to preserve the initial order for equal sort keys, and */ /* prevention from the degeneration caused by peculiar input, */ /* qsortv.c from qsortc.c optimized in level of C language */ #include <string.h> #include <stdlib.h> #define MDT 16 /* threshold for median of the 5 or the more, MDT Quote: >= 8 */
#define MDA 2 /* adjusting threshold slightly, MDT + MDA Quote: >= 4 */
#define MDM 2 /* multiplier for threshold, MDM Quote: >= 2 */
#define DGT 256 /* threshold of p-size for checking degeneration */ #define DGV 16 /* degeneration assumed if the smaller < (pz / DGV) */ #define DGM 0 /* threshold to STG-1 : selecting no median */ #define DGS 2 /* threshold to STG-2 : samples distributed uniformly */ #define DGU 4 /* threshold to STG-3 : sampling at random */ #define DGA 64 /* base of p-size to distribute samples uniformly */ #define DGB 2 /* log2 of divider for p-size < base ( -> 4 ) */ #define DGC 6 /* log2 of upper limit of divider ( -> 64 ) */ #define DGD 4 /* log2 of multiplier for base ( -> 16 ) */ #define SAMPLRDM(m, s, e) if (m != (n = s + ( \ randmn ((e - (s)) + 1)))) an = *n, *n = *m, *m = an static unsigned long randmn (unsigned long); static unsigned long setseed (unsigned long); typedef int (*ftp) (const void *, const void *); typedef char ** PTRTYP; void qsort (void *av, size_t rn, size_t rl, ftp qcmp) { PTRTYP i, j, k, l, m, n, r, *p, *s; char *ai, *aj, *ak, *am, *an, **ll, **rr; int pz, tr, iv, cw, sq, dgn = 0, t = sizeof (size_t) * 8 * 2; if (rn > 1 && rl > 0) { p = s = (PTRTYP *) malloc (sizeof (PTRTYP) * t); l = ll = (char **) malloc (sizeof (char *) * rn); r = rr = ll + (rn - 1); ai = (char *) av; setseed (0); for (i = l; i <= r; i++, ai += rl) *i = ai; for ( ; ; ) { m = l + (size_t) (pz = r - l) / 2, am = *m; i = l, j = r, iv = 1, sq = 1, tr = MDT, pz -= (MDA); if (pz > MDT && dgn > DGS) { tr = DGA, iv = DGB; while (pz >= tr && iv < DGC) tr <<= DGD, iv++; iv = pz >> iv; if (dgn > DGU) { SAMPLRDM (i, i, j); SAMPLRDM (j, i + 1, j); SAMPLRDM (m, i + 1, j - 1); am = *m;}} if (pz <= MDT || dgn <= DGM || dgn > DGS) for ( ; ; ) { ai = *i, aj = *j; if ((cw = (*qcmp) (am, aj)) < 0 || cw == 0 && am < aj) { if (i < m && !((cw = (*qcmp) (ai, am)) < 0 || cw == 0 && ai < am)) { *i = am, am = ai, sq = 0; for (n = m, k = j; ; ) { if (!((cw = (*qcmp) (am, aj)) < 0 || cw == 0 && am < aj)) n = k, am = aj; if ((k += iv) <= r) aj = *k; else break;} if (m != n) *n = ai; *m = am;}} else { if (i >= m || !((cw = (*qcmp) (ai, am)) < 0 || cw == 0 && ai < am)) *i = aj, *j = ai, sq = 0; else { *j = am, am = aj, sq = 0; for (n = m, k = i; ; ) { if (!((cw = (*qcmp) (ai, am)) < 0 || cw == 0 && ai < am)) n = k, am = ai; if (l <= (k -= iv)) ai = *k; else break;} if (n != m) *n = aj; *m = am;}} i += iv, j -= iv; if (iv > 1) { if (i >= m || m >= j) { i = l + 1, j = r - 1; break;} if (dgn > DGU) { SAMPLRDM (i, i + 1 - iv, m - 1); SAMPLRDM (j, m + 1, j - 1 + iv);} continue;} if (pz > tr) tr *= MDM; else break;} if (pz > MDT && sq != 0) { k = l, aj = *k; while (k < r && ((ai = aj, aj = *(k + 1), cw = ( *qcmp)(ai, aj)) < 0 || cw == 0 && ai < aj)) k++; if (k >= r) i = j; else if (k >= m) i = m;} if (i < j) { for ( ; ; ) { while (ai = *i, i < m && (( cw = (*qcmp) (ai, am)) < 0 || cw == 0 && ai < am)) i++; while (aj = *j, m < j && (( cw = (*qcmp) (am, aj)) < 0 || cw == 0 && am < aj)) j--; if (i < j) *i = aj, *j = ai; else break; if (m == j) m = i, j--; else if (i == m) m = j, i++; else j--, i++;} if ((--i) - l < r - (++j)) { if (pz >= DGT && i - l < pz / DGV) dgn++; if (l >= i) l = j; else *(p++) = j, *(p++) = r, r = i;} else { if (pz >= DGT && r - j < pz / DGV) dgn++; if (j >= r) r = i; else *(p++) = l, *(p++) = i, l = j;}} else { if (!(s >= p)) r = *(--p), l = *(--p); else break;}} ai = ak = (char *) av; an = (char *) malloc (sizeof (char) * rl); for (i = ll; i <= rr; i++, ai += rl) if ((aj = *i) != ai) { memcpy (an, ai, rl), am = ai, m = i; do *m = memcpy (am, aj, rl), am = aj, m = ll + (aj - ak) / rl; while ((aj = *m) != ai); *m = memcpy (am, an, rl);} free (an); free (ll); free (s);}} /* randmn.c */ static unsigned long random (); /** if not in <stdlib.h> **/ static unsigned long randmn (unsigned long n) { long t, s = 0x7fffffff / n; s = s * n; do t = random (); while (t > s); return (t % n);} /* randmn.c end */ /* setseed.c */ #include <time.h> static void srandom (unsigned long); /** if not in <stdlib.h> **/ static unsigned long setseed (unsigned long seed) { long h, t; if (seed == 0) { t = time (0); h = t / 3600; t = t - h * 3600; seed = (unsigned long) (h + t * 3601);} srandom (seed); return seed;} /* setseed.c end */ /* random.c ** if not in <stdlib.h> ** from ftp://ftp.freebsd.org/pub/FreeBSD /FreeBSD-current/src/sys/libkern/random.c Sat Feb 22, 1997 Copyright (c) 1992, 1993 The Regents of the University of California. */ static unsigned long randseed = 1; static void srandom (unsigned long seed) { randseed = seed;} static unsigned long random () { register long x, hi, lo, t; x = randseed; hi = x / 127773; lo = x % 127773; t = 16807 * lo - 2836 * hi; if (t <= 0) t += 0x7fffffff; randseed = t; return (t);} /* random.c end */ /* a-wada/qsortlib/ccc/qsortv.c end */ /*============================================================================*/ /* *** << Summary of A Bench-mark >> *** * average of 10 cases, N = 10,000 element-size = 32(A), 128(B) bytes * key1 = string [16] all same chars, key2 = double random unique time (10msec) *pno A B ky-cmp el-trf program & comment sys 46 84 150536 - qsort.stdlib.h of the system (SunOS 5.4 ss20-40) qs-3 46 84 150536 73809 qsort-Emacs.c (e) ftp: 3.= SunOS 5.4 != 5.5.1 qs-4 43 78 142661 73813 qsort-Postgrs.c (g) ftp: 4. qs-5 35 49 135937 74919 qsort-FreeBsd.c (f) ftp: 5. qs-6 34 45 140823 75345 qsort-Darcy.c (h) ftp: 6. qs-7 33 45 132017 64184 qsort-Kawamr6.c (k6) http: 7. qs-8 33 46 134702 70700 qsort-Kawamr7.c (k7) http: 7. ftp: 3. ftp://ring.aist.go.jp/pub/TeX/CTAN /indexing/makeindex/src/qsort.c May 26, 1993 ftp: 4. ftp://ftp.sra.co.jp/pub/cmd/postgres/6.2.1 /postgresql-6.2.1/src/backend/lib/qsort.c Oct 20, 1997 ftp: 5. ftp://ftp.jp.freebsd.org/pub/FreeBSD /FreeBSD-current/src/sys/libkern/qsort.c Feb 22, 1997 ftp: 6. ftp://ftp.nizkor.vex.net/local/src/darcy/common/qsort.c Jan 22, 1997 http: 7. http://www.*-*-*.com/ ~kawamura/ qs6.c & qs7.c Nov 27, 1997 the following codes and more informations on the web-site, http://www.*-*-*.com/ ~a-wada/qsortlib.html qs11 28 43 125511 74546 qsorta.c tailor-made, comp & swap inline qs14 31 46 125511 74546 qsortb.c compatible, word-swap, no stability qs17 35 38 125511 10007 qsortc.c compatible(ptr-sort) stablty, anti-dgn qs18* 33 36 125542 10007 **qsortv.c**(optimized in C from qsortc.c) qs20 30 33 125511 10007 qsortd.c tailor-made(ptr-sort) stblty, anti-dgn qs22 24 27 (125511) (10007) qsortds.s (optimized in Asm from
... read more »
|
Thu, 08 Mar 2001 03:00:00 GMT |
|
 |
Akira Wad #2 / 2
|
 Quick Sort, quicker, stable and anti-degeneration
Quote:
>>/* Quick Sort, quicker, stable and anti-degeneration (qsortv.c). >> Please test and evaluate on the various platforms ! >Why should anyone do your grunt work for you? **A** >>void qsort (void *av, size_t rn, size_t rl, ftp qcmp) { >A C program which defines an external name that matches one of the external >names defined by the standard library invokes undefined behavior. >You should call the function something else. **B** >> PTRTYP i, j, k, l, m, n, r, *p, *s; >> char *ai, *aj, *ak, *am, *an, **ll, **rr; >> int pz, tr, iv, cw, sq, dgn = 0, t = sizeof (size_t) * 8 * 2; >> if (rn > 1 && rl > 0) { >> p = s = (PTRTYP *) malloc (sizeof (PTRTYP) * t); >> l = ll = (char **) malloc (sizeof (char *) * rn); >> r = rr = ll + (rn - 1); ai = (char *) av; setseed (0); >> for (i = l; i <= r; i++, ai += rl) *i = ai; >> for ( ; ; ) { >> m = l + (size_t) (pz = r - l) / 2, am = *m; >> i = l, j = r, iv = 1, sq = 1, tr = MDT, pz -= (MDA); >This code is a little hard to read because, for starters, it uses a plethora >of cryptic variable names. And it's an understatement to say that there is a >dearth of comments. >Now it may be the case that these entities are simply not easy to name, or >that descriptive names would not not only fail to help, but only bloat the >text to make it even less comprehensible. **C** >Why don't you write a paper outlining the algorithm and perhaps include some >algorithm analysis. That would be worth much more than a benchmark against >some existing sort implementations; benchmark results can be affected by how >well a given algorithm is implemented, and on what type of machine the test is >run. >Then post your paper to an appropriate newsgroup, such as comp.programming. >Your implementation of this algorithm is simply too bogged down with C >programming details; someone wishing to understand the method has to >essentially reverse engineer the algorithm out of the code. **D** >> *m = memcpy (am, an, rl);} >> free (an); free (ll); free (s);}} >Ouch, this is an awful, not to mention unusual style for closing your >parentheses. I doubt that any software house would tolerate it. I have to >use a bracket-matching feature in my text editor just to be able to tell which >block is being closed. >You see that style in TeX code, but there is a reason for that: whitespace >is significant in TeX in ways not encountered in C.
**E** Sorry much for codes and messages with annoyed formats by my mistake, and thanks a lot for kindhearted advices, but please let me excuse myself,- **A** : Because the platforms available to me are limited a few, and it would be considered that the same implementation in C might behave differently on the different platforms, especially concerning the efficiency e.g. threshold of elmentsize for direct-swapping-sort or pointer-sort. And not for only myself, but for testers interested, and if they would respond the results to newsgroups, for readers of newsgroups. **B** : Intentionally named same, to override the system routine, and to keep portability of the application programs, i.e. the caller programs. And at least on the platforms I'd tested, it was not complained by compiler nor linker, and ran well expectedly. **C** : Sure, but variable-names are not entirely undescriptive, "l" points 'leftend', "r" 'rightend', "m" 'middle of current partition', and "tr", "pz", "iv" represent 'threshold', 'partition-size', 'interval' respectively, etc.. **E** : Well, when we deliver source codes to customers, we too observe the styles required, but they are too various to be standard. And personally, parentheses which are floating alone in lines, make me unrest, probably because I'd been using fortran, COBOL, and ASSEMBLERs for long years before C language, and short variable-names by the same reason. Instead I'm endeavoring to indent clearly to separate blocks. **D** : Reverse engineerings, of course without infringement of copyright, would be useful to brush up one's programming technique, I'd been forced to believe. I'd prepared the papers on my private web-site, and mentioned URL in the article, and c.l.c is the last one of the list of newsgroups multi-posted to, but only one respond was from c.l.c., I guess, thanks a lot! I'm wondering yet whether I should post the following before previous one, and afraid to be scolded for using the "#define macros" immoderately. ** Sorry again if too dogmatic ** /*============================================================================*/ /* a-wada/qsortlib/ccc/qsortb.c Sep. 20, 1997 */
/* <<< Example of QSORTBASE to generate sort subroutine >>> */ /* Quick sort compatible with "qsort in stdlib.h" (1) */ /* for efficiency about 1/2 sorting time by reducing the times of */ /* key_comparisons, and word_mode_swapping for word_aligned object. */ typedef int (*ftp) (const void *, const void *); typedef unsigned int size_t; /*se* void swap (char *, char *, size_t); *se*/ typedef char * PTRTYP; #define ARGMTS void *av, size_t rn, size_t rl, ftp qcmp #define DEFWRK STACK SWPWRK #define VALID rn > 1 && rl > 0 #define HSKP l = (char *) av, r = l + (rn - 1) * rl; STKINI SWPINI #define EZ (int) rl #define ORDR(i, j) ((*qcmp) (i, j) <= 0) #define SWAP(i, j) SWPINL (i, j) /*si* *si*/ /*se* swap (i, j, rl); *se*/ #define CLNUP STKEND #define STACK PTRTYP *p, s[sizeof (size_t) * 8 * 2]; #define STKINI p = s; #define PUSH(x, y) *(p++) = x, *(p++) = y, #define EMPTY (s >= p) #define POP(y, x) y = *(--p), x = *(--p); #define STKEND /* NOTHING */ #define SWPWRK int b, t, w, z = sizeof (int); /*si* *si*/ #define SWPINI b = ((int) av | rl) & (z - 1); /*si* *si*/ #define SWPINL(i, j) { /*si* *si*/ \ if (b != 0) { \ w = rl - 1; do t = *(i + w), *(i + w) = *(j + w), \ *(j + w) = t; while (--w >= 0);} \ else { \ w = rl - z; do t = *((int *) (i + w)), \ *((int *) (i + w)) = *((int *) (j + w)), \ *((int *) (j + w)) = t; while ((w -= z) >= 0);}} /*si* This inline swap routine is optimized for sparc architecture, *si*/ /*si* especially on condition that size of int is 4 or 8 bytes, *si*/ /*si* it might have to be changed for the other systems. *si*/ #define AVDGN /* AVDGN_E not installed */ #define BRDGN /* BRDGN_E not installed */ #define CKDGN /* CKDGN_E not installed */ #define DFDGN /* DFDGN_E not installed */ /* unsigned long random (); * if not in #include <stdlib.h> */ /* if the unique key in data object and/or compare function available, revive the above anti-degeneration macros commented out, for short object especially for the array of pointers prepared by user for the ojects with large or variable length etc, to eliminate overhead of indirect comparison duplicated by using qsortc.c or qsortv.c */ #include "qsortbase.h" QSORTBASE (qsort) /* a-wada/qsortlib/ccc/qsortb.c end */ /*============================================================================*/ /* a-wada/qsortlib/ccc/qsortc.c Sep. 20, 1997 */
/* <<< Example of QSORTBASE to generate sort subroutine >>> */ /* Quick sort compatible with "qsort in stdlib.h" (2) */ /* for efficiency about 1/3 sorting time by pointer_sort etc, */ /* stability to preserve the initial order for equal sort keys, and */ /* prevention from the degeneration caused by peculiar input */ #include <string.h> #include <stdlib.h> typedef int (*ftp) (const void *, const void *); typedef char ** PTRTYP; #define ARGMTS void *av, size_t rn, size_t rl, ftp qcmp #define DEFWRK STACK UNQKEY #define VALID rn > 1 && rl > 0 #define HSKP STKINI UKYINI #define EZ 1 #define ORDR(i, j) UORDR (i, j) #define SWAP(i, j) USWAP (i, j) #define CLNUP UARGELM UKYEND STKEND #define STACK PTRTYP *p, *s; int t = sizeof (size_t) * 8 * 2; #define STKINI p = s = (PTRTYP*) malloc (sizeof (PTRTYP) * t); #define PUSH(x, y) *(p++) = x, *(p++) = y, #define EMPTY (s >= p) #define POP(y, x) y = *(--p), x = *(--p); #define STKEND free (s); #define UNQKEY char *ai, *aj, *ak, *am, *an, **ll, **rr; int cw; #define UKYINI l = ll = (char **) malloc (sizeof (char *) * rn); \ r = rr = l + (rn - 1); ai = (char *) av; \ for (i = l; i <= r; i++, ai += rl) *i = ai; #define UORDR(i, j) ((cw = (*qcmp) (a ## i = *i, a ## j = *j)) < 0 || \ cw == 0 && a ## i < a ## j) #define USWAP(i, j) a ## i = *i, a ## j = *j, *j = a ## i, *i = a ## j; #define UARGELM l = ll, r = rr, ai = ak = (char *) av, tr = rl; \ an = (char *) malloc (sizeof (char) * tr); \ for (i = l; i <= r; i++, ai += tr) if ((aj = *i) != ai) { \ memcpy (an, ai, tr), am = ai, m = i; do \ *m =
... read more »
|
Mon, 12 Mar 2001 03:00:00 GMT |
|
|
|