Quick Sort, quicker, stable and anti-degeneration 
Author Message
 Quick Sort, quicker, stable and anti-degeneration

/*  Quick Sort, quicker, stable and anti-degeneration (qsortv.c).

    Please test and evaluate on the various platforms !

    Thanks.                      e-mail :  Akira Wada

/*============================================================================*/
/*  a-wada/qsortlib/ccc/qsortv.c                                Aug.  2,
1998 */
/*  Copyright (c) 1998  Akira Wada  

/*  <<< Example of QSORTBASE to generate sort subroutine

Quote:
>>>                  */

/*      Quick sort compatible with "qsort in stdlib.h"
(3)                    */
/*      for efficiency about 1/3 sorting time by pointer_sort
etc,            */
/*          stability to preserve the initial order for equal sort keys,
and  */
/*          prevention from the degeneration caused by peculiar
input,        */
/*      qsortv.c from qsortc.c optimized in level of C
language               */

#include <string.h>
#include <stdlib.h>
#define MDT     16   /* threshold for median of the 5 or the more,   MDT

Quote:
>= 8 */

#define MDA      2   /* adjusting threshold slightly,          MDT + MDA
Quote:
>= 4 */

#define MDM      2   /* multiplier for threshold,                    MDM

Quote:
>= 2 */

#define DGT    256   /* threshold of p-size for checking
degeneration         */
#define DGV     16   /* degeneration assumed if the smaller < (pz /
DGV)      */
#define DGM      0   /* threshold to STG-1 : selecting no
median              */
#define DGS      2   /* threshold to STG-2 : samples distributed
uniformly    */
#define DGU      4   /* threshold to STG-3 : sampling at
random               */
#define DGA     64   /* base of p-size to distribute samples
uniformly        */
#define DGB      2   /* log2 of divider for p-size < base ( ->  4
)           */
#define DGC      6   /* log2 of upper limit of divider    ( -> 64
)           */
#define DGD      4   /* log2 of multiplier for base       ( -> 16
)           */
#define SAMPLRDM(m, s, e) if (m != (n = s +
(                                  \
                    randmn ((e - (s)) + 1)))) an = *n, *n = *m, *m = an
static unsigned long randmn (unsigned long);
static unsigned long setseed (unsigned long);

typedef int (*ftp) (const void *, const void *);
typedef char **     PTRTYP;
void qsort (void *av, size_t rn, size_t rl, ftp qcmp) {
    PTRTYP i, j, k, l, m, n, r, *p, *s;
    char *ai, *aj, *ak, *am, *an, **ll, **rr;
    int pz, tr, iv, cw, sq, dgn = 0, t = sizeof (size_t) * 8 * 2;

    if (rn > 1 && rl > 0) {
        p = s = (PTRTYP *) malloc (sizeof (PTRTYP) * t);
        l = ll = (char **) malloc (sizeof (char *) * rn);
        r = rr = ll + (rn - 1); ai = (char *) av; setseed (0);
        for (i = l; i <= r; i++, ai += rl) *i = ai;

        for ( ; ; ) {
            m = l + (size_t) (pz = r - l) / 2, am = *m;
            i = l, j = r, iv = 1, sq = 1, tr = MDT, pz -= (MDA);

            if (pz > MDT && dgn > DGS) {
                tr = DGA, iv = DGB;
                while (pz >= tr && iv < DGC) tr <<= DGD, iv++;
                iv = pz >> iv;
                if (dgn > DGU) {
                    SAMPLRDM (i, i, j);
                    SAMPLRDM (j, i + 1, j);
                    SAMPLRDM (m, i + 1, j - 1); am = *m;}}

            if (pz <= MDT || dgn <= DGM || dgn > DGS)
                for ( ; ; ) {
                    ai = *i, aj = *j;
                    if ((cw = (*qcmp) (am, aj)) < 0 || cw == 0 && am <
aj) {
                        if (i < m && !((cw = (*qcmp) (ai, am)) < 0 ||
                            cw == 0 && ai < am)) {
                            *i = am, am = ai, sq = 0;
                            for (n = m, k = j; ; ) {
                                if (!((cw = (*qcmp) (am, aj)) < 0 ||
                                    cw == 0 && am < aj)) n = k, am = aj;
                                if ((k += iv) <= r) aj = *k; else
break;}
                            if (m != n) *n = ai;
                            *m = am;}}
                    else {
                        if (i >= m || !((cw = (*qcmp) (ai, am)) < 0 ||
                            cw == 0 && ai < am))
                            *i = aj, *j = ai, sq = 0;
                        else {
                            *j = am, am = aj, sq = 0;
                            for (n = m, k = i; ; ) {
                                if (!((cw = (*qcmp) (ai, am)) < 0 ||
                                    cw == 0 && ai < am)) n = k, am = ai;
                                if (l <= (k -= iv)) ai = *k; else
break;}
                            if (n != m) *n = aj;
                            *m = am;}}
                    i += iv, j -= iv;
                    if (iv > 1) {
                        if (i >= m || m >= j) {
                            i = l + 1, j = r - 1; break;}
                        if (dgn > DGU) {
                            SAMPLRDM (i, i + 1 - iv, m - 1);
                            SAMPLRDM (j, m + 1, j - 1 + iv);}
                        continue;}
                    if (pz > tr) tr *= MDM; else break;}

            if (pz > MDT && sq != 0) {
                k = l, aj = *k;
                while (k < r && ((ai = aj, aj = *(k + 1), cw = (
                    *qcmp)(ai, aj)) < 0 || cw == 0 && ai < aj)) k++;
                if (k >= r) i = j; else if (k >= m) i = m;}

            if (i < j) {
                for ( ; ; ) {
                    while (ai = *i,    i < m  && ((
                        cw = (*qcmp) (ai,  am)) < 0 ||
                        cw == 0  &&   ai < am))         i++;
                    while (aj = *j,    m < j  && ((
                        cw = (*qcmp) (am,  aj)) < 0 ||
                        cw == 0  &&   am < aj))         j--;
                    if (i <  j) *i = aj, *j = ai; else  break;
                    if (m == j)  m = i,   j--;    else
                    if (i == m)  m = j,   i++;    else  j--, i++;}

                if ((--i) - l < r - (++j)) {
                    if (pz >= DGT && i - l < pz / DGV)  dgn++;
                    if (l >= i) l = j; else
                        *(p++) = j, *(p++) = r, r = i;}
                else {
                    if (pz >= DGT && r - j < pz / DGV)  dgn++;
                    if (j >= r) r = i; else
                        *(p++) = l, *(p++) = i, l = j;}}

            else {
                if (!(s >= p)) r = *(--p), l = *(--p); else break;}}

        ai = ak = (char *) av;
        an = (char *) malloc (sizeof (char) * rl);
        for (i = ll; i <= rr; i++, ai += rl)
            if ((aj = *i) != ai) {
                memcpy (an, ai, rl), am = ai, m = i; do
                    *m = memcpy (am, aj, rl), am = aj,
                    m = ll + (aj - ak) / rl; while ((aj = *m) != ai);
                *m = memcpy (am, an, rl);}

        free (an); free (ll); free (s);}}

/*  randmn.c  */
static unsigned long random ();                     /** if not in
<stdlib.h> **/
static unsigned long randmn (unsigned long n) {
    long t, s = 0x7fffffff / n;  s = s * n;
    do t = random (); while (t > s);
    return (t % n);}
/*  randmn.c end  */
/*  setseed.c  */
#include <time.h>
static void srandom (unsigned long);                /** if not in
<stdlib.h> **/
static unsigned long setseed (unsigned long seed) {
    long h, t; if (seed == 0) {
        t = time (0); h = t / 3600; t = t - h * 3600;
        seed = (unsigned long) (h + t * 3601);}
    srandom (seed); return seed;}
/*  setseed.c end  */

/*  random.c                                         ** if not in
<stdlib.h> **
    from ftp://ftp.freebsd.org/pub/FreeBSD
              /FreeBSD-current/src/sys/libkern/random.c    Sat Feb 22,
1997
    Copyright (c)  1992, 1993   The Regents of the University of
California. */
static unsigned long randseed = 1;
static void srandom (unsigned long seed) {
    randseed = seed;}
static unsigned long random () {
    register long x, hi, lo, t;
    x = randseed; hi = x / 127773; lo = x % 127773;
    t = 16807 * lo - 2836 * hi; if (t <= 0) t += 0x7fffffff;
    randseed = t; return (t);}
/*  random.c end  */

/*  a-wada/qsortlib/ccc/qsortv.c end  */
/*============================================================================*/

/*       *** << Summary of  A Bench-mark >> ***
  * average of 10 cases, N = 10,000  element-size = 32(A), 128(B) bytes
  * key1 = string [16] all same chars,  key2 = double random unique

      time (10msec)
*pno   A   B  ky-cmp  el-trf   program & comment
sys   46  84  150536       -   qsort.stdlib.h of the system  (SunOS 5.4
ss20-40)
qs-3  46  84  150536   73809   qsort-Emacs.c   (e)   ftp: 3.= SunOS 5.4
!= 5.5.1
qs-4  43  78  142661   73813   qsort-Postgrs.c (g)   ftp: 4.
qs-5  35  49  135937   74919   qsort-FreeBsd.c (f)   ftp: 5.
qs-6  34  45  140823   75345   qsort-Darcy.c   (h)   ftp: 6.
qs-7  33  45  132017   64184   qsort-Kawamr6.c (k6) http: 7.
qs-8  33  46  134702   70700   qsort-Kawamr7.c (k7) http: 7.

 ftp: 3. ftp://ring.aist.go.jp/pub/TeX/CTAN
                                 /indexing/makeindex/src/qsort.c  May
26, 1993
 ftp: 4. ftp://ftp.sra.co.jp/pub/cmd/postgres/6.2.1
                       /postgresql-6.2.1/src/backend/lib/qsort.c  Oct
20, 1997
 ftp: 5. ftp://ftp.jp.freebsd.org/pub/FreeBSD
                        /FreeBSD-current/src/sys/libkern/qsort.c  Feb
22, 1997
 ftp: 6. ftp://ftp.nizkor.vex.net/local/src/darcy/common/qsort.c  Jan
22, 1997
http: 7. http://www.*-*-*.com/ ~kawamura/ qs6.c & qs7.c  Nov
27, 1997

  the following codes and more informations on the web-site,
      http://www.*-*-*.com/ ~a-wada/qsortlib.html

qs11  28  43  125511   74546   qsorta.c   tailor-made, comp & swap
inline
qs14  31  46  125511   74546   qsortb.c   compatible, word-swap, no
stability
qs17  35  38  125511   10007   qsortc.c   compatible(ptr-sort) stablty,
anti-dgn
qs18* 33  36  125542   10007 **qsortv.c**(optimized in C from qsortc.c)
qs20  30  33  125511   10007   qsortd.c   tailor-made(ptr-sort) stblty,
anti-dgn
qs22  24  27 (125511) (10007)  qsortds.s (optimized in Asm from
...

read more »



Thu, 08 Mar 2001 03:00:00 GMT  
 Quick Sort, quicker, stable and anti-degeneration


Quote:


>>/*  Quick Sort, quicker, stable and anti-degeneration (qsortv.c).

>>    Please test and evaluate on the various platforms !
>Why should anyone do your grunt work for you?
**A**
>>void qsort (void *av, size_t rn, size_t rl, ftp qcmp) {
>A C program which defines an external name that matches one of the external
>names defined by the standard library invokes undefined behavior.
>You should call the function something else.
**B**
>>    PTRTYP i, j, k, l, m, n, r, *p, *s;
>>    char *ai, *aj, *ak, *am, *an, **ll, **rr;
>>    int pz, tr, iv, cw, sq, dgn = 0, t = sizeof (size_t) * 8 * 2;

>>    if (rn > 1 && rl > 0) {
>>        p = s = (PTRTYP *) malloc (sizeof (PTRTYP) * t);
>>        l = ll = (char **) malloc (sizeof (char *) * rn);
>>        r = rr = ll + (rn - 1); ai = (char *) av; setseed (0);
>>        for (i = l; i <= r; i++, ai += rl) *i = ai;

>>        for ( ; ; ) {
>>            m = l + (size_t) (pz = r - l) / 2, am = *m;
>>            i = l, j = r, iv = 1, sq = 1, tr = MDT, pz -= (MDA);
>This code is a little hard to read because, for starters, it uses a plethora
>of cryptic variable names. And it's an understatement to say that there is a
>dearth of comments.
>Now it may be the case that these entities are simply not easy to name, or
>that descriptive names would not not only fail to help, but only bloat the
>text to make it even less comprehensible.
**C**
>Why don't you write a paper outlining the algorithm and perhaps include some
>algorithm analysis. That would be worth much more than a benchmark against
>some existing sort implementations; benchmark results can be affected by how
>well a given algorithm is implemented, and on what type of machine the test is
>run.
>Then post your paper to an appropriate newsgroup, such as comp.programming.
>Your implementation of this algorithm is simply too bogged down with C
>programming details; someone wishing to understand the method has to
>essentially reverse engineer the algorithm out of the code.
**D**
>>                *m = memcpy (am, an, rl);}

>>        free (an); free (ll); free (s);}}
>Ouch, this is an awful, not to mention unusual style for closing your
>parentheses. I doubt that any software house would tolerate it.   I have to
>use a bracket-matching feature in my text editor just to be able to tell which
>block is being closed.
>You see that style in TeX code, but there is a reason for that: whitespace
>is significant in TeX in ways not encountered in C.

**E**

 Sorry much for codes and messages with annoyed formats by my mistake, and
thanks a lot for kindhearted advices, but please let me excuse myself,-
**A** : Because the platforms available to me are limited a few, and it would be
considered that the same implementation in C might behave differently on the
different platforms, especially concerning the efficiency e.g. threshold of
elmentsize for direct-swapping-sort or pointer-sort.
 And not for only myself, but for testers interested, and if they would respond
the results to newsgroups, for readers of newsgroups.
**B** : Intentionally named same, to override the system routine, and to keep
portability of the application programs, i.e. the caller programs.
 And at least on the platforms I'd tested, it was not complained by compiler
nor linker, and ran well expectedly.
**C** : Sure, but variable-names are not entirely undescriptive, "l" points
'leftend', "r" 'rightend', "m" 'middle of current partition', and "tr", "pz",
"iv" represent 'threshold', 'partition-size', 'interval' respectively, etc..
**E** : Well, when we deliver source codes to customers, we too observe the
styles required, but they are too various to be standard.
 And personally, parentheses which are floating alone in lines, make me unrest,
probably because I'd been using fortran, COBOL, and ASSEMBLERs for long years
before C language, and short variable-names by the same reason.
 Instead I'm endeavoring to indent clearly to separate blocks.
**D** : Reverse engineerings, of course without infringement of copyright, would
be useful to brush up one's programming technique, I'd been forced to believe.
 I'd prepared the papers on my private web-site, and mentioned URL in the article,
and c.l.c is the last one of the list of newsgroups multi-posted to, but
only one respond was from c.l.c., I guess, thanks a lot!
 I'm wondering yet whether I should post the following before previous one,
and afraid to be scolded for using the "#define macros" immoderately.

**  Sorry again if too dogmatic **
/*============================================================================*/
/*  a-wada/qsortlib/ccc/qsortb.c                                Sep. 20, 1997 */

/*  <<< Example of QSORTBASE to generate sort subroutine >>>                  */
/*      Quick sort compatible with "qsort in stdlib.h" (1)                    */
/*      for efficiency about 1/2 sorting time by reducing the times of        */
/*          key_comparisons, and word_mode_swapping for word_aligned object.  */

typedef int (*ftp) (const void *, const void *);
typedef unsigned int size_t;                /*se*
void    swap (char *, char *, size_t);       *se*/

typedef char *      PTRTYP;
#define ARGMTS      void *av, size_t rn, size_t rl, ftp qcmp
#define DEFWRK      STACK  SWPWRK
#define VALID       rn > 1 && rl > 0
#define HSKP        l = (char *) av, r = l + (rn - 1) * rl;  STKINI  SWPINI
#define EZ          (int) rl
#define ORDR(i, j)  ((*qcmp) (i, j) <= 0)
#define SWAP(i, j)  SWPINL   (i, j)    /*si* *si*/  /*se* swap (i, j, rl); *se*/
#define CLNUP       STKEND

#define STACK       PTRTYP *p, s[sizeof (size_t) * 8 * 2];
#define STKINI      p = s;
#define PUSH(x, y)  *(p++) = x, *(p++) = y,
#define EMPTY       (s >= p)
#define POP(y, x)   y = *(--p), x = *(--p);
#define STKEND      /* NOTHING */

#define SWPWRK      int b, t, w, z = sizeof (int);                 /*si* *si*/
#define SWPINI      b = ((int) av | rl) & (z - 1);                 /*si* *si*/
#define SWPINL(i, j) {                                             /*si* *si*/ \
                    if (b != 0) {                                              \
                        w = rl - 1; do t = *(i + w), *(i + w) = *(j + w),      \
                            *(j + w) = t; while (--w >= 0);}                   \
                    else {                                                     \
                        w = rl - z; do t       = *((int *) (i + w)),           \
                            *((int *) (i + w)) = *((int *) (j + w)),           \
                            *((int *) (j + w)) = t; while ((w -= z) >= 0);}}

/*si*   This inline swap routine is optimized for sparc architecture,      *si*/
/*si*       especially on condition that size of int is 4 or 8 bytes,      *si*/
/*si*              it might have to be changed for the other systems.      *si*/

#define AVDGN   /* AVDGN_E  not installed */
#define BRDGN   /* BRDGN_E  not installed */
#define CKDGN   /* CKDGN_E  not installed */
#define DFDGN   /* DFDGN_E  not installed */
/* unsigned long random ();   * if not in #include <stdlib.h> */
/* if the unique key in data object and/or compare function available, revive
  the above anti-degeneration macros commented out, for short object especially
  for the array of pointers prepared by user for the ojects with large or
  variable length etc, to eliminate overhead of indirect comparison duplicated
  by using qsortc.c or qsortv.c */

#include "qsortbase.h"

    QSORTBASE (qsort)

/*  a-wada/qsortlib/ccc/qsortb.c end */
/*============================================================================*/
/*  a-wada/qsortlib/ccc/qsortc.c                                Sep. 20, 1997 */

/*  <<< Example of QSORTBASE to generate sort subroutine >>>                  */
/*      Quick sort compatible with "qsort in stdlib.h" (2)                    */
/*      for efficiency about 1/3 sorting time by pointer_sort etc,            */
/*          stability to preserve the initial order for equal sort keys, and  */
/*          prevention from the degeneration caused by peculiar input         */

#include <string.h>
#include <stdlib.h>
typedef int (*ftp) (const void *, const void *);

typedef char **     PTRTYP;
#define ARGMTS      void *av, size_t rn, size_t rl, ftp qcmp
#define DEFWRK      STACK   UNQKEY
#define VALID       rn > 1 && rl > 0
#define HSKP        STKINI  UKYINI
#define EZ          1
#define ORDR(i, j)  UORDR (i, j)
#define SWAP(i, j)  USWAP (i, j)
#define CLNUP       UARGELM  UKYEND  STKEND

#define STACK       PTRTYP *p, *s; int t = sizeof (size_t) * 8 * 2;
#define STKINI      p = s = (PTRTYP*) malloc (sizeof (PTRTYP) * t);
#define PUSH(x, y)  *(p++) = x, *(p++) = y,
#define EMPTY       (s >= p)
#define POP(y, x)   y = *(--p), x = *(--p);
#define STKEND      free (s);

#define UNQKEY      char *ai, *aj, *ak, *am, *an, **ll, **rr; int cw;
#define UKYINI      l = ll = (char **) malloc (sizeof (char *) * rn);          \
                    r = rr = l + (rn - 1); ai = (char *) av;                   \
                    for (i = l; i <= r; i++, ai += rl) *i = ai;
#define UORDR(i, j) ((cw = (*qcmp) (a ## i = *i, a ## j = *j)) < 0 ||          \
                      cw == 0 && a ## i < a ## j)
#define USWAP(i, j) a ## i = *i, a ## j = *j, *j = a ## i, *i = a ## j;
#define UARGELM     l = ll, r = rr, ai = ak = (char *) av, tr = rl;            \
                    an = (char *) malloc (sizeof (char) * tr);                 \
                    for (i = l; i <= r; i++, ai += tr) if ((aj = *i) != ai) {  \
                        memcpy (an, ai, tr), am = ai, m = i; do                \
                            *m =
...

read more »



Mon, 12 Mar 2001 03:00:00 GMT  
 
 [ 2 post ] 

 Relevant Pages 

1. almost sorted Quick Sort

2. quickest way to sort an almost sorted file

3. Merge Sort and Quick Sort comparison

4. Quick sort vs. bubble sort

5. Difference between Quick C and Quick C/Win

6. Quick C v2.0 versus Quick C v1.0

7. HELP:Need copy of MS Quick C w/Quick assembler 2.5x for windows

8. Help-Quick answer to quick memory query

9. needed: a good quick .CHM quick ref of C#

10. Quick Question, Quick Answer

11. Quick sort using median of three routine to find the pivot

12. about quick sort

 

 
Powered by phpBB® Forum Software