;;;==================================================================== ;;; ;;; ;;; Currently contains: ;;; ;;; VECTOR UTILITIES ;;; ;;; HEAP MANIPULATION FUNCTIONS ;;; ;;; HEAPSORT ;;; ;;; SIFT ;;; ;;; INSERTION SORT ;;; ;;; SHELLSORT ;;; ;;; INCREMENT SEQUENCES FOR SHELLSORT ;;; ;;; HEAPS USING SIFTING ;;; ;;; D-HEAPS ;;; ;;; SELECTION SORT ;;; ;;;==================================================================== ;@ ;;;==================================================================== ;;; ;;; VECTOR UTILITIES ;;; ;;; (vector-last v) - returns the index of the last element in a ;;; vector. ;;; ;;; (vector-swap! v i j) - interchanges the values of elements i ;;; and j in a vector. ;;; ;;; (vector-reverse! v) - reverses a vector in place (destructively). ;;; ;;; (vector-move! v to from) - move the value from element from to ;;; element to. ;;; ;;; (vector-compare predicate v first second) - compare element ;;; first with element second using predicate. ;;; ;;;==================================================================== (define-integrable (vector-last v) (-1+ (vector-length v))) (define-integrable (vector-swap! v i j) (let ((temp (vector-ref v i))) (vector-set! v i (vector-ref v j)) (vector-set! v j temp))) (define (vector-reverse! v) (do ((first 0 (1+ first)) (last (vector-last v) (-1+ last))) ((>= first last) v) (vector-swap! v first last))) (define-integrable (vector-move! v to from) (vector-set! v to (vector-ref v from))) (define-integrable (vector-compare predicate v first second) (predicate (vector-ref v first) (vector-ref v second))) ;@ ;;;==================================================================== ;;; ;;; SIFTING ;;; ;;; Sift is an algorithmic primitive which can be used to build ;;; a variety of sorting algorithms. It is a generalization of ;;; the bubbling operation in heaps. Given a vector, v, containing ;;; elements to be sorted, sift considers chains of elements. A chain ;;; is a sequence of elements whose indices in the vector are related ;;; functionally to one another. When bubbling up in an ordinary heap, ;;; for example, the next element in a chain has an index which is ;;; found by halving the current index. Sift also takes a value ;;; whose proper place within the chain is to be found. The proper ;;; place of a value within a chain is defined by a predicate, ;;; which is used to compare pairs of values. If (predicate a b) ;;; is satisfied, then a belongs ahead of b in the chain. Usually, ;;; the value passed to sift is a value already in the chain and ;;; currently out of place with respect to the predicate. Sift is ;;; invoked with this value and with a chain which is otherwise ;;; correct with respect to the predicate. After sifting, this value ;;; is in the correct place in the chain. Thus, a proper chain with ;;; one more element has been created. Starting with chains containing ;;; one element (which are trivially correct), sift is called to ;;; create larger chains which lead to a variety of structures useful ;;; in sorting. Examples of these are heaps (of many kinds), and partially ;;; sorted subsequences of elements. As we will see below, many variants ;;; of heapsort, shellsort, and selection sort can be created using sift. ;;; ;;; (sift v position next-function value fill-pointer predicate) - ;;; v - vector containing values to be sorted. ;;; current - position in v where sift is to start. ;;; next-function - function which returns the position ;;; of the next element to be considered in the sift; ;;; returns null if current position is the last element ;;; to be considered. ;;; value - the value to be placed in v. ;;; fill-pointer - last occupied position in v. ;;; predicate - predicate indicating ordering desired by ;;; the sort; i.e., (predicate v[i] v[j]) is satisfied for ;;; i < j at the end of the sort. ;;; ;;; (sift-all! v step-function start fill-pointer predicate) - ;;; iteratively invokes sift starting from positions ;;; start,start-1,... 0. This can be used to set up a ;;; heap, do an insertion sort, or do one phase of Shellsort. ;;; ;;;==================================================================== ;@ (define (sift! v current next-function value fill-pointer predicate) (let ((next (next-function v current fill-pointer predicate))) (cond ((or (null? next) (predicate value (vector-ref v next))) (vector-set! v current value)) (else (vector-set! v current (vector-ref v next)) (sift! v next next-function value fill-pointer predicate))))) (define (sift-all! v next-function start fill-pointer predicate) (do ((i start (- i 1))) ((< i 0) v) (sift! v i next-function (vector-ref v i) fill-pointer predicate))) ;@ ;;;==================================================================== ;;; ;;; INSERTION SORT ;;; ;;; To implement Insertion Sort using the sift primitive, we need ;;; only define an appropriate next-function. ;;; ;;; (insertion-next step) - next-function for insertion sort. Also, ;;; suitable for implementing one phase of Shellsort. ;;; Generates next postion by adding a constant to current ;;; position. ;;; ;;; (insertion-step-sort! v step predicate) - uses insertion-next ;;; and sift-all! to sort, or in the case of Sheelsort, ;;; to do one phase of a sort by sorting every step-th ;;; element in v. ;;; ;;; (insertion-sort! v predicate) - Insertion Sort. Invokes ;;; insertion-step-sort! with step=1. ;;; ;;;==================================================================== (define (insertion-step step) (lambda (v current fill-pointer predicate) (let ((next (+ current step))) (if (> next fill-pointer) '() next)))) (define (insertion-step-sort! v step predicate) (let ((l (vector-last v))) (sift-all! v (insertion-step step) (- l step) l predicate))) (define (insertion-sort! v predicate) (insertion-step-sort! v 1 predicate)) ;@ ;;;==================================================================== ;;; ;;; SHELLSORT ;;; ;;; Refs: D.E. Knuth, "The Art of Computer Programming," ;;; Vol. 3, "Sorting and Searching," pp. 84-95. ;;; Donald L. Shell, CACM, Vol. 2, 1959, pp.30-32. ;;; Collected Algorithms from CACM: Algorithm #201 ;;; Properties: Sorts vectors in place, not stable, partial sorting ;;; not possible, worst case complexity O[N^2], average ;;; case complexity varies and is in practice competitve ;;; with the best sorts. ;;; ;;; Shellsort takes as input a vector of values to be sorted and a ;;; sequence of increments. These increments control the sorting process. ;;; Each increment is used in turn to define the distance between elements ;;; in the vector. Elements in the vector at this distance are considered ;;; as a chain (see the description of the sifting operation above) and ;;; are sorted. The final increment in the sequence is 1 and so at the ;;; end of Shellsort, the vector is totally sorted. Thus, Shellsort can ;;; be thought of as a series of insertion sorts. The purpose of the ;;; initial sorts in the sequence is to quickly bring elements to ;;; positions which are close to the proper positions for these elements ;;; so that each individual pass of the algorithm does not have to work ;;; too hard; it is well known that insertion sort is very fast when ;;; the elements to be sorted do not have to move far. Picking a good ;;; sequence of increments is an art. We offer several good choices ;;; below. ;;; ;;; ((make-shellsort! increment-function) v predicate) ;;; v - vector of elements to be sorted. ;;; increments - a function of one argument (the number ;;; of elements to be sorted) which produces the ;;; sequence of increments defining the ;;; insertion sort to be used in each pass. ;;; predicate - predicate defining the desired ordering. ;;; ;;; ;;;==================================================================== (define (make-shellsort! increment-function) (lambda (v predicate) (for-each (lambda (step) (insertion-step-sort! v step predicate)) (increment-function (vector-length v))) v)) ;@ ;;;==================================================================== ;;; ;;; INCREMENT SEQUENCES FOR SHELLSORT ;;; ;;; The following are sequences shown to be good for Shellsort. ;;; (Reference: "Handbook of Algorithms and Data Structures", ;;; G. H. Gonnet Addison-Wesley, 1984) ;;; ;;; (knuth-increments n) - function yielding the sequence recommended ;;; by Knuth in his book. n is the number of elements in the ;;; vector of elements to be sorted. The sequence ;;; generated is (...., 40, 13, 4, 1). The sequence is ;;; generated starting with the value 1 at the end of the ;;; sequence. The next (i.e., preceding) value is generated ;;; from the current one by multiplying by 3 and adding 1. ;;; The final (first) element in the sequence is the largest ;;; such number which is less than n. ;;; ;;; (shellsort-knuth! v predicate) - Shellsort using Knuth increments. ;;; ;;; (pratt-increments n) - increments by shown by Pratt to guarantee ;;; O[n * (log (n)^2)] worst case preformance but very ;;; slow in practice. Elements of the sequence are composites ;;; of powers of 2 and powers of 3. For example if n is 50, ;;; the sequence is (48,36,32,27,24,18,16,12,9,6,4,3,2,1). ;;; ;;; (shellsort-pratt! v predicate) - Shellsort using Pratt increments. ;;; ;;; (gonnet-increments n) - increments recommended by Gonnet in his ;;; book. The sequence is generated by starting with ;;; floor(.4545n) and continuing to take floor(.4545i) ;;; until 1 is reached. ;;; ;;; (shellsort-gonnet! v predicate) - Shellsort using Gonnet increments. ;;; ;;; (stepanov-increments n) - increments recommended by A. Stepanov. ;;; The sequence is generated by taking floor(e^i + .5); ;;; i.e., powers of e rounded to the nearest integer. Again, ;;; the sequence is generated in reverse order and ends with ;;; the largest such value less than n. These increments are ;;; the most efficient ones we have found thus far. ;;; ;;; (shellsort-stepanov! v predicate) - Shellsort using Stepanov ;;; increments. ;;; ;;;==================================================================== ;@ (define (knuth-increments n) (do ((i 1 (+ (* i 3) 1)) (tail '() (cons i tail))) ((>= i n) (or (cdr tail) tail)))) (define shellsort-knuth! (make-shellsort! knuth-increments)) (define (pratt-increments n) (define (powers base n) (do ((x 1 (* x base)) (result '() (cons x result))) ((>= x n) result))) (filter (lambda (x) (< x n)) (parallel-reduce! (lambda (x y) (merge! x y >)) (outer-product * (powers 2 n) (powers 3 n))))) (define shellsort-pratt! (make-shellsort! pratt-increments)) (define (gonnet-increments n) (define (gonnet n) (floor (* n .45454))) (do ((i (gonnet n) (gonnet i)) (result '() (cons i result))) ((>= 1 i) (reverse! (cons 1 result))))) (define shellsort-gonnet! (make-shellsort! gonnet-increments)) (define (stepanov-increments n) (do ((i 1 (+ i 1)) (e 1 (floor (+ 0.5 (exp i)))) (tail '() (cons e tail))) ((>= e n) tail))) (define shellsort-stepanov! (make-shellsort! stepanov-increments)) ;@ ;;;==================================================================== ;;; ;;; HEAPS USING SIFTING ;;; ;;; Heaps can also be implemented using the sift primitive, inclusing ;;; an entire family of Heapsort algorithms. These algorithms also use ;;; some of the vector utilities described above. All of the heap ;;; utilities implemented above are reimplemented here using the same ;;; names for the functions. Thus, if this entire file is loaded and ;;; compiled, these are the functions which will be used, since they ;;; the last (most recent) ones defined. ;;; ;;; next-functions for sift: ;;; ;;; (heap-son v father fill-pointer predicate) - This is a next-function ;;; for sift. Given father, a position in the vector (v, ;;; fill-pointer, and predicate are as above in the description ;;; of sift) it returns the position of the "larger" successor ;;; of father. Thus, if father = i, it returns the false value ;;; if 2i+2 is greater than n. (Recall that our vectors are ;;; indexed starting from 0; thus a vector of n elements has ;;; elements with indices 0,1,...n-1 and the children of an ;;; element with index i are those with indices 2i+1 and 2i+2.) ;;; It returns 2i+1 if (predicate v[2i+1] v[2i+2]) is true or ;;; if 2i+3 is greater than n; and it returns 2i+2 if ;;; (predicate v[2i+1] v[2i+2]) is false. This is the ;;; appropriate next-function for bubbling down in ordinary heaps. ;;; ;;; (heap-up-pointer son) - floor( (son-1)/2 ) ;;; ;;; (heap-father v son fill-pointer predicate) - The appropriate ;;; next-function for bubbling up in an ordinary heap. ;;; It returns (heap-up-pointer son) if son is positive ;;; and the false value otherwise. ;;; ;;; Heap utilities - These functions are described in the Heap Utilities ;;; section above. They are reimplimented here using sift: ;;; ;;; (downheap! v father value fill-pointer predicate) ;;; (upheap! v son value predicate) ;;; (build-heap! v fill-pointer predicate) ;;; (heap-set! v position value fill-pointer predicate) ;;; ;;;==================================================================== ;@ (define (heap-son v father fill-pointer predicate) (let ((son (* 2 (1+ father)))) (cond ((>= fill-pointer son) (if (predicate (vector-ref v son) (vector-ref v (-1+ son))) son (-1+ son))) ((= fill-pointer (-1+ son)) (-1+ son)) (else '())))) (define (heap-up-pointer son) (quotient (-1+ son) 2)) (define (heap-father v son fill-pointer predicate) (if (>= 0 son) '() (heap-up-pointer son))) (define (downheap! v father value fill-pointer predicate) (sift! v father heap-son value fill-pointer predicate)) (define (upheap! v son value predicate) (sift! v son heap-father value son (lambda (x y) (predicate y x)))) (define (build-heap! v fill-pointer predicate) (sift-all! v heap-son (heap-up-pointer fill-pointer) fill-pointer predicate)) (define (heap-set! v position value fill-pointer predicate) (if (predicate (vector-ref v position) value) (downheap! v position value fill-pointer predicate) (upheap! v position value predicate))) ;@ ;;;====================================================================== ;;; ;;; HEAPSORT - Williams' Heapsort Algorithm ;;; ;;; Refs: Knuth Volume 3 , p. 145-149 ;;; Collected Algorithms from CACM: Algorithm #232 ;;; CACM, Vol. 7 (1964) pp. 347-348 ;;; ;;; Properties: sorts vectors in place, not stable, partial sort ;;; possible, worst case running time O[N*log(N)]. ;;; ;;; Heapsort works by setting up a heap. A heap is a binary tree ;;; with the following properties. The descendents of node i are ;;; nodes 2i and 2i+1. Thus, the links pointing to the descendents of ;;; a node are implicit in the nodes' positions in the vector. A node ;;; satisfies the predicate (passed as an argument to heapsort) with ;;; respect to all its descendents. Thus, for example, if the ;;; predicate is <, each node is less than all its descendents. ;;; Heapsort begins by building a heap (using build-heap). ;;; The heap is built by checking that the predicate is ;;; satisfied and interchanging a node with its smaller (in the sense ;;; of the predicate) descendent if necessary, so that after the ;;; exchange the predicate is satisfied. Traditionally, for the sake of ;;; efficiency, the heap is built upside down, in reverse order of ;;; the predicate. Here, for clarity, the heap is built right side up. ;;; The function of "bubbling down an element, ;;; in some cases several levels in the heap, until the ;;; predicate is satisfied or the element reaches the bottom of the ;;; heap, is handled by downheap. After the heap is set up, ;;; the element which should be in ;;; the first position in the sorted vector is at the top of the ;;; heap (in position 1). The first and last element in the heap ;;; are interchanged and the last element is removed from further ;;; consideration by decreasing the size of the heap. The new top ;;; heap element (taken from the bottom of the heap in the above ;;; exchange) is bubbled down. The process of exchange and bubbling ;;; is repeated until the entire vector is sorted. At this point, ;;; the vector in in reverse order, so reverse! is called to put the ;;; vector in the desired sorted order. ;;; ;;; (heapsort! v predicate) - Heapsort. v is the vector to be ;;; sorted using the predicate. ;;; ;;; (read-heap! v fill-pointer predicate) - pop all the elements out ;;; of the heap in order. ;;; ;===================================================================== ;@ ;;;==================================================================== ;;; ;;; HEAPSORT USING SIFTING ;;; ;;; (heapsort! v predicate) - Heapsort. See description above. ;;; This is the traditional version of Heapsort. The ;;; heap is built in reverse order of the predicate, ;;; which allows the read operation to pop out the elements ;;; in reverse ordr and then place them in their proper ;;; positions in the sorted vector when the popped element ;;; and the last element in the heap are interchanged. ;;; ;;; (read-heap! v fill-pointer predicate) - pop all the elements out ;;; of a heap. See description above. ;;; ;;; (reverse-heapsort! v predicate) - This is the more natural version ;;; of Heapsort, as described in the section above. The heap ;;; is built in the natural order and the sorted list is ;;; reversed at the end of the sort. ;;; ;;; (top-down-build-heap! v fill-pointer predicate) - The heap can be ;;; built from the top down. This is useful if the elements ;;; are not all available at the time the heap is originally ;;; being formed. This has worst case complexity O[nlog(n)]. ;;; ;;; (top-down-heapsort! v predicate) - Heapsort using top-down- ;;; build-heap. ;;; ;;;==================================================================== ;@ (define (read-heap! v fill-pointer predicate) (do ((position fill-pointer (-1+ position))) ((>= 0 position) v) (vector-swap! v position 0) (downheap! v 0 (vector-ref v 0) (-1+ position) predicate))) (define (heapsort! v predicate) (build-heap! v (vector-last v) (lambda (x y) (predicate y x))) (read-heap! v (vector-last v) (lambda (x y) (predicate y x)))) (define (reverse-heapsort! v predicate) (build-heap! v (vector-last v) predicate) (read-heap! v (vector-last v) predicate) (vector-reverse! v)) ;;;====================================================================== ;;; ;;; TOP-DOWN-BUILD-HEAP ;;; ;;; Top-down-build-heap! allows us to build a heap one element at a time. ;;; It is O[N*log(N)] in the worst case and O[N] on the average. ;;; We can also implement heapsort with top-down-build-heap! ;;; ;;;====================================================================== (define (top-down-build-heap! v fill-pointer predicate) (do ((position 1 (1+ position))) ((> position fill-pointer) v) (upheap! v position (vector-ref v position) predicate))) (define (top-down-heapsort! v predicate) (top-down-build-heap! v (vector-last v) predicate) (read-heap! v (vector-last v) predicate) (vector-reverse! v)) ;@ ;;;==================================================================== ;;; ;;; 3-HEAPS ;;; ;;; 3-heaps are slightly faster (3% fewer comparisons and 2% less time) ;;; than ordinary heaps (2-heaps). In 3-heaps, each non-terminal node ;;; has up to 3 children. This results in a shallower tree but requires ;;; an additional comparison per level. Of all the possible breadths ;;; of heaps, we found 3-heaps to be the best. Note that this section ;;; redefines the functions heap-son and heap-up-pointer and should ;;; not be loaded unless you intend to use 3-heaps instead of ordinary ;;; heaps. ;;; ;;;==================================================================== (define (heap-son v father fill-pointer predicate) (define (test i j) (predicate (vector-ref v i) (vector-ref v j))) (let ((son (* 3 (1+ father)))) (cond ((>= fill-pointer son) (if (test son (- son 1)) (if (test son (- son 2)) son (- son 2)) (if (test (- son 1) (- son 2)) (- son 1) (- son 2)))) ((= fill-pointer (-1+ son)) (if (test (- son 1) (- son 2)) (- son 1) (- son 2))) ((= fill-pointer (- son 2)) (- son 2)) (else '())))) (define (heap-up-pointer son) (quotient (-1+ son) 3)) ;@ ;;;==================================================================== ;;; ;;; D-HEAPS ;;; ;;; Using sifting, d-heaps (heaps with d successors per node) can ;;; be implemented. This is useful in order to carry out experiments ;;; on the relative efficiency of different values of d, which is ;;; interesting in the case where there are additions, deletions and ;;; changes in value of the vector elements. It is possible, by giving ;;; some nodes d children and other d+1 children to form d-heaps for ;;; non-integer values of d. We do not do this here, however. ;;; ;;; (largest-in-the-range v first last predicate) - returns the ;;; largest element between position first and position ;;; last, where v[i] is largest if (predicate v[i] v[j]) ;;; is true for all j in the range. ;;; ;;; (make-d-heap-son d) - returns a heap-son function for a d-heap. ;;; For example (define heap-son (make-d-heap-son 4)) sets ;;; up the heap-son function for a 4-heap. ;;; ;;; (make-d-heap-up-pointer d) - returns a heap-up-pointer function ;;; for a d-heap. ;;; ;;; (selection-sort! v predicate) - Selection sort using sifting. ;;; Selection sort places the largest of the remaining ;;; unsorted elements (in positions i through n) in ;;; position i. It has the virtues of being very simple ;;; and of allowing a partial sort. Its complexity is ;;; O[kn] to find the k largest elements and thus ;;; O[n^2] to sort completely. ;;; ;;;==================================================================== ;@ (define (largest-in-the-range v first last predicate) (if (> first last) '() (do ((next (1+ first) (1+ next))) ((> next last) first) (if (predicate (vector-ref v next) (vector-ref v first)) (set! first next))))) (define (make-d-heap-son d) (lambda (v father fill-pointer predicate) (let ((x (* d father))) (largest-in-the-range v (+ x 1) (min (+ x d) fill-pointer) predicate)))) (define (make-d-heap-up-pointer d) (lambda (son) (quotient (-1+ son) d))) (define (selection-sort! v predicate) (do ((last (vector-last v)) (i 0 (1+ i))) ((>= i last) v) (vector-swap! v i (largest-in-the-range v i last predicate)))) ;@