;;; -*- Mode: Scheme -*- ;;;; Hash Tries: Persistent Trie-Structured Hash Tables ;;; Copyright (c) 2009, Taylor R. Campbell ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions ;;; are met: ;;; ;;; * Redistributions of source code must retain the above copyright ;;; notice, this list of conditions and the following disclaimer. ;;; ;;; * Redistributions in binary form must reproduce the above copyright ;;; notice, this list of conditions and the following disclaimer in ;;; the documentation and/or other materials provided with the ;;; distribution. ;;; ;;; * Neither the names of the authors nor the names of contributors ;;; may be used to endorse or promote products derived from this ;;; software without specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ;;; This implements what are called Hash Array Mapped Tries (HAMT) in ;;; ;;; Phil Bagwell, `Ideal Hash Trees', Technical Report, 2001. ;;; ;;; This is a hash table stored in a compact array-mapped trie -- a ;;; trie where each branch is stored compactly with a bitmap of ;;; children to avoid reserving space for null pointers. Collision ;;; resolution is by chaining (no open-addressing or double-hashing). ;;; ;;; Lookup and update run in O(hash length) time in the RAM model, ;;; conditional on the event of no collisions -- worst-case, not ;;; amortized, since there's no rehashing ever. Running time in the ;;; event of collisions is as usual for chaining; see, e.g., Knuth ;;; vol. III (2e) sec. 6.4, pp. 520--525. Hash tries on their own ;;; provide no defence against `hash-flooding' attacks; if these are a ;;; concern, the user should either use a PRF under a secret key as ;;; the hash function, or choose a different data structure that is ;;; not vulnerable to hash-flooding. ;;; ;;; Yes, this is the same data structure as Clojure uses to implement ;;; its hash maps and hash sets. Similarly to Clojure, this code uses ;;; hash collision buckets rather than the paper's suggestion of ;;; repeating hashes salted by the trie depth. ;;; ;;; Although the pronunciation is identical, and despite the title of ;;; Bagwell's paper, a hash trie is not a hash tree. Sorry. Nor do ;;; these hash tries have any relation to what Knuth calls hash tries. ;;; ;;; This code depends on SRFIs 8 (RECEIVE), 9 (DEFINE-RECORD-TYPE), 23 ;;; (ERROR), and 33 (Integer Bitwise Operations) ;;;; Documentation ;;; Except where this is obviously not the case (HASH-TABLE/FOLD, for ;;; example), every procedure here runs in constant expected time under ;;; the assumption of a good key hash function, ignoring the time taken ;;; by garbage collection and the time taken by the key hash function ;;; and the key equality predicate of the relevant hash trie type. ;;; When searching in a hash trie, the key equality predicate is ;;; applied to only as many keys extra as share a common hash value ;;; with the key whose association is sought. Thus in a hash trie with ;;; no collisions, every search involves at most one invocation of the ;;; key hash function, and at most one invocation of the key equality ;;; predicate. ;;; ;;; (MAKE-HASH-TRIE-TYPE ) -> hash-trie-type ;;; ;;; Constructor for hash trie types. must be a key ;;; equality predicate, a procedure of two arguments that returns ;;; true to indicate that they are equal and false to indicate that ;;; they are not, and that behaves transitively, symmetrically, and ;;; reflexively. must be a key hash function that ;;; preserves the key equality predicate, i.e. for keys A and B, it ;;; must be that if ( A B), then (= ( A) ;;; ( B)). ;;; ;;; (HASH-TRIE-TYPE? ) -> boolean ;;; ;;; Disjoint type predicate for hash trie types. ;;; ;;; (HASH-TRIE-TYPE/KEY-EQUALITY-PREDICATE ) ;;; -> key-equality-predicate ;;; (HASH-TRIE-TYPE/KEY-HASH-FUNCTION ) ;;; -> key-hash-function ;;; ;;; Accessors for the key equality predicates and key hash ;;; functions of hash trie types. ;;; ;;; (MAKE-HASH-TRIE ) -> hash-trie ;;; ;;; Hash trie constructor. ;;; ;;; (HASH-TRIE/TYPE ) -> hash-trie-type ;;; ;;; Returns the hash trie type of . ;;; ;;; (HASH-TRIE/COUNT ) -> exact, nonnegative integer ;;; ;;; Returns the number of associations in . ;;; ;;; (HASH-TRIE/EMPTY? ) -> boolean ;;; ;;; Returns true if has no associations, or false if it ;;; has any. ;;; (HASH-TRIE/SEARCH ) ;;; ;;; Searches for an association for in . If there ;;; is one, tail-calls with one argument, the datum ;;; associated with key. If not, tail-calls with ;;; zero arguments. ;;; ;;; (HASH-TRIE/LOOKUP ) -> datum or ;;; ;;; Searches for an association for in . If there ;;; is one, returns its associated datum; otherwise returns ;;; . ;;; ;;; (HASH-TRIE/MEMBER? ) -> boolean ;;; ;;; Returns true if has an association for , or ;;; false if not. ;;; ;;; (HASH-TRIE/UPDATE ) ;;; ;;; Searches for an association for in . If there ;;; is one, tail-calls with three arguments: ;;; ;;; . the associated datum, ;;; ;;; . a procedure (REPLACE ) that returns a new hash trie ;;; with all the associations in , but with ;;; substituted for the datum associated with ; and ;;; ;;; . a procedure (DELETE) that returns a new hash trie with all ;;; the associations in excluding the association for ;;; . ;;; ;;; If there is no such association, tail-calls with ;;; one argument, a procedure (INSERT ) that returns a new ;;; hash trie with all the associations in as well as ;;; an association of with . ;;; ;;; (HASH-TRIE/INSERT ) -> hash-trie ;;; ;;; Returns a hash trie with all the associations in , ;;; but associating with , whether had an ;;; association for or not. ;;; ;;; (HASH-TRIE/MODIFY ) -> hash-trie ;;; ;;; Returns a hash trie with all the associations in , ;;; but associating ( D) with if ;;; associated a datum D with , or associating ( ;;; ) with if had no association for ;;; . ;;; ;;; (HASH-TRIE/INTERN ) -> [datum hash-trie] ;;; ;;; If has an association for , returns its ;;; associated datum and . Otherwise, calls ;;; ( ) to obtain a datum D, and returns D and a ;;; hash trie with all the associations in as well as ;;; an association of D with . ;;; ;;; (HASH-TRIE/DELETE ) -> hash-trie ;;; ;;; Returns a hash trie with all the associations in , ;;; excluding its association, if any, for . ;;; (HASH-TRIE/FOLD ) -> value ;;; ;;; Folds by , starting with an initial ;;; value V of and updating it for each association ;;; of a datum D with a key K, in no particular order, by ;;; ( K D V). ;;; ;;; (HASH-TRIE->ALIST ) -> alist ;;; (HASH-TRIE/KEY-LIST ) -> alist ;;; (HASH-TRIE/DATUM-LIST ) -> alist ;;; ;;; HASH-TRIE->ALIST returns a list of pairs, in no particular ;;; order, corresponding with the associations in , with ;;; keys in the cars and associated data in the respective cdrs. ;;; HASH-TRIE/KEY-LIST returns a list of all the keys in ;;; , in no particular order. HASH-TRIE/DATUM-LIST ;;; returns a list of all the data in , in no particular ;;; order. ;;; ;;; (ALIST->HASH-TRIE ) -> hash-trie ;;; ;;; Returns a hash trie of the given type with the associations ;;; listed in , taking keys from the cars and corresponding ;;; data from the respective cdrs. ;;; ;;; (STRING-HASH ) -> hash ;;; (SYMBOL-HASH ) -> hash ;;; (EXACT-INTEGER-HASH ) -> hash ;;; (REAL-NUMBER-HASH ) -> hash ;;; (COMPLEX-NUMBER-HASH ) -> hash ;;; ;;; Hash functions for various types of data. EXACT-INTEGER-HASH, ;;; REAL-NUMBER-HASH, and COMPLEX-NUMBER-HASH all agree where their ;;; domains coincide. The current implementations of these hash ;;; functions are all based on the FNV (Fowler-Noll-Vo) family of ;;; hash functions, tweaked slightly so that it is more likely to ;;; fit into the range of fixnums (small exact integers that can be ;;; represented immediately) for most Scheme systems on 32-bit ;;; machines. ;;; ;;; HASH-TRIE-TYPE:STRING ;;; HASH-TRIE-TYPE:SYMBOL ;;; HASH-TRIE-TYPE:EXACT-INTEGER ;;; HASH-TRIE-TYPE:REAL-NUMBER ;;; HASH-TRIE-TYPE:COMPLEX-NUMBER ;;; ;;; Hash trie types for the above hash functions, with appropriate ;;; key equality predicates: STRING=? for strings, EQ? for symbols, ;;; and = for the numeric types. ;;; ;;; Commented at the bottom of the file are procedures to map between ;;; hash tries and streams of pairs, which can be used to implement a ;;; foof-loop ;;; iterator, also commented at the bottom of the file. ;;;; Hash Trie Structure (define-record-type (%make-hash-trie-type key-equality-predicate key-hash-function operation/search operation/lookup operation/member? operation/update operation/insert operation/modify operation/intern operation/delete ) hash-trie-type? (key-equality-predicate hash-trie-type.key-equality-predicate) (key-hash-function hash-trie-type.key-hash-function) (operation/search hash-trie-type.operation/search) (operation/lookup hash-trie-type.operation/lookup) (operation/member? hash-trie-type.operation/member?) (operation/update hash-trie-type.operation/update) (operation/insert hash-trie-type.operation/insert) (operation/modify hash-trie-type.operation/modify) (operation/intern hash-trie-type.operation/intern) (operation/delete hash-trie-type.operation/delete) ) (define-record-type (%make-hash-trie type count root) hash-trie? (type hash-trie.type) (count hash-trie.count) (root hash-trie.root)) (define (make-hash-trie type) (%make-hash-trie type 0 (non-node))) (define (hash-trie/type hash-trie) (hash-trie.type hash-trie)) (define (hash-trie/count hash-trie) (hash-trie.count hash-trie)) (define (hash-trie/empty? hash-trie) (zero? (hash-trie.count hash-trie))) (define (hash-trie-type/key-equality-predicate hash-trie-type) (hash-trie-type.key-equality-predicate hash-trie-type)) (define (hash-trie-type/key-hash-function hash-trie-type) (hash-trie-type.key-hash-function hash-trie-type)) ;;;; Hash Trie Operations (define (hash-trie/search hash-trie key if-found if-not-found) ((hash-trie-type.operation/search (hash-trie.type hash-trie)) hash-trie key if-found if-not-found)) (define (hash-trie/lookup hash-trie key default) ((hash-trie-type.operation/lookup (hash-trie.type hash-trie)) hash-trie key default)) (define (hash-trie/member? hash-trie key) ((hash-trie-type.operation/member? (hash-trie.type hash-trie)) hash-trie key)) (define (hash-trie/update hash-trie key if-found if-not-found) ((hash-trie-type.operation/update (hash-trie.type hash-trie)) hash-trie key if-found if-not-found)) (define (hash-trie/insert hash-trie key datum) ((hash-trie-type.operation/insert (hash-trie.type hash-trie)) hash-trie key datum)) (define (hash-trie/modify hash-trie key default modifier) ((hash-trie-type.operation/modify (hash-trie.type hash-trie)) hash-trie key default modifier)) (define (hash-trie/intern hash-trie key generator) ((hash-trie-type.operation/intern (hash-trie.type hash-trie)) hash-trie key generator)) (define (hash-trie/delete hash-trie key) ((hash-trie-type.operation/delete (hash-trie.type hash-trie)) hash-trie key)) ;;;; Nodes ;;; A node has one of the following two structures: ;;; ;;; #( ...) ;;; ;;; A branch stored compactly with a bit map of children; see the ;;; page below titled `Branch Child Maps'. Children are indexed by ;;; BITS-PER-CHUNK chunks of their hashes, beginning at the root with ;;; their lowest-order bits. ;;; ;;; ( ( . ) ...) ;;; ;;; A hash collision bucket, all of whose keys share a common hash, ;;; whose suffix is . (Suffix, not whole hash, because we know ;;; the prefix by getting to this node in the first place.) (define (non-node) #f) (define (bucket? object) (pair? object)) (define (make-bucket hash associations) (cons hash associations)) (define (bucket/hash bucket) (car bucket)) (define (bucket/list bucket) (cdr bucket)) (define (branch? object) ;; (and (vector? object) ;; (> (vector-length object) 1)) (vector? object)) (define (branch/count branch) (- (vector-length branch) 1)) (define (make-unary-branch chunk child) (vector (child-map/insert (empty-child-map) chunk) child)) (define (make-binary-branch chunk-a child-a chunk-b child-b) ;; Assumption: (not (= chunk-a chunk-b)) (let ((child-map (child-map/insert (child-map/insert (empty-child-map) chunk-a) chunk-b))) (if (< chunk-a chunk-b) (vector child-map child-a child-b) (vector child-map child-b child-a)))) (define (branch/lookup branch chunk) (let ((child-map (vector-ref branch 0))) (cond ((child-map/full? child-map) (vector-ref branch (+ chunk 1))) ((child-map/contains? child-map chunk) (vector-ref branch (child-map/chunk->index child-map chunk))) (else (non-node))))) ;;;;; Branch Update ;;; The only mutation in this whole file happens here, and it is ;;; restricted to the initialization of new vectors for branches. ;++ The following loops are probably worth unrolling, since they will ;++ run for no more than 2^b iterations, where b is BITS-PER-CHUNK. (define (branch/insert branch chunk child) (let ((length (+ (vector-length branch) 1)) (child-map (vector-ref branch 0))) (let ((branch* (make-vector length)) (child-map* (child-map/insert child-map chunk))) (vector-set! branch* 0 child-map*) (let ((index (child-map/chunk->index child-map* chunk))) (do ((i 1 (+ i 1))) ((>= i index)) (vector-set! branch* i (vector-ref branch i))) (vector-set! branch* index child) (let loop ((i index)) (let ((i* (+ i 1))) (if (< i* length) (begin (vector-set! branch* i* (vector-ref branch i)) (loop i*)))))) branch*))) (define (branch/replace branch chunk child) (let ((length (vector-length branch))) (if (and (= length 2) (bucket? child) ;; Buckets are guaranteed to be non-empty. ;; (pair? (bucket/list child)) (null? (cdr (bucket/list child)))) (make-bucket (join-hash chunk (bucket/hash child)) (bucket/list child)) (let ((branch* (make-vector length))) (let ((index (child-map/chunk->index (vector-ref branch 0) chunk))) (do ((i 0 (+ i 1))) ((>= i index)) (vector-set! branch* i (vector-ref branch i))) (vector-set! branch* index child) (do ((i (+ index 1) (+ i 1))) ((>= i length)) (vector-set! branch* i (vector-ref branch i)))) branch*)))) (define (branch/delete branch chunk) ;; This is called only when the number of children in the branch is ;; greater than 1. If you change that invariant, change this ;; definition. (let ((length (vector-length branch)) (child-map (vector-ref branch 0))) (let ((child-map* (child-map/delete child-map chunk)) (index (child-map/chunk->index child-map chunk))) (if (= length 3) ; Ergo, CHILD-MAP* is a singleton. (let ((child (vector-ref branch (- 3 index)))) ;; Check whether the other child (2 if INDEX is 1, 1 if ;; INDEX is 2) is a singleton bucket. (if (and (bucket? child) ;; Buckets are guaranteed to be non-empty. ;; (pair? (bucket/list child)) (null? (cdr (bucket/list child)))) (make-bucket (join-hash (singleton-child-map/chunk child-map*) (bucket/hash child)) (bucket/list child)) (vector child-map* child))) (let* ((length* (- length 1)) (branch* (make-vector length*))) (vector-set! branch* 0 child-map*) (do ((i 1 (+ i 1))) ((>= i index)) (vector-set! branch* i (vector-ref branch i))) (let loop ((i index)) (if (< i length*) (let ((i* (+ i 1))) (vector-set! branch* i (vector-ref branch i*)) (loop i*)))) branch*))))) ;;;;; Branch Child Maps ;;; A branch is a compact vector of children, without storage for ;;; indices that are not used. The child map is a map of all the ;;; indices to bits saying whether or not the branch has a child for ;;; the respective index. For example, if each branch has sixteen ;;; children (giving four bits per chunk), and some branch has the ;;; zeroth, eighth, and eleventh children, its child map will be ;;; #b100100000001. We find which index in the vector to use (1 for ;;; the zeroth child, 2 for the eighth, and 3 for the eleventh) by ;;; counting the bits that are set below and including the index bit in ;;; the child map. (define (bit-mask size) (bitwise-not (arithmetic-shift -1 size))) ;;; If you change BITS-PER-CHUNK, change FULL-CHILD-MAP below. ;;; ;;; Why this particular choice of BITS-PER-CHUNK, rather than the ;;; paper's suggested 5? This makes child maps fit in the range of ;;; fixnums for most Scheme systems. (define bits-per-chunk 4) (define (split-hash hash) (values (bitwise-and hash (bitwise-not (arithmetic-shift -1 bits-per-chunk))) (arithmetic-shift hash (- bits-per-chunk)))) (define (join-hash chunk hash) (bitwise-ior chunk (arithmetic-shift hash bits-per-chunk))) (define (full-child-map) ;; (bit-mask (arithmetic-shift 1 bits-per-chunk)) #xffff) (define (empty-child-map) 0) (define (child-map/full? child-map) (= child-map (full-child-map))) (define (child-map/insert child-map chunk) (bitwise-ior child-map (arithmetic-shift 1 chunk))) (define (child-map/delete child-map chunk) (bitwise-and child-map (bitwise-not (arithmetic-shift 1 chunk)))) (define (child-map/contains? child-map chunk) (not (zero? (bitwise-and child-map (arithmetic-shift 1 chunk))))) (define (child-map/chunk->index child-map chunk) (bit-count (bitwise-and child-map (bit-mask (+ chunk 1))))) (define (singleton-child-map/chunk child-map) ;; Assumption: CHILD-MAP has only one bit set. ;; (- (integer-length child-map) 1) (bit-count (- child-map 1))) ;;;; Making Hash Trie Types (define (make-hash-trie-type key=? key-hash) (define (search hash-trie key if-found if-not-found) (define (leaf-search bucket hash) (let ((hash* (bucket/hash bucket)) (associations (bucket/list bucket))) (if (= hash hash*) (linear-search associations) (if-not-found)))) ;; Buckets are guaranteed to be non-empty. Invert the usual linear ;; search loop. (define (linear-search associations) (let ((association (car associations))) (if (key=? key (car association)) (if-found (cdr association)) (let ((associations (cdr associations))) (if (pair? associations) (linear-search associations) (if-not-found)))))) (define (trie-search branch hash) (receive (chunk hash*) (split-hash hash) (let ((node (branch/lookup branch chunk))) (cond ((branch? node) (trie-search node hash*)) ((bucket? node) (leaf-search node hash*)) (else (if-not-found)))))) ;; Unrolling the first iteration of this loop avoids computing the ;; hash when there is only one bucket. Probably a gratuitous ;; optimization. (let ((node (hash-trie.root hash-trie))) (cond ((branch? node) (trie-search node (key-hash key))) ((bucket? node) ;; Perform at most one of KEY-HASH or KEY=? if possible. ;; This is really silly. (let ((list (bucket/list node))) (cond ((pair? (cdr list)) (leaf-search node (key-hash key))) ((key=? key (caar list)) (if-found (cdar list))) (else (if-not-found))))) (else (if-not-found))))) (define (lookup hash-trie key default) (search hash-trie key (lambda (datum) datum) (lambda () default))) (define (member? hash-trie key) (search hash-trie key (lambda (datum) datum #t) (lambda () #f))) ;;;;; MAKE-HASH-TRIE-TYPE, continued: update algorithm (define (update hash-trie key if-found if-not-found) (define (leaf-search bucket hash replace delete) (let ((hash* (bucket/hash bucket)) (associations (bucket/list bucket))) (if (not (= hash hash*)) (if-not-found (lambda (datum) (replace +1 (branch-node hash (cons (cons key datum) '()) hash* associations)))) (linear-search associations (lambda (count associations) (replace count (make-bucket hash associations))) delete)))) ;; Buckets are guaranteed to be non-empty. Invert the usual linear ;; search loop. (define (linear-search associations replace delete) (let ((association (car associations)) (associations (cdr associations))) (cond ((key=? key (car association)) (if-found (cdr association) (lambda (datum) (replace 0 (cons (cons key datum) associations))) delete)) ((pair? associations) (linear-search associations (lambda (count associations) (replace count (cons association associations))) (lambda () (replace -1 associations)))) (else (if-not-found (lambda (datum) (replace +1 (cons (cons key datum) '())))))))) ;;;;; MAKE-HASH-TRIE-TYPE, continued: update algorithm, continued (define (trie-search branch hash replace delete) (receive (chunk hash*) (split-hash hash) (let ((node (branch/lookup branch chunk))) (if node (let ((replace (lambda (count node) (replace count (branch/replace branch chunk node)))) (delete (if (= 1 (branch/count branch)) delete (lambda () (replace -1 (branch/delete branch chunk)))))) (cond ((branch? node) (trie-search node hash* replace delete)) ((bucket? node) (leaf-search node hash* replace delete)) (else (error "Invalid hash trie node:" node) #f))) (if-not-found (lambda (datum) (replace +1 (branch/insert branch chunk (make-bucket hash* (cons (cons key datum) '())))))))))) (define (replace-root count-adjustment root-node) (%make-hash-trie (hash-trie.type hash-trie) (+ (hash-trie.count hash-trie) count-adjustment) root-node)) (define (delete-root) (replace-root -1 (non-node))) (let ((node (hash-trie.root hash-trie))) (cond ((branch? node) (trie-search node (key-hash key) replace-root delete-root)) ((bucket? node) (leaf-search node (key-hash key) replace-root delete-root)) (else (if-not-found (lambda (datum) (replace-root +1 (make-bucket (key-hash key) (cons (cons key datum) '()))))))))) ;;;;; MAKE-HASH-TRIE-TYPE continued: branching, and update utilities (define (branch-node hash-a associations-a hash-b associations-b) ;; Assumption: (not (= hash-a hash-b)) (receive (chunk-a hash-a) (split-hash hash-a) (receive (chunk-b hash-b) (split-hash hash-b) (if (= chunk-a chunk-b) (make-unary-branch chunk-a (branch-node hash-a associations-a hash-b associations-b)) (make-binary-branch chunk-a (make-bucket hash-a associations-a) chunk-b (make-bucket hash-b associations-b)))))) (define (insert hash-trie key datum) (update hash-trie key (lambda (datum* replace delete) datum* delete ;ignore (replace datum)) (lambda (insert) (insert datum)))) (define (modify hash-trie key default modifier) (update hash-trie key (lambda (datum replace delete) delete ;ignore (replace (modifier datum))) (lambda (insert) (insert (modifier default))))) (define (intern hash-trie key generator) (update hash-trie key (lambda (datum replace delete) replace delete ;ignore (values datum hash-trie)) (lambda (insert) (let ((datum (generator key))) (values datum (insert datum)))))) (define (delete hash-trie key) (update hash-trie key (lambda (datum replace delete) datum replace ;ignore (delete)) (lambda (insert) insert ;ignore hash-trie))) (%make-hash-trie-type key=? key-hash search lookup member? update insert modify intern delete)) ;;;; FNV-Based (Fowler-Noll-Vo) Hash Functions ;;; This definition doesn't give us the real FNV hash, but it is likely ;;; to fit within most Schemes' fixnum arithmetic. The FNV prime ;;; (#x1000193) is the usual 32-bit FNV prime, but the offset basis ;;; (#x1cf42a8) differs. It was computed like the usual FNV offset ;;; basis, by applying the same iteration but starting with a hash of 0 ;;; to the 32 octets `chongo /\../\'. Probably this ;;; is all a load of nonsense, because 25-bit modular arithmetic ;;; instead of 32-bit modular arithmetic probably changes all the ;;; constraints that were put on the design of the parameters, but this ;;; seems to work and to give a reasonable distribution in some random, ;;; unscientific tests. (define (string-hash string) (let loop ((index 0) (hash #x1cf42a8)) (if (< index (string-length string)) (loop (+ index 1) (bitwise-xor (char->integer (string-ref string index)) (bitwise-and #x1ffffff (* hash #x1000193)))) hash))) (define (symbol-hash symbol) (string-hash (symbol->string symbol))) (define (exact-integer-hash k) (let loop ((k k) (hash #x1cf42a8)) (if (zero? k) hash (loop (arithmetic-shift k -8) (bitwise-xor (bitwise-and k #xFF) (bitwise-and #x1ffffff (* hash #x1000193))))))) (define (real-number-hash x) (cond ((integer? x) (exact-integer-hash (inexact->exact x))) ;; This is appealing because it involves no non-integer ;; arithmetic, but it gives 1/2 and 0.5 distinct hashes, while ;; (= 1/2 0.5). ;; ((exact? x) ;; (bitwise-xor ;; (exact-integer-hash (bitwise-xor #xfacade (numerator x))) ;; (exact-integer-hash (denominator x)))) (else (let* ((integral-part (truncate x)) (fractional-part (round (/ 1 (- x integral-part))))) ;; TRUNCATE and ROUND guarantee that INEXACT->EXACT here ;; will yield an exact integer, fit for BITWISE-XOR and for ;; EXACT-INTEGER-HASH. (bitwise-xor (exact-integer-hash (bitwise-xor #xfedcad (inexact->exact integral-part))) (exact-integer-hash (inexact->exact fractional-part))))))) (define (complex-number-hash z) (if (real? z) (real-number-hash z) ;++ This does not distinguish the imaginary and real parts. (bitwise-xor #xdeface (bitwise-xor (real-number-hash (imag-part z)) (real-number-hash (real-part z)))))) ;;;; Miscellaneous Hash Trie Types (define hash-trie-type:complex-number (make-hash-trie-type = complex-number-hash)) (define hash-trie-type:real-number (make-hash-trie-type = real-number-hash)) (define hash-trie-type:exact-integer (make-hash-trie-type = exact-integer-hash)) (define hash-trie-type:symbol (make-hash-trie-type eq? symbol-hash)) (define hash-trie-type:string (make-hash-trie-type string=? string-hash)) ;; (define hash-trie-type:string-ci ;; (make-hash-trie-type string-ci=? string-hash-ci)) ;;; With the following definitions of HASH-TRIE/BUCKET-FOLD and ;;; HASH-TRIE/FOLD, the last call to the combinator is a tail call. (define (hash-trie/fold hash-trie initial-value combinator) (define (fold-bucket associations value) ;; Buckets are guaranteed to be non-empty. Invert the usual FOLD ;; loop. (let ((association (car associations)) (associations (cdr associations))) (if (pair? associations) (fold-bucket associations (combinator (car association) (cdr association) value)) (combinator (car association) (cdr association) value)))) (define (fold-branch branch index value) ;; Branches are guaranteed to have at least one child. (if (> index 2) (let ((index (- index 1))) (fold-branch branch index (fold-node (vector-ref branch index) value))) (fold-node (vector-ref branch 1) value))) (define (fold-node node value) (cond ((branch? node) (fold-branch node (vector-length node) value)) ((bucket? node) (fold-bucket (bucket/list node) value)) (else (error "Invalid hash trie node:" node) #f))) (let ((root (hash-trie.root hash-trie))) (if root (fold-node root initial-value) initial-value))) (define (hash-trie/key-list hash-trie) (hash-trie/fold hash-trie '() (lambda (key datum list) datum ;ignore (cons key list)))) (define (hash-trie/datum-list hash-trie) (hash-trie/fold hash-trie '() (lambda (key datum list) key ;ignore (cons datum list)))) (define (hash-trie->alist hash-trie) (hash-trie/fold hash-trie '() (lambda (key datum alist) (cons (cons key datum) alist)))) (define (alist->hash-trie alist hash-trie-type) (let loop ((alist alist) (hash-trie (make-hash-trie hash-trie-type))) (if (pair? alist) (loop (cdr alist) (hash-trie/insert hash-trie (caar alist) (cdar alist))) hash-trie))) ;; (define (hash-trie->stream hash-trie) ;; (define (bucket->stream list tail) ;; (if (pair? list) ;; (stream-cons (car list) (lazy (bucket->stream (cdr list) tail))) ;; tail)) ;; (define (branch->stream branch tail) ;; (let recur ((index (vector-length branch)) (value value)) ;; (if (> index 2) ;; (let ((index (- index 1))) ;; (node->stream (vector-ref branch index) ;; (lazy (recur index tail)))) ;; tail))) ;; (define (node->stream node tail) ;; (cond ((branch? node) (branch->stream node tail)) ;; ((bucket? node) (bucket->stream (bucket/list node) tail)) ;; (else (error "Invalid hash trie node:" node) #f))) ;; (lazy (let ((root (hash-trie.root hash-trie))) ;; (if root ;; (node->stream root stream-nil) ;; stream-nil)))) ;; ;; (define (stream->hash-trie stream hash-trie-type) ;; (let loop ((stream stream) (hash-trie (make-hash-trie hash-trie-type))) ;; (if (stream-pair? stream) ;; (loop (stream-cdr stream) ;; (hash-trie/insert hash-trie (stream-car stream))) ;; hash-trie))) ;; ;; ;;; Iterator for foof-loop; see ;; ;;; . ;; ;; (define-syntax in-hash-trie ;; (syntax-rules () ;; ((IN-HASH-TRIE (key-variable datum-variable) ;; (hash-trie-expression) ;; next . rest) ;; (next (((HASH-TRIE) hash-trie-expression)) ;Outer bindings ;; ((STREAM (HASH-TRIE->STREAM HASH-TRIE) ;Loop variables ;; STREAM*)) ;; () ;Entry bindings ;; ((NOT (STREAM-PAIR? STREAM))) ;Termination conditions ;; (((key-variable datum-variable) ;Body bindings ;; (LET ((ASSOCIATION (STREAM-CAR STREAM))) ;; (VALUES (CAR ASSOCIATION) (CDR ASSOCIATION)))) ;; ((STREAM*) (STREAM-CDR STREAM))) ;; () ;Final bindings ;; next . rest))))