;;;  cl-cdb.lisp
;;;  by Yusuke Shinyama <yusuke at cs dot nyu dot edu>
;;;
;;;  * public domain *
;;;

;;;  Usage:
;;;
;;;   To build:
;;;    (with-cdb-make (db "foo.cdb" "foo.tmp")
;;;      (add db "key" "value")
;;;      ...)
;;;
;;;   To use:
;;;    (with-cdb-init (db "foo.cdb")
;;;      (getitem db "key")
;;;      (dolist (k (getkeys db)) ...)
;;;      ...)
;;;

(defpackage #:cl-cdb
  (:use #:common-lisp)
  (:export #:cl-cdb
	   #:cdb-init
	   #:cdb-close
	   #:cdb-reader
	   #:getitem
	   #:getitems
	   #:getkeys
	   #:getvalues
	   #:cdb-make
	   #:cdb-writer
	   #:add
	   #:finish
	   #:with-cdb-init
	   #:with-cdb-make
	   ))

(in-package :cl-cdb)


;;  Utility functions
;;
; read a 32-bit little-endian.
(defun readui32 (stream)
  (let ((a (make-array 4 :element-type 'unsigned-byte)))
    (read-sequence a stream)
    (dpb (aref a 3) (byte 8 24) (dpb (aref a 2) (byte 8 16) (dpb (aref a 1) (byte 8 8) (aref a 0))))))

; write a 32-bit little-endian.
(defun writeui32 (stream n)
  (write-byte (ldb (byte 8 0) n) stream)
  (write-byte (ldb (byte 8 8) n) stream)
  (write-byte (ldb (byte 8 16) n) stream)
  (write-byte (ldb (byte 8 24) n) stream)
  )

; read string.
(defun readstrn (n stream)
  (let ((a (make-array n :element-type 'unsigned-byte)))
    (read-sequence a stream)
    (map 'string #'code-char a)))

; write string.
(defun writestr (stream s)
  (write-sequence (map 'vector #'char-code s) stream))

; compute a cdb hash value for a given string.
(defun hash (s)
  (reduce (lambda (h c) 
	    (logand
	     (logxor (+ (* 32 h) h) (char-code c))
	     #xffffffff))
	  s
	  :initial-value 5381))


;;  CDB Reader
;;
(defclass cdb-reader ()
  ((cdb-name :accessor cdb-name :initarg :cdb-name)
   (file-object :accessor file-object)
   (read-it :accessor read-it :initarg :read-it)
   (end-of-data :accessor end-of-data)
   (cache :accessor cache)
   ))

; cdb-reader: constructor.
(defmethod initialize-instance :after ((reader cdb-reader) &rest initargs)
  (declare (ignore initargs))
  (setf (file-object reader)
	(open (cdb-name reader)
	      :direction :input
	      :element-type 'unsigned-byte
	      :if-does-not-exist :error))
  (setf (end-of-data reader) (readui32 (file-object reader)))
  (setf (cache reader) (make-hash-table))
  )

; cdb-reader: destructor.
(defmethod cdb-close ((reader cdb-reader))
  (close (file-object reader))
  (setf (file-object reader) nil))

; cdb-reader: get an item.
(defmethod getitem ((reader cdb-reader) key0 &optional (default nil))
  (or (gethash key0 (cache reader))
      (let* ((key (if (stringp key0) key0 (write-to-string key0)))
	     (h (hash key))
	     (fp (file-object reader)))
	(file-position fp (* 8 (mod h 256)))
	;(format t "fp=~s, h=~s, pos=~s~%" fp h (file-position fp))
	(let* ((pos-bucket (readui32 fp))
	       (ncells (readui32 fp)))
	  ;(format t "pos-bucket=~s, ncells=~s~%" pos-bucket ncells)
	  (when (zerop ncells)
	    (return-from getitem default))
	  (let ((start (mod (ldb (byte 24 8) h) ncells)))
	    ;(format t "start=~s~%" start)
	    (dotimes (i ncells)
	      ;(format t "i=~s, pos=~s~%" i (* 8 (mod (+ start i) ncells)))
	      (file-position fp (+ pos-bucket (* 8 (mod (+ start i) ncells))))
	      (let* ((h1 (readui32 fp))
		     (p1 (readui32 fp)))
		;(format t "h1=~s, p1=~s~%" h1 p1)
		(cond ((zerop p1)
		       (return-from getitem default))
		      ((eql h h1)
		       (file-position fp p1)
		       (let* ((klen (readui32 fp))
			      (vlen (readui32 fp))
			      (k1 (readstrn klen fp))
			      (v1 (readstrn vlen fp)))
			 ;(format t "k1=~s, v1=~s~%" k1 v1)
			 (when (string-equal k1 key)
			   (let ((value (if (read-it reader) (read-from-string v1) v1)))
			     (setf (gethash key0 (cache reader)) value)
			     (return-from getitem value)))))
		      )))
	    default)))))

; cdb-reader: get an iterator over items.
(defmethod getiter ((reader cdb-reader))
  (make-instance 'cdb-iterator
		 :read-it (read-it reader)
		 :file-object (file-object reader)
		 :end-of-data (end-of-data reader)))

; cdb iterator object.
(defclass cdb-iterator ()
  ((file-object :accessor file-object :initarg :file-object)
   (end-of-data :accessor end-of-data :initarg :end-of-data)
   (read-it :accessor read-it :initarg :read-it)
   (key-pos :accessor key-pos :initform 2048)
   ))

; cdb-iterator: get a next item.
(defmethod next ((iterator cdb-iterator))
  (and (< (key-pos iterator) (end-of-data iterator))
       (let ((fp (file-object iterator)))
	 (file-position fp (key-pos iterator))
	 (let* ((klen (readui32 fp))
		(vlen (readui32 fp))
		(k1 (readstrn klen fp))
		(v1 (readstrn vlen fp)))
	   ;(format t "k1=~s, v1=~s~%" k1 v1)
	   (setf (key-pos iterator) (+ 8 klen vlen (key-pos iterator)))
	   (cons k1 (if (read-it iterator) (read-from-string v1) v1))))))

; cdb-iterator: get items/keys/values. (too much Pythonic way?)
(defmethod getitems ((iterator cdb-iterator))
  (loop for i = (next iterator) while i collecting i))
(defmethod getkeys ((iterator cdb-iterator))
  (loop for i = (next iterator) while i collecting (car i)))
(defmethod getvalues ((iterator cdb-iterator))
  (loop for i = (next iterator) while i collecting (cdr i)))


;;  CDB Writer
;;
(defclass cdb-writer ()
  ((cdb-name :accessor cdb-name :initarg :cdb-name)
   (tmp-name :accessor tmp-name :initarg :tmp-name)
   (file-object :accessor file-object)
   (file-pos :accessor file-pos)
   (buckets :accessor buckets)
   ))

; cdb-writer: constructor.
(defmethod initialize-instance :after ((writer cdb-writer) &rest initargs)
  (declare (ignore initargs))
  (setf (file-object writer)
	(open (tmp-name writer)
	      :direction :output
	      :element-type 'unsigned-byte
	      :if-does-not-exist :create
	      :if-exists :error))
  (setf (file-pos writer) 2048)
  (setf (buckets writer) (make-array 256 :initial-element nil))
  )

; cdb-writer: add an item.
(defmethod add ((writer cdb-writer) k v)
  (let* ((k (if (stringp k) k (write-to-string k)))
	 (v (if (stringp v) v (write-to-string v)))
	 (fp (file-object writer))
	 (pos (file-pos writer))
	 (klen (length k))
	 (vlen (length v))
	 (h (hash k)))
    (file-position fp pos)
    (writeui32 fp klen)
    (writeui32 fp vlen)
    (writestr fp k)
    (writestr fp v)
    (push (cons h pos) (aref (buckets writer) (mod h 256)))
    (setf (file-pos writer) (+ 8 klen vlen pos))
    ))

; cdb-writer: finalizer/destructor.
(defmethod finish ((writer cdb-writer))
  (let ((fp (file-object writer))
	(hash-pos (file-pos writer)))
    ;(format t "fp=~a, hash-pos=~a~%" fp hash-pos)
    (file-position fp hash-pos)
    ; write the hashtables.
    (loop for b across (buckets writer) do
      (when b
	(let* ((ncells (* 2 (length b)))
	       (cells-hash (make-array ncells))
	       (cells-pos (make-array ncells :initial-element 0)))
	  ;(format t "b=~a, ncells=~a~%" b ncells)
	  (dolist (h-p b)
	    (let ((index (mod (truncate (car h-p) 256) ncells)))
	      (loop while (/= 0 (aref cells-pos index))
		    do (setf index (mod (1+ index) ncells)))
	      (setf (aref cells-hash index) (car h-p))
	      (setf (aref cells-pos index) (cdr h-p))))
	  (dotimes (index ncells)
	    (writeui32 fp (aref cells-hash index))
	    (writeui32 fp (aref cells-pos index)))
	  )))
    ; write the master table.
    (file-position fp 0)
    (loop for b across (buckets writer) do
      (writeui32 fp hash-pos)
      (writeui32 fp (* 2 (length b)))
      (setf hash-pos (+ (* 16 (length b)) hash-pos))
      )
    (close fp))
  ; rename: tmp-name -> cdb-name.
  (rename-file (tmp-name writer) (cdb-name writer))
  )

; cdb-writer: add multiple items at once.
(defmethod add-alist ((writer cdb-writer) alist)
  (dolist (i alist)
    (add writer (car i) (cdr i))))


;;  APIs
;;

; cdb-init: Returns a cdb object. 
;   Obtained values are converted into a lisp object if read-it is non-nil.
(defun cdb-init (cdb-name &key (read-it nil))
  (make-instance 'cdb-reader :cdb-name cdb-name :read-it read-it))

; with-cdb-init: a utility macro.
(defmacro with-cdb-init ((db . args) &rest body)
  `(let ((,db (cdb-init ,@args)))
    ,@body
    (cdb-close ,db))
  )

; getitems/getkeys/getvalues: returns a list of all items/keys/values.
(defmethod getitems ((reader cdb-reader))
  (getitems (getiter reader)))
(defmethod getkeys ((reader cdb-reader))
  (getkeys (getiter reader)))
(defmethod getvalues ((reader cdb-reader))
  (getvalues (getiter reader)))

; cdb-make: Returns a cdbmake object.
(defun cdb-make (cdb-name &optional tmp-name)
  (make-instance 'cdb-writer 
		 :cdb-name cdb-name
		 :tmp-name (or tmp-name (translate-pathname cdb-name "*.cdb" "*.tmp"))))

; with-cdb-make: a utility macro.
(defmacro with-cdb-make ((db . args) &rest body)
  `(let ((,db (cdb-make ,@args)))
    ,@body
    (finish ,db))
  )


;; testing
;;
(defun simple-test ()
  (with-cdb-make (db "/tmp/foo.cdb" "/tmp/foo.tmp")
    (add db "abc" 123)
    (add db "def" 456789)
    (add db "goo" 'baa)
    )
  (with-cdb-init (db "/tmp/foo.cdb" :read-it t)
    (flet ((testkey (k v0) 
	     (let ((v1 (getitem db k)))
	       (format t "test: k=~s, v=~s, expected=~s~%" k v1 v0)
	       (assert (equal v0 v1)))))
      (testkey "abc" 123)
      (testkey "def" 456789)
      (testkey "goo" 'baa)
      (testkey 123 nil))
    (format t "items: ~s~%" (getitems db))
    (format t "keys: ~s~%" (getkeys db))
    (format t "values: ~s~%" (getvalues db))
    )
  )
#+test-cdb
(cl-cdb::simple-test)
