
;;; psyk, a learning conversation program

(use-modules (srfi srfi-1))
(define random-integer random)
(define (union set1 set2) (lset-union equal? set1 set2))

;; global parameters

(define *used-penalty* 1/5)
(define *react-threshold* 1/3)
(define *short-phrase-penalty* 9)
(define *context-history-length* 3)

;; generic helpers

(define (false-protect f arg) (if arg (f arg) #f))
(define (between n min max) (and (<= min n) (< n max)))

(define (maybe-utf8->latin1 str)
  ; return latin1 of str if str is utf8 convertible to latin1.
  ; WARNING: dependency on char->integer returning the byte values.
  (define (byte-at index) (char->integer (string-ref str index)))
  (let loop ((index 0) (converted '()))
    (cond ((>= index (string-length str)) (list->string (reverse converted)))
	  ((< (byte-at index) 128)
	   (loop (+ index 1) (cons (string-ref str index) converted)))
	  ((and (< index (+ 1 (string-length str)))
		(between (byte-at index) 194 196)
		(between (byte-at (+ 1 index)) 128 192))
	   (loop (+ index 2)
		 (cons (integer->char (+ (byte-at (+ 1 index))
					 (* 64 (- (byte-at index) 194))))
		       converted)))
	  (else str))))

;; btree is used by used-count

(define (make-btree) (make-hash-table 641))
(define btree-get hash-ref)
(define btree-set! hash-set!)
(define (btree-sum h)
  (hash-fold (lambda (key val acc) (+ val acc)) 0 h))

;; parsing support

(define (my-char-alphabetic? c)
  (or (char-alphabetic? c)
      (memv c '(#\å #\ä #\ö #\Å #\Ä #\Ö))))

(define (drop-until pred list)
  (drop-while (lambda (el) (not (pred el))) list))

(define (skip-garbage clist) (drop-until my-char-alphabetic? clist))
(define (prefix-word clist) (take-while my-char-alphabetic? clist))
(define (skip-word clist) (drop-while my-char-alphabetic? clist))
(define (skip-whitespace clist) (drop-while char-whitespace? clist))

;; conversation parsing

(define (my-char-downcase c)
  (case c ((#\Å) #\å) ((#\Ä) #\ä) ((#\ö) #\ö) (else (char-downcase c))))

(define (words clause)
  (let ((clause (skip-garbage clause)))
    (if (null? clause) '()
        (cons (prefix-word clause) (words (skip-word clause))))))

(define (word-symbols clause)
  (map string->symbol (map list->string (words (map my-char-downcase clause)))))

;; context matching, the functional core of psyk

(define (common-prefix-length ls1 ls2)
  (if (or (null? ls1) (null? ls2) (not (eq? (car ls1) (car ls2)))) 0
    (+ 1 (common-prefix-length (cdr ls1) (cdr ls2)))))

(define (longest-prefix-match needle haystack)
  (let ((match (memq (car needle) haystack)))
    (if (not match) 0
      (max (common-prefix-length needle match)
	   (longest-prefix-match needle (cdr match))))))

(define (common-subseq-length-square-sum ls1 ls2)
  (if (null? ls1) 0
    (let ((match-len (longest-prefix-match ls1 ls2)))
      (if (= 0 match-len)
	(common-subseq-length-square-sum (cdr ls1) ls2)
	(+ (* match-len match-len)
	   (common-subseq-length-square-sum (drop ls1 match-len) ls2))))))

(define (list-similarity ls1 ls2)
  (/ (common-subseq-length-square-sum ls1 ls2)
     (+ *short-phrase-penalty* (* (length ls1) (length ls2)))))

(define (context-similarity c1 c2)
  (if (or (null? c1) (null? c2)) 0
    (let* ((score (list-similarity (car c1) (car c2))))
      (for-each
        (lambda (p1 p2)
	  (set! score (* score (+ 1 (list-similarity p1 p2)))))
	(cdr c1) (cdr c2))
      score)))

;; reuse prevention

(define *used* (make-btree))

(define (used-count str) (btree-get *used* str 0))

(define (mark-used! str)
  (btree-set! *used* str (+ 1 (used-count str))))

;; answer database handling

(define *answers* '((() "Hmh?")))

(define (save-answers! file)
  (let ((db (open-output-file file)))
    (display "(" db) (newline db)
    (for-each (lambda (rule) (write rule db) (newline db)) *answers*)
    (display ")" db) (newline db))
  (display "(saved answer database.)") (newline))

(define (load-answers! file)
  (set! *answers* (read (open-input-file file)))
  (display "(loaded answer database.)") (newline))

(define (merge-answers! file)
  (let ((new-answers (read (open-input-file file))))
    (set! *answers* (union new-answers *answers*))
    (display "(merged answers from file.)") (newline)))

;; best answer searching

(define *last-max-score* 0)

(define (filter-max-scores scoring ls)
  (let loop ((max-score 0) (found '()) (ls ls))
    (if (null? ls) (begin (set! *last-max-score* max-score) found)
      (let* ((elem (car ls))
	     (score (scoring elem)))
	(loop (max score max-score)
	      (cond ((> score max-score) (list elem))
		    ((= score max-score) (cons elem found))
		    (else found))
	      (cdr ls))))))

(define (find-best-answers context)
  (filter-max-scores
    (lambda (rule)
      (- (context-similarity context (car rule))
	 (* *used-penalty* (used-count (cadr rule)))))
    *answers*))

;; context handling helpers

(define (make-context utterance)
  (if (string? utterance)
    (make-context (string->list (maybe-utf8->latin1 utterance)))
    (list (word-symbols utterance))))

(define (update-context new-context old-context)
  (let* ((context (append new-context old-context))
	 (len (min *context-history-length* (length context))))
    (take context len)))

;; user interface stuff

(define *answer-history* '())

(define (random-choice ls)
  (let ((len (length ls)))
    (and (> len 0) (list-ref ls (random-integer len)))))

(define (get-answer context)
  (let ((answer (or (random-choice (find-best-answers context)) '(() "..."))))
    (set! *answer-history* (cons answer *answer-history*))
    (cadr answer)))

(define (learn! phrase context)
  (if (pair? phrase) (learn! (list->string phrase) context)
    (begin
      (set! *answers* (cons (list context (maybe-utf8->latin1 phrase))
			    *answers*))
      (mark-used! phrase))))

;; special commands

(define *saved-answer-history* '())

(define (take-answer-history)
  (let ((last (take *answer-history* (min 3 (length *answer-history*)))))
    (set! *saved-answer-history* last)
    last))

(define (delete-answer num)
  (let ((answer (list-ref *saved-answer-history* num)))
    (set! *answers* (delete! answer *answers* eq?))
    `(deleted ,answer)))

(define special-command-handlers
  `(("help"
     ,(lambda (arg)
	(for-each (lambda (line) (display line) (newline))
	     '( "Commands available (note the hash mark):"
		"#help - display this help"
		"#save <file> - save answer database in <file>"
		"#quit - end the discussion"
		"#load <file> - load new answer database from <file>"
		"#merge <file> - merge in answer database from <file>"
		"#history <length> - show a short learning history"
		"#test <clause> - show all best answers for <clause>"
	        "#penalty <number> - penalty of reusing answers"
		"#debug - show debug information about internal state"))
	#t))
    ("quit" ,(lambda (arg) (exit)))
    ("save" ,(lambda (arg) (save-answers! (list->string arg)) #t))
    ("load" ,(lambda (arg) (load-answers! (list->string arg)) #t))
    ("merge" ,(lambda (arg) (merge-answers! (list->string arg)) #t))
    ("penalty"
     ,(lambda (arg)
	(cond ((null? arg)
	       (display "Currently ") (display *used-penalty*) (newline))
	      (else (set! *used-penalty* (string->number (list->string arg)))))
	#t))
    ("history"
     ,(lambda (arg)
        (display "Learning history: ") (newline)
        (for-each (lambda (item) (write item) (newline))
	     (take *answers* (string->number (list->string arg))))
	(newline) #t))
    ("test"
     ,(lambda (clause)
        (display "Best answers: ")
        (write (find-best-answers (make-context clause)))
        (newline) #t))
    ("debug"
     ,(lambda (arg)
        (display "#answers ") (display (length *answers*))
        (display " #used ") (display (btree-sum *used*))
        (display " #last-score ") (display *last-max-score*)
        (newline) #t))))

