[cmucl-commit] CMUCL commit: src/code (unidata.lisp)

Raymond Toy rtoy at common-lisp.net
Wed Feb 23 04:02:34 CET 2011


    Date: Tuesday, February 22, 2011 @ 22:02:34
  Author: rtoy
    Path: /project/cmucl/cvsroot/src/code

Modified: unidata.lisp

Fix bug where cmucl was no longer recognizing things like
#\latin_small_letter_a.  This failure is caused by the new
SEARCH-DICTIONARY function that does partial completion, and
UNICODE-NAME-TO-CODEPOINT function wan't aware of the new way.

We could change UNICODE-NAME-TO-CODEPOINT to do the appropriate thing
with the new way, but I (rtoy) decided it would be nice to have the
old function around too.  Hence, restore the old version and use it.


--------------+
 unidata.lisp |   61 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 12 deletions(-)


Index: src/code/unidata.lisp
diff -u src/code/unidata.lisp:1.23 src/code/unidata.lisp:1.24
--- src/code/unidata.lisp:1.23	Wed Sep 29 16:51:19 2010
+++ src/code/unidata.lisp	Tue Feb 22 22:02:33 2011
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;; 
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.23 2010-09-29 20:51:19 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.24 2011-02-23 03:02:33 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -18,7 +18,7 @@
 
 (defconstant +unidata-path+ #p"ext-formats:unidata.bin")
 
-(defvar *unidata-version* "$Revision: 1.23 $")
+(defvar *unidata-version* "$Revision: 1.24 $")
 
 (defstruct unidata
   range
@@ -339,6 +339,45 @@
 			      i))
 		     stack))))))))
 
+;; Like SEARCH-DICTIONARY, but we don't try to do partial matches.  We
+;; do an exact match on the given string.
+(defun exact-match-dictionary (string dictionary)
+  (declare (optimize (speed 3) (space 0) (safety 0)
+		     (ext:inhibit-warnings 3))
+	   (type string string) (type dictionary dictionary))
+  (let* ((codebook (dictionary-cdbk dictionary))
+	 (current 0)
+	 (posn 0)
+	 (stack '()))
+    (declare (type (unsigned-byte 32) current) (type lisp::index posn))
+    (loop
+      (let ((keyv (ash (aref (dictionary-nextv dictionary) current) -18)))
+	(dotimes (i (aref (dictionary-keyl dictionary) keyv)
+		    (if stack
+			(let ((next (pop stack)))
+			  (setq posn (car next) current (cdr next)))
+			(return-from exact-match-dictionary nil)))
+	  (let* ((str (aref codebook (aref (dictionary-keyv dictionary)
+					   (+ keyv i))))
+		 (len (length str)))
+	    (declare (type simple-base-string str))
+	    (when (and (>= (length string) (+ posn len))
+		       (string= string str :start1 posn :end1 (+ posn len)))
+	      (setq current
+		  (+ (logand (aref (dictionary-nextv dictionary) current)
+			     #x3FFFF)
+		     i))
+	      (when (= (incf posn len) (length string))
+		(return-from exact-match-dictionary current))
+	      (return))			; from DOTIMES - loop again
+	    (when (or (string= str " ") (string= str "-"))
+	      (push (cons posn
+			  (+ (logand (aref (dictionary-nextv dictionary)
+					   current)
+				     #x3FFFF)
+			     i))
+		    stack))))))))
+
 (defun search-range (code range)
   (declare (optimize (speed 3) (space 0) (safety 0))
 	   (type codepoint code) (type range range))
@@ -727,20 +766,18 @@
 	       nil)))
 	(t
 	 (unless (unidata-name+ *unicode-data*) (load-names))
-	 (let* ((names (unidata-name+ *unicode-data*)))
-	   (multiple-value-bind (n p)
-	       (search-dictionary name names)
-	     (when (and n (= p (length name)))
-	       (let ((cp (aref (dictionary-codev names) n)))
-		 (if (minusp cp) nil cp))))))))
+	 (let* ((names (unidata-name+ *unicode-data*))
+		(n (exact-match-dictionary name names)))
+	   (when n
+	     (let ((cp (aref (dictionary-codev names) n)))
+	       (if (minusp cp) nil cp)))))))
 
 (defun unicode-1.0-name-to-codepoint (name)
   (declare (type string name))
   (unless (unidata-name1+ *unicode-data*) (load-1.0-names))
-  (let* ((names (unidata-name1+ *unicode-data*)))
-    (multiple-value-bind (n p)
-	(search-dictionary name names)
-      (when (and n (= p (length name)))
+  (let* ((names (unidata-name1+ *unicode-data*))
+	 (n (exact-match-dictionary name names)))
+      (when n
 	(let ((cp (aref (dictionary-codev names) n)))
 	  (if (minusp cp) nil cp))))))
 


More information about the cmucl-commit mailing list