CMUCL commit: src/code (unidata.lisp)

Mon Sep 20 01:07:46 CEST 2010

Date: Sunday, September 19, 2010 @ 19:07:46
  Author: rtoy
    Path: /project/cmucl/cvsroot/src/code

Modified: unidata.lisp

o Move %STR, %STRX and %MATCH around so that we can inline them
  (because they're so simple).
o Add some comments for %STR.
o Change implementation of %MATCH to be simpler and add comments on
  why we do what we do and explain what happens if we don't.
o Handle completion of Hangul syllables better:
  - Match "Hangul_S" instead of "Hangul_Syllable" because there's
    #\Hangul_Single_Dot_Tone_Mark. 
  - If we match "Hangul_S", try to complete some Hangul syllables so
    we don't fool slime into thinking "Hangul_Syllable_" is the only
    completion.  There are obviously more.
o Handle completion of CJK Unified Ideographs better by trying to
  complete more so slime isn't fooled into thinking
  "CJK_Unified_Ideograph-" is the only possible completion.


--------------+
 unidata.lisp |   91 +++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 28 deletions(-)


Index: src/code/unidata.lisp
diff -u src/code/unidata.lisp:1.18 src/code/unidata.lisp:1.19

--- src/code/unidata.lisp:1.18	Sat Sep 18 22:37:10 2010
+++ src/code/unidata.lisp	Sun Sep 19 19:07:46 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;; 
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.18 2010-09-19 02:37:10 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.19 2010-09-19 23:07:46 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -18,7 +18,7 @@
 
 (defconstant +unidata-path+ #p"ext-formats:unidata.bin")
 
-(defvar *unidata-version* "$Revision: 1.18 $")
+(defvar *unidata-version* "$Revision: 1.19 $")
 
 (defstruct unidata
   range
@@ -1132,7 +1132,43 @@
 (defvar *cjk-unified-ideograph-dictionary* nil
   "Dictionary of CJK Unified ideographs")
 
-;;
+;; Convert the string into the form we want for character names.
+;; Basically the Unicode name has spaces replaced by underscores, and
+;; the result is capitalized.
+(declaim (inline %str %strx))
+(defun %str (x)
+  (nsubstitute #\_ #\Space (string-capitalize x)))
+
+(defun %strx (x)
+  (%str (car x)))
+
+(declaim (inline %match))
+#+(or)
+(defun %match (part prefix posn)
+  (and (>= (length part) (- (length prefix) posn))
+       (string= part prefix :start2 posn :end1 (- (length prefix) posn))))
+
+#+(or)
+(defun %match (part prefix posn)
+  (let ((s1 (search part prefix :start2 posn))
+	(s2 (search prefix part :start1 posn)))
+    (or (and s1 (= s1 posn))
+	(and s2 (zerop s2)))))
+
+;; Test if the string PART matches the string PREFIX starting from
+;; position POSN.  Basically test that the initial parts of the
+;; strings match each other exactly.  For if the prefix is "BO", then
+;; both "B" and "BOX" should match.  (This is needed to get the
+;; completion of "cjk_radical_bo" to match "cjk_radical_box" as well
+;; as "cjk_radical_bone" and others because at one point in the
+;; algorithm the part is "B", which we do want to match "BO" so that
+;; we can get the possible completions BONE" and "BOLT OF CLOTH".
+(defun %match (part prefix posn)
+  (let ((len (min (length part)
+		  (- (length prefix) posn))))
+    (string= part prefix :end1 len :start2 posn :end2 (+ posn len))))
+
+
 (defun unicode-complete-name (prefix
 			      &optional (dict (unidata-name+
 					       *unicode-data*)))
@@ -1270,36 +1306,35 @@
       (when completep
 	(push prefix-match names))
       ;; Match prefix against Hangul and/or Hangul_syllable
-      (cond ((or (string= prefix-match "Hangul_")
-		 (search "Hangul_Syllable_" prefix-match :end1 (min 16 (length prefix-match))))
-	     ;; Add syllable as possible completion
-	     (push "Hangul_Syllable_" names))
+      (cond ((search "Hangul_S" prefix-match
+		     :end1 (min 8 (length prefix-match)))
+	     ;; Add syllable as possible completion, and then try to
+	     ;; complete some more so that we don't end up with slime
+	     ;; saying "Hangul_Syllable_" is the only completion.
+	     (multiple-value-bind (m suffixes)
+		 (unicode-complete-name (subseq prefix-match (min 16 (length prefix-match)))
+					*hangul-syllable-dictionary*)
+	       (declare (ignore m))
+	       (if suffixes
+		   (loop for n in suffixes
+		      do (push (concatenate 'string "Hangul_Syllable_" n) names))
+		   (push "Hangul_Syllable_" names))))
 	    ((or ;;(string= prefix-match "Cjk_")
-	      (search "Cjk_Unified_Ideograph-" prefix-match :end1 (min 22 (length prefix-match))))
-	     ;; Add Unified
-	     (push "Cjk_Unified_Ideograph-" names)))
+	      (search "Cjk_Unified_Ideograph-" prefix-match
+		      :end1 (min 22 (length prefix-match))))
+	     ;; Try to complete the first part so we don't get
+	     ;; "Cjk_Unified_Ideograph-" as the only completion.
+	     (multiple-value-bind (m suffixes)
+		 (unicode-complete-name (subseq prefix-match (min 22 (length prefix-match)))
+					*cjk-unified-ideograph-dictionary*)
+	       (declare (ignore m))
+	       (loop for n in suffixes
+		    do (push (concatenate 'string "Cjk_Unified_Ideograph-" n) names)))))
       (setf names (mapcar #'string-capitalize names))
       ;;(format t "Final names = ~S~%" names)
       names)))
 
-;; Convert the string into the form we want for character names.
-(defun %str (x)
-  (nsubstitute #\_ #\Space (string-capitalize x)))
-
-(defun %strx (x)
-  (%str (car x)))
-
-#+(or)
-(defun %match (part prefix posn)
-  (and (>= (length part) (- (length prefix) posn))
-       (string= part prefix :start2 posn :end1 (- (length prefix) posn))))
-
-(defun %match (part prefix posn)
-  (let ((s1 (search part prefix :start2 posn))
-	(s2 (search prefix part :start1 posn)))
-    (or (and s1 (= s1 posn))
-	(and s2 (zerop s2)))))
-
+;; Find the longest initial substring of the STRINGS.
 (defun %mip (strings)
   (let* ((first (first strings))
 	 (posn (length first)))