CMUCL commit: RELEASE-20B-BRANCH src/code (unidata.lisp)
Raymond Toy
rtoy at common-lisp.net
Tue Sep 14 07:58:02 CEST 2010
Date: Tuesday, September 14, 2010 @ 01:58:02
Author: rtoy
Path: /project/cmucl/cvsroot/src/code
Tag: RELEASE-20B-BRANCH
Modified: unidata.lisp
UNICODE-NAME-TO-CODEPOINT was incorrectly accepting any value after
#\cjk_unified_ideograph-nnnn and returning the character whose code
was nnnn. This is wrong.
o Add a new function to check for valid ranges for CJK unified
ideographs.
o Use it in UNICODE-NAME-TO-CODEPOINT and UNICODE-NAME.
--------------+
unidata.lisp | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
Index: src/code/unidata.lisp
diff -u src/code/unidata.lisp:1.8 src/code/unidata.lisp:1.8.4.1
--- src/code/unidata.lisp:1.8 Tue Apr 20 13:57:45 2010
+++ src/code/unidata.lisp Tue Sep 14 01:58:01 2010
@@ -4,7 +4,7 @@
;;; This code was written by Paul Foley and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.8 2010-04-20 17:57:45 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.8.4.1 2010-09-14 05:58:01 rtoy Exp $")
;;;
;;; **********************************************************************
;;;
@@ -15,7 +15,7 @@
(defconstant +unidata-path+ #p"ext-formats:unidata.bin")
-(defvar *unidata-version* "$Revision: 1.8 $")
+(defvar *unidata-version* "$Revision: 1.8.4.1 $")
(defstruct unidata
range
@@ -635,6 +635,14 @@
(defvar *reverse-hangul-jungseong*)
(defvar *reverse-hangul-jongseong*)
+(defun valid-cjk-unified-ideograph-code-p (code)
+ ;; Look in UnicodeData.txt to find the range of the CJK Ideograph
+ ;; characters.
+ (or (<= #x3400 code #x4DB5) ; CJK Ideograph Extension A
+ (<= #x4E00 code #x9FC3) ; CJK Ideograph
+ (<= #x20000 code #x2A6D6) ; CJK Ideograph Extension B
+ ))
+
(defun unicode-name-to-codepoint (name)
(declare (type string name))
(cond ((and (> (length name) 22)
@@ -642,8 +650,10 @@
(string= name "CJKUNIFIED" :end1 10)))
(let* ((x (search "IDEOGRAPH" name))
(n (and x (position-if (lambda (x) (digit-char-p x 16)) name
- :start (+ x 9)))))
- (and n (values (parse-integer name :start n :radix 16)))))
+ :start (+ x 9))))
+ (code (and n (values (parse-integer name :start n :radix 16)))))
+ (when (and code (valid-cjk-unified-ideograph-code-p code))
+ code)))
((and (> (length name) 15)
(or (string= name "HANGUL SYLLABLE" :end1 15)
(string= name "HANGULSYLLABLE" :end1 14)))
@@ -736,9 +746,7 @@
s))))
(defun unicode-name (code)
- (cond ((or (<= #x3400 code #x4DB5) ; CJK Ideograph Extension A
- (<= #x4E00 code #x9FC3) ; CJK Ideograph
- (<= #x20000 code #x2A6D6)) ; CJK Ideograph Extension B
+ (cond ((valid-cjk-unified-ideograph-code-p code)
(format nil "CJK UNIFIED IDEOGRAPH-~4,'0X" code))
((<= #xAC00 code #xD7A3) ; Hangul Syllable
(apply #'concatenate 'string "HANGUL SYLLABLE "
More information about the cmucl-commit
mailing list