CMUCL commit: src/pcl/simple-streams/external-formats (utf-8.lisp)

Raymond Toy rtoy at
Wed Jul 7 15:43:12 CEST 2010

    Date: Wednesday, July 7, 2010 @ 09:43:12
  Author: rtoy
    Path: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats

Modified: utf-8.lisp

o When encountering an overlong sequence, a surrogate code, or a
  too-large codepoint, swallow the whole sequence instead of putting
  back the characters like we did previously.  (This new way is in
  line with how we handle other bad sequences.)
o Pass the number of octets read to the error handler in these cases.
o Update the message strings to include the number of octets.

 utf-8.lisp |   14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Index: src/pcl/simple-streams/external-formats/utf-8.lisp
diff -u src/pcl/simple-streams/external-formats/utf-8.lisp:1.11 src/pcl/simple-streams/external-formats/utf-8.lisp:1.12
--- src/pcl/simple-streams/external-formats/utf-8.lisp:1.11	Tue Jul  6 00:46:35 2010
+++ src/pcl/simple-streams/external-formats/utf-8.lisp	Wed Jul  7 09:43:12 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.11 2010-07-06 04:46:35 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.12 2010-07-07 13:43:12 rtoy Exp $")
 (in-package "STREAM")
@@ -52,17 +52,19 @@
 				 (svref #(127 2047 65535) (1- ,i)))) ; overlong
 			(lisp::surrogatep ,n)) ; surrogate
-		      (,unput ,i)
+		      ;; Replace the entire sequence with the
+		      ;; replacment character
 		      (values (if ,error
 				    ((>= ,n lisp:codepoint-limit)
-				     (funcall ,error "Invalid codepoint #x~X" ,n nil))
+				     (funcall ,error "Invalid codepoint #x~X of ~D octets"
+					      ,n (1+ ,i)))
 				    ((lisp::surrogatep ,n)
-				     (funcall ,error "Invalid surrogate code #x~X" ,n nil))
+				     (funcall ,error "Invalid surrogate code #x~X" ,n (1+ ,i)))
-				     (funcall ,error "Overlong utf8 sequence" nil nil)))
+				     (funcall ,error "Overlong utf8 sequence of ~*~D octets" nil (1+ ,i))))
-			      1))
+			      (1+ ,i)))
 		    (values ,n (1+ ,i)))))
       (let ((,c ,input))
 	(declare (optimize (ext:inhibit-warnings 3)))

More information about the cmucl-commit mailing list