CMUCL commit: src/pcl/simple-streams/external-formats (utf-8.lisp)
Raymond Toy
rtoy at common-lisp.net
Wed Jul 7 15:43:12 CEST 2010
Date: Wednesday, July 7, 2010 @ 09:43:12
Author: rtoy
Path: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats
Modified: utf-8.lisp
o When encountering an overlong sequence, a surrogate code, or a
too-large codepoint, swallow the whole sequence instead of putting
back the characters like we did previously. (This new way is in
line with how we handle other bad sequences.)
o Pass the number of octets read to the error handler in these cases.
o Update the message strings to include the number of octets.
------------+
utf-8.lisp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
Index: src/pcl/simple-streams/external-formats/utf-8.lisp
diff -u src/pcl/simple-streams/external-formats/utf-8.lisp:1.11 src/pcl/simple-streams/external-formats/utf-8.lisp:1.12
--- src/pcl/simple-streams/external-formats/utf-8.lisp:1.11 Tue Jul 6 00:46:35 2010
+++ src/pcl/simple-streams/external-formats/utf-8.lisp Wed Jul 7 09:43:12 2010
@@ -4,7 +4,7 @@
;;; This code was written by Paul Foley and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.11 2010-07-06 04:46:35 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.12 2010-07-07 13:43:12 rtoy Exp $")
(in-package "STREAM")
@@ -52,17 +52,19 @@
(svref #(127 2047 65535) (1- ,i)))) ; overlong
(lisp::surrogatep ,n)) ; surrogate
(progn
- (,unput ,i)
+ ;; Replace the entire sequence with the
+ ;; replacment character
(values (if ,error
(cond
((>= ,n lisp:codepoint-limit)
- (funcall ,error "Invalid codepoint #x~X" ,n nil))
+ (funcall ,error "Invalid codepoint #x~X of ~D octets"
+ ,n (1+ ,i)))
((lisp::surrogatep ,n)
- (funcall ,error "Invalid surrogate code #x~X" ,n nil))
+ (funcall ,error "Invalid surrogate code #x~X" ,n (1+ ,i)))
(t
- (funcall ,error "Overlong utf8 sequence" nil nil)))
+ (funcall ,error "Overlong utf8 sequence of ~*~D octets" nil (1+ ,i))))
+replacement-character-code+)
- 1))
+ (1+ ,i)))
(values ,n (1+ ,i)))))
(let ((,c ,input))
(declare (optimize (ext:inhibit-warnings 3)))
More information about the cmucl-commit
mailing list