CMUCL commit: src/pcl/simple-streams/external-formats (10 files)

Raymond Toy rtoy at common-lisp.net
Sat Jul 3 01:13:12 CEST 2010


    Date: Friday, July 2, 2010 @ 19:13:12
  Author: rtoy
    Path: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats

Modified: ascii.lisp iso8859-2.lisp mac-roman.lisp utf-16-be.lisp
          utf-16-le.lisp utf-16.lisp utf-32-be.lisp utf-32-le.lisp
          utf-32.lisp utf-8.lisp

ascii.lisp:
o Forgot to add error parameter for code-to-octets and
  octets-to-code. 
o Call error handler for output errors.

iso8859-2.lisp:
mac-roman.lisp:
o If defined, call error handler for output errors.

utf-16.lisp:
utf-16-be.lisp:
utf-16-le.lisp:
o If defined, call error handler for input and output errors.

utf-32.lisp:
utf-32-be.lisp:
utf-32-le.lisp:
o If defined, call error handler for input and output errors.
  (Previously, we didn't signal any output errors, but large
  codepoints and surrogate characters are not allowed in UTF-32 output
  streams.)

utf-8.lisp:
o Use lisp:codepoint-limit instead of #x10ffff.


----------------+
 ascii.lisp     |   18 +++++++++++++-----
 iso8859-2.lisp |    8 ++++++--
 mac-roman.lisp |    9 +++++++--
 utf-16-be.lisp |   17 +++++++++++++----
 utf-16-le.lisp |   19 ++++++++++++++-----
 utf-16.lisp    |   17 +++++++++++++----
 utf-32-be.lisp |   30 ++++++++++++++++++++++++------
 utf-32-le.lisp |   27 +++++++++++++++++++++------
 utf-32.lisp    |   20 ++++++++++++++++----
 utf-8.lisp     |    6 +++---
 10 files changed, 130 insertions(+), 41 deletions(-)


Index: src/pcl/simple-streams/external-formats/ascii.lisp
diff -u src/pcl/simple-streams/external-formats/ascii.lisp:1.1 src/pcl/simple-streams/external-formats/ascii.lisp:1.2
--- src/pcl/simple-streams/external-formats/ascii.lisp:1.1	Tue Nov 10 10:31:43 2009
+++ src/pcl/simple-streams/external-formats/ascii.lisp	Fri Jul  2 19:13:11 2010
@@ -4,16 +4,24 @@
 ;;; This code was written by Raymond Toy and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/ascii.lisp,v 1.1 2009-11-10 15:31:43 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/ascii.lisp,v 1.2 2010-07-02 23:13:11 rtoy Exp $")
 
 (in-package "STREAM")
 
 (define-external-format :ascii (:size 1)
   ()
-  (octets-to-code (state input unput c)
+  (octets-to-code (state input unput error c)
     `(let ((,c ,input))		  
-       (values (if (< ,c #x80) ,c +replacement-character-code+)
+       (values (if (< ,c #x80)
+		   ,c
+		   (if ,error
+		       (funcall ,error "Invalid octet #x~4,0X for ASCII" ,c 1)
+		       +replacement-character-code+))
 	       1))
-  (code-to-octets (code state output)
-    `(,output (if (> ,code #x7F) #x3F ,code))))
+  (code-to-octets (code state output error)
+    `(,output (if (> ,code #x7F)
+		  (if ,error
+		      (funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code)
+		      #x3F)
+		  ,code)))))
 
Index: src/pcl/simple-streams/external-formats/iso8859-2.lisp
diff -u src/pcl/simple-streams/external-formats/iso8859-2.lisp:1.3 src/pcl/simple-streams/external-formats/iso8859-2.lisp:1.4
--- src/pcl/simple-streams/external-formats/iso8859-2.lisp:1.3	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/iso8859-2.lisp	Fri Jul  2 19:13:11 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/iso8859-2.lisp,v 1.3 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/iso8859-2.lisp,v 1.4 2010-07-02 23:13:11 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -30,4 +30,8 @@
     `(,output (if (< ,code 160)
 		  ,code
 		  (let ((,code (get-inverse ,itable ,code)))
-		    (if ,code (+ (the (unsigned-byte 7) ,code) 160) #x3F))))))
+		    (if ,code
+			(+ (the (unsigned-byte 7) ,code) 160)
+			(if ,error
+			    (funcall ,error "Cannot output codepoint #x~X to ISO8859-2 stream" ,code)
+			    #x3F)))))))
Index: src/pcl/simple-streams/external-formats/mac-roman.lisp
diff -u src/pcl/simple-streams/external-formats/mac-roman.lisp:1.4 src/pcl/simple-streams/external-formats/mac-roman.lisp:1.5
--- src/pcl/simple-streams/external-formats/mac-roman.lisp:1.4	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/mac-roman.lisp	Fri Jul  2 19:13:11 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/mac-roman.lisp,v 1.4 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/mac-roman.lisp,v 1.5 2010-07-02 23:13:11 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -35,4 +35,9 @@
     `(,output (if (< ,code 128)
 		  ,code
 		  (let ((,code (get-inverse ,itable ,code)))
-		    (if ,code (+ (the (unsigned-byte 7) ,code) 128) #x3F))))))
+		    (if ,code
+			(+ (the (unsigned-byte 7) ,code) 128)
+			(if ,error
+			    (funcall ,error "Cannot output codepoint #x~X to MAC-ROMAN stream"
+				     ,code)
+			    #x3F)))))))
Index: src/pcl/simple-streams/external-formats/utf-16-be.lisp
diff -u src/pcl/simple-streams/external-formats/utf-16-be.lisp:1.5 src/pcl/simple-streams/external-formats/utf-16-be.lisp:1.6
--- src/pcl/simple-streams/external-formats/utf-16-be.lisp:1.5	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-16-be.lisp	Fri Jul  2 19:13:11 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-be.lisp,v 1.5 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-be.lisp,v 1.6 2010-07-02 23:13:11 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -26,7 +26,10 @@
 		  (setf ,code (+ (ash (- (the (integer #xd800 #xdbff) ,state) #xD800) 10)
 				 ,code #x2400)
 			,state nil)
-		  (setf ,code +replacement-character-code+)))
+		  (setf ,code
+			(if ,error
+			    (funcall ,error "Bare low surrogate #x~4,0X" ,code 2)
+			    +replacement-character-code+))))
 	     ((lisp::surrogatep ,code :high)
 	      ;; Remember the high surrogate in case we bail out
 	      ;; reading the low surrogate (for octets-to-string.)
@@ -42,10 +45,16 @@
 		;; next time around?
 		(if (lisp::surrogatep ,next :low)
 		    (setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400))
-		    (setf ,code +replacement-character-code+))))
+		    (setf ,code
+			  (if ,error
+			      (funcall ,error "High surrogate followed by #x~4,0X instead of low surrogate" ,next ,wd)
+			      +replacement-character-code+)))))
 	     ((= ,code #xFFFE)
 	      ;; Replace with REPLACEMENT CHARACTER.  
-	      (setf ,code +replacement-character-code+))
+	      (setf ,code
+		    (if ,error
+			(funcall ,error "BOM is not valid within a UTF-16 stream" ,code ,wd)
+			+replacement-character-code+)))
 	     (t (setf ,state nil)))
        (values ,code 2)))
   (code-to-octets (code state output error c c1 c2)
Index: src/pcl/simple-streams/external-formats/utf-16-le.lisp
diff -u src/pcl/simple-streams/external-formats/utf-16-le.lisp:1.5 src/pcl/simple-streams/external-formats/utf-16-le.lisp:1.6
--- src/pcl/simple-streams/external-formats/utf-16-le.lisp:1.5	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-16-le.lisp	Fri Jul  2 19:13:11 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-le.lisp,v 1.5 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-le.lisp,v 1.6 2010-07-02 23:13:11 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -28,7 +28,10 @@
 		  (setf ,code (+ (ash (- (the (integer #xd800 #xdbff) ,state) #xD800) 10)
 				 ,code #x2400)
 			,state nil)
-		  (setf ,code +replacement-character-code+)))
+		  (setf ,code
+			(if ,error
+			    (funcall ,error "Bare low surrogate #x~4,0X" ,code 2)
+			    +replacement-character-code+))))
 	     ((lisp::surrogatep ,code :high)
 	      ;; Remember the high surrogate in case we bail out
 	      ;; reading the low surrogate (for octets-to-string.)
@@ -43,10 +46,16 @@
 		;; next time around?
 		(if (lisp::surrogatep ,next :low)
 		    (setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400))
-		    (setq ,code +replacement-character-code+))))
+		    (setq ,code
+			  (if ,error
+			      (funcall ,error "High surrogate followed by #x~4,0X instead of low surrogate" ,next 2)
+			      +replacement-character-code+)))))
 	     ((= ,code #xFFFE)
-	      ;; replace with REPLACEMENT CHARACTER ?
-	      (error "Illegal character U+FFFE in UTF-16 sequence."))
+	      ;; replace with REPLACEMENT CHARACTER.
+	      (setf ,code
+		    (if ,error
+			(funcall ,error "BOM is not valid within a UTF-16 stream" ,code 2)
+			+replacement-character-code+)))
 	     (t (setf ,state nil)))
       (values ,code 2)))
   (code-to-octets (code state output error c c1 c2)
Index: src/pcl/simple-streams/external-formats/utf-16.lisp
diff -u src/pcl/simple-streams/external-formats/utf-16.lisp:1.7 src/pcl/simple-streams/external-formats/utf-16.lisp:1.8
--- src/pcl/simple-streams/external-formats/utf-16.lisp:1.7	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-16.lisp	Fri Jul  2 19:13:11 2010
@@ -1,7 +1,7 @@
 ;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
 ;;;
 ;;; **********************************************************************
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16.lisp,v 1.7 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16.lisp,v 1.8 2010-07-02 23:13:11 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -55,7 +55,10 @@
 					   10)
 				      ,code #x2400)
 			     ,state nil)
-		       (setf ,code +replacement-character-code+)))
+		       (setf ,code
+			     (if ,error
+				 (funcall ,error "Bare low surrogate #x~4,0X" ,code 2)
+				 +replacement-character-code+))))
 		  ((lisp::surrogatep ,code :high)
 		   ;; Save the high (leading) code in the state, in
 		   ;; case we fail to read the low (trailing)
@@ -76,14 +79,20 @@
 		     (if (lisp::surrogatep ,next :low)
 			 (setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400)
 			       ,wd 4)
-			 (setf ,code +replacement-character-code+))))
+			 (setf ,code
+			       (if ,error
+				   (funcall ,error "High surrogate followed by #x~4,0X instead of low surrogate" ,next ,wd)
+				   +replacement-character-code+)))))
 		  ((and (= ,code #xFFFE) (zerop ,st))
 		   (setf (car ,state) 1) (go :again))
 		  ((and (= ,code #xFEFF) (zerop ,st))
 		   (setf (car ,state) 2) (go :again))
 		  ((= ,code #xFFFE)
 		   ;; Replace with REPLACEMENT CHARACTER.  
-		   (setf ,code +replacement-character-code+)))
+		   (setf ,code
+			 (if ,error
+			     (funcall ,error "BOM is not valid within a UTF-16 stream" ,code ,wd)
+			     +replacement-character-code+))))
 	    (return (values ,code ,wd))))))
   (code-to-octets (code state output error c c1 c2)
     `(flet ((output (code)
Index: src/pcl/simple-streams/external-formats/utf-32-be.lisp
diff -u src/pcl/simple-streams/external-formats/utf-32-be.lisp:1.4 src/pcl/simple-streams/external-formats/utf-32-be.lisp:1.5
--- src/pcl/simple-streams/external-formats/utf-32-be.lisp:1.4	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-32-be.lisp	Fri Jul  2 19:13:11 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Raymond Toy and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-be.lisp,v 1.4 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-be.lisp,v 1.5 2010-07-02 23:13:11 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -22,13 +22,31 @@
 		   ,c4)))
        (declare (type (unsigned-byte 8) ,c1 ,c2 ,c3 ,c4)
 		(optimize (speed 3)))
-       (cond ((or (> ,c #x10ffff)
+       (cond ((or (>= ,c lisp:codepoint-limit)
 		  (lisp::surrogatep ,c))
 	      ;; Surrogates are illegal.  Use replacement character.
-	      (values +replacement-character-code+ 4))
+	      (values (if ,error
+			  (if (>= ,code lisp:codepoint-limit)
+			      (funcall ,error "Illegal codepoint #x~4,0X" ,code 4)
+			      (funcall ,error "Surrogate #x~4,0X not allowed in UTF32" ,code 4))
+			  +replacement-character-code+)
+		      4))
 	     (t
 	      (values ,c 4)))))
 
-  (code-to-octets (code state output error i)
-    `(dotimes (,i 4)
-       (,output (ldb (byte 8 (* 8 (- 3 ,i))) ,code)))))
+  (code-to-octets (code state output error c i)
+    `(flet ((out (,c)
+	      ;; Big-endian output
+	      (dotimes (,i 4)
+		(,output (ldb (byte 8 (* 8 (- 3 ,i))) ,c)))))
+       ;; Write BOM
+       (unless ,state
+	 (out #xFEFF)
+	 (setf ,state t))
+       (cond ((lisp:surrogatep ,code)
+	      (out (if ,error
+		       (funcall ,error "Surrogate code #x~4,0X is illegal for UTF32 output"
+				,code)
+		       +replacement-character-code+)))
+	     (t
+	      (out ,code))))))
Index: src/pcl/simple-streams/external-formats/utf-32-le.lisp
diff -u src/pcl/simple-streams/external-formats/utf-32-le.lisp:1.4 src/pcl/simple-streams/external-formats/utf-32-le.lisp:1.5
--- src/pcl/simple-streams/external-formats/utf-32-le.lisp:1.4	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-32-le.lisp	Fri Jul  2 19:13:12 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Raymond Toy and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-le.lisp,v 1.4 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-le.lisp,v 1.5 2010-07-02 23:13:12 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -22,13 +22,28 @@
 		   (ash ,c4 24))))
        (declare (type (unsigned-byte 8) ,c1 ,c2 ,c3 ,c4)
 		(optimize (speed 3)))
-       (cond ((or (> ,c #x10ffff)
+       (cond ((or (>= ,c lisp:codepoint-limit)
 		  (lisp::surrogatep ,c))
 	      ;; Surrogates are illegal.  Use replacement character.
-	      (values +replacement-character-code+ 4))
+	      (values (if ,error
+			  (if (>= ,code lisp:codepoint-limit)
+			      (funcall ,error "Illegal codepoint #x~4,0X" ,code 4)
+			      (funcall ,error "Surrogate #x~4,0X not allowed in UTF32"
+				       ,code 4))
+			  +replacement-character-code+)
+		      4))
 	     (t
 	      (values ,c 4)))))
 
-  (code-to-octets (code state output error i)
-    `(dotimes (,i 4)
-       (,output (ldb (byte 8 (* 8 ,i)) ,code)))))
+  (code-to-octets (code state output error c i)
+    `(flet ((out (,c)
+	      ;; Little-endian output
+	      (dotimes (,i 4)
+		(,output (ldb (byte 8 (* 8 ,i)) ,c)))))
+       (cond ((lisp:surrogatep ,code)
+	      (out (if ,error
+		       (funcall ,error "Surrogate code #x~4,0X is illegal for UTF32 output"
+				,code)
+		       +replacement-character-code+)))
+	     (t
+	      (out ,code))))))
Index: src/pcl/simple-streams/external-formats/utf-32.lisp
diff -u src/pcl/simple-streams/external-formats/utf-32.lisp:1.6 src/pcl/simple-streams/external-formats/utf-32.lisp:1.7
--- src/pcl/simple-streams/external-formats/utf-32.lisp:1.6	Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-32.lisp	Fri Jul  2 19:13:12 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Raymond Toy and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32.lisp,v 1.6 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32.lisp,v 1.7 2010-07-02 23:13:12 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -54,12 +54,18 @@
 	    (declare (type (integer 0 2) ,st)
 		     (type (unsigned-byte 8) ,c1 ,c2 ,c3 ,c4)
 		     (optimize (speed 3)))
-	    (cond ((or (> ,code #x10fff)
+	    (cond ((or (>= ,code lisp:codepoint-limit)
 		       (lisp::surrogatep ,code))
 		   ;; Surrogates are illegal and codepoints outside
 		   ;; the Unicode range are illegal.  Use replacement
 		   ;; character.
-		   (setf ,code +replacement-character-code+))
+		   (setf ,code
+			 (if ,error
+			     (if (>= ,code lisp:codepoint-limit)
+				 (funcall ,error "Illegal codepoint #x~4,0X" ,code 4)
+				 (funcall ,error "Surrogate #x~4,0X not allowed in UTF32"
+					  ,code 4))
+			     +replacement-character-code+)))
 		  ((and  (zerop ,st) (= ,code #xFFFE0000))
 		   ;; BOM for little-endian
 		   (setf ,state 1)
@@ -79,7 +85,13 @@
        (unless ,state
 	 (out #xFEFF)
 	 (setf ,state t))
-       (out code)))
+       (cond ((lisp:surrogatep ,code)
+	      (out (if ,error
+		       (funcall ,error "Surrogate code #x~4,0X is illegal for UTF32 output"
+				,code)
+		       +replacement-character-code+)))
+	     (t
+	      (out ,code)))))
   nil
   (copy-state (state)
     ;; The state is either NIL or T, so we can just return that.
Index: src/pcl/simple-streams/external-formats/utf-8.lisp
diff -u src/pcl/simple-streams/external-formats/utf-8.lisp:1.8 src/pcl/simple-streams/external-formats/utf-8.lisp:1.9
--- src/pcl/simple-streams/external-formats/utf-8.lisp:1.8	Fri Jul  2 12:36:01 2010
+++ src/pcl/simple-streams/external-formats/utf-8.lisp	Fri Jul  2 19:13:12 2010
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.8 2010-07-02 16:36:01 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.9 2010-07-02 23:13:12 rtoy Exp $")
 
 (in-package "STREAM")
 
@@ -47,7 +47,7 @@
 		;; encode to codepoints that don't need that long of a
 		;; sequence) and any surrogate values and any code
 		;; outside the 21-bit Unicode range.
-		(if (or (> ,n #x10ffff)
+		(if (or (>= ,n lisp:codepoint-limit)
 			(<= ,n (the (member 127 1023 32767)
 				 (svref #(127 1023 32767) (1- ,i)))) ; overlong
 			(lisp::surrogatep ,n)) ; surrogate
@@ -55,7 +55,7 @@
 		      (,unput ,i)
 		      (values (if ,error
 				  (cond
-				    ((> ,n #x10ffff)
+				    ((>= ,n lisp:codepoint-limit)
 				     (funcall ,error "Invalid codepoint" nil nil))
 				    ((lisp::surrogatep ,n)
 				     (funcall ,error "Invalid surrogate code #x~4,0X" ,n nil))



More information about the cmucl-commit mailing list