CMUCL commit: src/pcl/simple-streams/external-formats (10 files)
Raymond Toy
rtoy at common-lisp.net
Sat Jul 3 01:13:12 CEST 2010
Date: Friday, July 2, 2010 @ 19:13:12
Author: rtoy
Path: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats
Modified: ascii.lisp iso8859-2.lisp mac-roman.lisp utf-16-be.lisp
utf-16-le.lisp utf-16.lisp utf-32-be.lisp utf-32-le.lisp
utf-32.lisp utf-8.lisp
ascii.lisp:
o Forgot to add error parameter for code-to-octets and
octets-to-code.
o Call error handler for output errors.
iso8859-2.lisp:
mac-roman.lisp:
o If defined, call error handler for output errors.
utf-16.lisp:
utf-16-be.lisp:
utf-16-le.lisp:
o If defined, call error handler for input and output errors.
utf-32.lisp:
utf-32-be.lisp:
utf-32-le.lisp:
o If defined, call error handler for input and output errors.
(Previously, we didn't signal any output errors, but large
codepoints and surrogate characters are not allowed in UTF-32 output
streams.)
utf-8.lisp:
o Use lisp:codepoint-limit instead of #x10ffff.
----------------+
ascii.lisp | 18 +++++++++++++-----
iso8859-2.lisp | 8 ++++++--
mac-roman.lisp | 9 +++++++--
utf-16-be.lisp | 17 +++++++++++++----
utf-16-le.lisp | 19 ++++++++++++++-----
utf-16.lisp | 17 +++++++++++++----
utf-32-be.lisp | 30 ++++++++++++++++++++++++------
utf-32-le.lisp | 27 +++++++++++++++++++++------
utf-32.lisp | 20 ++++++++++++++++----
utf-8.lisp | 6 +++---
10 files changed, 130 insertions(+), 41 deletions(-)
Index: src/pcl/simple-streams/external-formats/ascii.lisp
diff -u src/pcl/simple-streams/external-formats/ascii.lisp:1.1 src/pcl/simple-streams/external-formats/ascii.lisp:1.2
--- src/pcl/simple-streams/external-formats/ascii.lisp:1.1 Tue Nov 10 10:31:43 2009
+++ src/pcl/simple-streams/external-formats/ascii.lisp Fri Jul 2 19:13:11 2010
@@ -4,16 +4,24 @@
;;; This code was written by Raymond Toy and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/ascii.lisp,v 1.1 2009-11-10 15:31:43 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/ascii.lisp,v 1.2 2010-07-02 23:13:11 rtoy Exp $")
(in-package "STREAM")
(define-external-format :ascii (:size 1)
()
- (octets-to-code (state input unput c)
+ (octets-to-code (state input unput error c)
`(let ((,c ,input))
- (values (if (< ,c #x80) ,c +replacement-character-code+)
+ (values (if (< ,c #x80)
+ ,c
+ (if ,error
+ (funcall ,error "Invalid octet #x~4,0X for ASCII" ,c 1)
+ +replacement-character-code+))
1))
- (code-to-octets (code state output)
- `(,output (if (> ,code #x7F) #x3F ,code))))
+ (code-to-octets (code state output error)
+ `(,output (if (> ,code #x7F)
+ (if ,error
+ (funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code)
+ #x3F)
+ ,code)))))
Index: src/pcl/simple-streams/external-formats/iso8859-2.lisp
diff -u src/pcl/simple-streams/external-formats/iso8859-2.lisp:1.3 src/pcl/simple-streams/external-formats/iso8859-2.lisp:1.4
--- src/pcl/simple-streams/external-formats/iso8859-2.lisp:1.3 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/iso8859-2.lisp Fri Jul 2 19:13:11 2010
@@ -4,7 +4,7 @@
;;; This code was written by Paul Foley and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/iso8859-2.lisp,v 1.3 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/iso8859-2.lisp,v 1.4 2010-07-02 23:13:11 rtoy Exp $")
(in-package "STREAM")
@@ -30,4 +30,8 @@
`(,output (if (< ,code 160)
,code
(let ((,code (get-inverse ,itable ,code)))
- (if ,code (+ (the (unsigned-byte 7) ,code) 160) #x3F))))))
+ (if ,code
+ (+ (the (unsigned-byte 7) ,code) 160)
+ (if ,error
+ (funcall ,error "Cannot output codepoint #x~X to ISO8859-2 stream" ,code)
+ #x3F)))))))
Index: src/pcl/simple-streams/external-formats/mac-roman.lisp
diff -u src/pcl/simple-streams/external-formats/mac-roman.lisp:1.4 src/pcl/simple-streams/external-formats/mac-roman.lisp:1.5
--- src/pcl/simple-streams/external-formats/mac-roman.lisp:1.4 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/mac-roman.lisp Fri Jul 2 19:13:11 2010
@@ -4,7 +4,7 @@
;;; This code was written by Paul Foley and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/mac-roman.lisp,v 1.4 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/mac-roman.lisp,v 1.5 2010-07-02 23:13:11 rtoy Exp $")
(in-package "STREAM")
@@ -35,4 +35,9 @@
`(,output (if (< ,code 128)
,code
(let ((,code (get-inverse ,itable ,code)))
- (if ,code (+ (the (unsigned-byte 7) ,code) 128) #x3F))))))
+ (if ,code
+ (+ (the (unsigned-byte 7) ,code) 128)
+ (if ,error
+ (funcall ,error "Cannot output codepoint #x~X to MAC-ROMAN stream"
+ ,code)
+ #x3F)))))))
Index: src/pcl/simple-streams/external-formats/utf-16-be.lisp
diff -u src/pcl/simple-streams/external-formats/utf-16-be.lisp:1.5 src/pcl/simple-streams/external-formats/utf-16-be.lisp:1.6
--- src/pcl/simple-streams/external-formats/utf-16-be.lisp:1.5 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-16-be.lisp Fri Jul 2 19:13:11 2010
@@ -4,7 +4,7 @@
;;; This code was written by Paul Foley and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-be.lisp,v 1.5 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-be.lisp,v 1.6 2010-07-02 23:13:11 rtoy Exp $")
(in-package "STREAM")
@@ -26,7 +26,10 @@
(setf ,code (+ (ash (- (the (integer #xd800 #xdbff) ,state) #xD800) 10)
,code #x2400)
,state nil)
- (setf ,code +replacement-character-code+)))
+ (setf ,code
+ (if ,error
+ (funcall ,error "Bare low surrogate #x~4,0X" ,code 2)
+ +replacement-character-code+))))
((lisp::surrogatep ,code :high)
;; Remember the high surrogate in case we bail out
;; reading the low surrogate (for octets-to-string.)
@@ -42,10 +45,16 @@
;; next time around?
(if (lisp::surrogatep ,next :low)
(setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400))
- (setf ,code +replacement-character-code+))))
+ (setf ,code
+ (if ,error
+ (funcall ,error "High surrogate followed by #x~4,0X instead of low surrogate" ,next ,wd)
+ +replacement-character-code+)))))
((= ,code #xFFFE)
;; Replace with REPLACEMENT CHARACTER.
- (setf ,code +replacement-character-code+))
+ (setf ,code
+ (if ,error
+ (funcall ,error "BOM is not valid within a UTF-16 stream" ,code ,wd)
+ +replacement-character-code+)))
(t (setf ,state nil)))
(values ,code 2)))
(code-to-octets (code state output error c c1 c2)
Index: src/pcl/simple-streams/external-formats/utf-16-le.lisp
diff -u src/pcl/simple-streams/external-formats/utf-16-le.lisp:1.5 src/pcl/simple-streams/external-formats/utf-16-le.lisp:1.6
--- src/pcl/simple-streams/external-formats/utf-16-le.lisp:1.5 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-16-le.lisp Fri Jul 2 19:13:11 2010
@@ -4,7 +4,7 @@
;;; This code was written by Paul Foley and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-le.lisp,v 1.5 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16-le.lisp,v 1.6 2010-07-02 23:13:11 rtoy Exp $")
(in-package "STREAM")
@@ -28,7 +28,10 @@
(setf ,code (+ (ash (- (the (integer #xd800 #xdbff) ,state) #xD800) 10)
,code #x2400)
,state nil)
- (setf ,code +replacement-character-code+)))
+ (setf ,code
+ (if ,error
+ (funcall ,error "Bare low surrogate #x~4,0X" ,code 2)
+ +replacement-character-code+))))
((lisp::surrogatep ,code :high)
;; Remember the high surrogate in case we bail out
;; reading the low surrogate (for octets-to-string.)
@@ -43,10 +46,16 @@
;; next time around?
(if (lisp::surrogatep ,next :low)
(setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400))
- (setq ,code +replacement-character-code+))))
+ (setq ,code
+ (if ,error
+ (funcall ,error "High surrogate followed by #x~4,0X instead of low surrogate" ,next 2)
+ +replacement-character-code+)))))
((= ,code #xFFFE)
- ;; replace with REPLACEMENT CHARACTER ?
- (error "Illegal character U+FFFE in UTF-16 sequence."))
+ ;; replace with REPLACEMENT CHARACTER.
+ (setf ,code
+ (if ,error
+ (funcall ,error "BOM is not valid within a UTF-16 stream" ,code 2)
+ +replacement-character-code+)))
(t (setf ,state nil)))
(values ,code 2)))
(code-to-octets (code state output error c c1 c2)
Index: src/pcl/simple-streams/external-formats/utf-16.lisp
diff -u src/pcl/simple-streams/external-formats/utf-16.lisp:1.7 src/pcl/simple-streams/external-formats/utf-16.lisp:1.8
--- src/pcl/simple-streams/external-formats/utf-16.lisp:1.7 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-16.lisp Fri Jul 2 19:13:11 2010
@@ -1,7 +1,7 @@
;;; -*- Mode: LISP; Syntax: ANSI-Common-Lisp; Package: STREAM -*-
;;;
;;; **********************************************************************
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16.lisp,v 1.7 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-16.lisp,v 1.8 2010-07-02 23:13:11 rtoy Exp $")
(in-package "STREAM")
@@ -55,7 +55,10 @@
10)
,code #x2400)
,state nil)
- (setf ,code +replacement-character-code+)))
+ (setf ,code
+ (if ,error
+ (funcall ,error "Bare low surrogate #x~4,0X" ,code 2)
+ +replacement-character-code+))))
((lisp::surrogatep ,code :high)
;; Save the high (leading) code in the state, in
;; case we fail to read the low (trailing)
@@ -76,14 +79,20 @@
(if (lisp::surrogatep ,next :low)
(setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400)
,wd 4)
- (setf ,code +replacement-character-code+))))
+ (setf ,code
+ (if ,error
+ (funcall ,error "High surrogate followed by #x~4,0X instead of low surrogate" ,next ,wd)
+ +replacement-character-code+)))))
((and (= ,code #xFFFE) (zerop ,st))
(setf (car ,state) 1) (go :again))
((and (= ,code #xFEFF) (zerop ,st))
(setf (car ,state) 2) (go :again))
((= ,code #xFFFE)
;; Replace with REPLACEMENT CHARACTER.
- (setf ,code +replacement-character-code+)))
+ (setf ,code
+ (if ,error
+ (funcall ,error "BOM is not valid within a UTF-16 stream" ,code ,wd)
+ +replacement-character-code+))))
(return (values ,code ,wd))))))
(code-to-octets (code state output error c c1 c2)
`(flet ((output (code)
Index: src/pcl/simple-streams/external-formats/utf-32-be.lisp
diff -u src/pcl/simple-streams/external-formats/utf-32-be.lisp:1.4 src/pcl/simple-streams/external-formats/utf-32-be.lisp:1.5
--- src/pcl/simple-streams/external-formats/utf-32-be.lisp:1.4 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-32-be.lisp Fri Jul 2 19:13:11 2010
@@ -4,7 +4,7 @@
;;; This code was written by Raymond Toy and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-be.lisp,v 1.4 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-be.lisp,v 1.5 2010-07-02 23:13:11 rtoy Exp $")
(in-package "STREAM")
@@ -22,13 +22,31 @@
,c4)))
(declare (type (unsigned-byte 8) ,c1 ,c2 ,c3 ,c4)
(optimize (speed 3)))
- (cond ((or (> ,c #x10ffff)
+ (cond ((or (>= ,c lisp:codepoint-limit)
(lisp::surrogatep ,c))
;; Surrogates are illegal. Use replacement character.
- (values +replacement-character-code+ 4))
+ (values (if ,error
+ (if (>= ,code lisp:codepoint-limit)
+ (funcall ,error "Illegal codepoint #x~4,0X" ,code 4)
+ (funcall ,error "Surrogate #x~4,0X not allowed in UTF32" ,code 4))
+ +replacement-character-code+)
+ 4))
(t
(values ,c 4)))))
- (code-to-octets (code state output error i)
- `(dotimes (,i 4)
- (,output (ldb (byte 8 (* 8 (- 3 ,i))) ,code)))))
+ (code-to-octets (code state output error c i)
+ `(flet ((out (,c)
+ ;; Big-endian output
+ (dotimes (,i 4)
+ (,output (ldb (byte 8 (* 8 (- 3 ,i))) ,c)))))
+ ;; Write BOM
+ (unless ,state
+ (out #xFEFF)
+ (setf ,state t))
+ (cond ((lisp:surrogatep ,code)
+ (out (if ,error
+ (funcall ,error "Surrogate code #x~4,0X is illegal for UTF32 output"
+ ,code)
+ +replacement-character-code+)))
+ (t
+ (out ,code))))))
Index: src/pcl/simple-streams/external-formats/utf-32-le.lisp
diff -u src/pcl/simple-streams/external-formats/utf-32-le.lisp:1.4 src/pcl/simple-streams/external-formats/utf-32-le.lisp:1.5
--- src/pcl/simple-streams/external-formats/utf-32-le.lisp:1.4 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-32-le.lisp Fri Jul 2 19:13:12 2010
@@ -4,7 +4,7 @@
;;; This code was written by Raymond Toy and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-le.lisp,v 1.4 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32-le.lisp,v 1.5 2010-07-02 23:13:12 rtoy Exp $")
(in-package "STREAM")
@@ -22,13 +22,28 @@
(ash ,c4 24))))
(declare (type (unsigned-byte 8) ,c1 ,c2 ,c3 ,c4)
(optimize (speed 3)))
- (cond ((or (> ,c #x10ffff)
+ (cond ((or (>= ,c lisp:codepoint-limit)
(lisp::surrogatep ,c))
;; Surrogates are illegal. Use replacement character.
- (values +replacement-character-code+ 4))
+ (values (if ,error
+ (if (>= ,code lisp:codepoint-limit)
+ (funcall ,error "Illegal codepoint #x~4,0X" ,code 4)
+ (funcall ,error "Surrogate #x~4,0X not allowed in UTF32"
+ ,code 4))
+ +replacement-character-code+)
+ 4))
(t
(values ,c 4)))))
- (code-to-octets (code state output error i)
- `(dotimes (,i 4)
- (,output (ldb (byte 8 (* 8 ,i)) ,code)))))
+ (code-to-octets (code state output error c i)
+ `(flet ((out (,c)
+ ;; Little-endian output
+ (dotimes (,i 4)
+ (,output (ldb (byte 8 (* 8 ,i)) ,c)))))
+ (cond ((lisp:surrogatep ,code)
+ (out (if ,error
+ (funcall ,error "Surrogate code #x~4,0X is illegal for UTF32 output"
+ ,code)
+ +replacement-character-code+)))
+ (t
+ (out ,code))))))
Index: src/pcl/simple-streams/external-formats/utf-32.lisp
diff -u src/pcl/simple-streams/external-formats/utf-32.lisp:1.6 src/pcl/simple-streams/external-formats/utf-32.lisp:1.7
--- src/pcl/simple-streams/external-formats/utf-32.lisp:1.6 Wed Jun 30 00:02:53 2010
+++ src/pcl/simple-streams/external-formats/utf-32.lisp Fri Jul 2 19:13:12 2010
@@ -4,7 +4,7 @@
;;; This code was written by Raymond Toy and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32.lisp,v 1.6 2010-06-30 04:02:53 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-32.lisp,v 1.7 2010-07-02 23:13:12 rtoy Exp $")
(in-package "STREAM")
@@ -54,12 +54,18 @@
(declare (type (integer 0 2) ,st)
(type (unsigned-byte 8) ,c1 ,c2 ,c3 ,c4)
(optimize (speed 3)))
- (cond ((or (> ,code #x10fff)
+ (cond ((or (>= ,code lisp:codepoint-limit)
(lisp::surrogatep ,code))
;; Surrogates are illegal and codepoints outside
;; the Unicode range are illegal. Use replacement
;; character.
- (setf ,code +replacement-character-code+))
+ (setf ,code
+ (if ,error
+ (if (>= ,code lisp:codepoint-limit)
+ (funcall ,error "Illegal codepoint #x~4,0X" ,code 4)
+ (funcall ,error "Surrogate #x~4,0X not allowed in UTF32"
+ ,code 4))
+ +replacement-character-code+)))
((and (zerop ,st) (= ,code #xFFFE0000))
;; BOM for little-endian
(setf ,state 1)
@@ -79,7 +85,13 @@
(unless ,state
(out #xFEFF)
(setf ,state t))
- (out code)))
+ (cond ((lisp:surrogatep ,code)
+ (out (if ,error
+ (funcall ,error "Surrogate code #x~4,0X is illegal for UTF32 output"
+ ,code)
+ +replacement-character-code+)))
+ (t
+ (out ,code)))))
nil
(copy-state (state)
;; The state is either NIL or T, so we can just return that.
Index: src/pcl/simple-streams/external-formats/utf-8.lisp
diff -u src/pcl/simple-streams/external-formats/utf-8.lisp:1.8 src/pcl/simple-streams/external-formats/utf-8.lisp:1.9
--- src/pcl/simple-streams/external-formats/utf-8.lisp:1.8 Fri Jul 2 12:36:01 2010
+++ src/pcl/simple-streams/external-formats/utf-8.lisp Fri Jul 2 19:13:12 2010
@@ -4,7 +4,7 @@
;;; This code was written by Paul Foley and has been placed in the public
;;; domain.
;;;
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.8 2010-07-02 16:36:01 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/pcl/simple-streams/external-formats/utf-8.lisp,v 1.9 2010-07-02 23:13:12 rtoy Exp $")
(in-package "STREAM")
@@ -47,7 +47,7 @@
;; encode to codepoints that don't need that long of a
;; sequence) and any surrogate values and any code
;; outside the 21-bit Unicode range.
- (if (or (> ,n #x10ffff)
+ (if (or (>= ,n lisp:codepoint-limit)
(<= ,n (the (member 127 1023 32767)
(svref #(127 1023 32767) (1- ,i)))) ; overlong
(lisp::surrogatep ,n)) ; surrogate
@@ -55,7 +55,7 @@
(,unput ,i)
(values (if ,error
(cond
- ((> ,n #x10ffff)
+ ((>= ,n lisp:codepoint-limit)
(funcall ,error "Invalid codepoint" nil nil))
((lisp::surrogatep ,n)
(funcall ,error "Invalid surrogate code #x~4,0X" ,n nil))
More information about the cmucl-commit
mailing list