[cmucl-commit] [git] CMU Common Lisp branch master updated. snapshot-2012-04-6-ga602748
Raymond Toy
rtoy at common-lisp.net
Fri Apr 20 16:36:08 UTC 2012
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMU Common Lisp".
The branch, master has been updated
via a60274829b62c938292920f8f19b1c269240e8c2 (commit)
via 99d39125b83f6173912c2a689e41db4c40096061 (commit)
from 9f6b065b0ea4b758a00fa9b3b790b5ef2bb8853d (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit a60274829b62c938292920f8f19b1c269240e8c2
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Fri Apr 20 09:35:43 2012 -0700
Oops. Debugging print accidentally left in.
diff --git a/src/code/stream.lisp b/src/code/stream.lisp
index a91d4d7..b5d34ad 100644
--- a/src/code/stream.lisp
+++ b/src/code/stream.lisp
@@ -829,6 +829,7 @@
(flet
((convert-buffer ()
(let ((old-state (fd-stream-oc-state stream)))
+ #+(or debug-frc-sr)
(format t "old-state = ~S~%" old-state)
(multiple-value-bind (s char-count octet-count new-state)
(stream::octets-to-string-counted
commit 99d39125b83f6173912c2a689e41db4c40096061
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Fri Apr 20 09:34:48 2012 -0700
UTF-16-BE and UTF-16-LE external formats were returning the incorrect
number of octets for surrogate pairs. Two was returned instead of
four.
* src/pcl/simple-streams/external-formats/utf-16-be.lisp:
* src/pcl/simple-streams/external-formats/utf-16-le.lisp:
* Return correct number of octets
* src/general-info/release-20d.txt
* Update.
diff --git a/src/general-info/release-20d.txt b/src/general-info/release-20d.txt
index 6bd52d5..fe9baaf 100644
--- a/src/general-info/release-20d.txt
+++ b/src/general-info/release-20d.txt
@@ -61,6 +61,9 @@ New in this release:
double-float multiplier from being used when sse3 is available.
* External format for UTF-32 was generating an error when
converting octets to a string.
+ * The UTF-16-BE and UTF-16-LE external formats were returning the
+ incorrect number of octets when surrogates pairs were decoded.
+ This confuses the stream buffering code.
* Trac Tickets:
* #50: Print/read error with make-pathname.
diff --git a/src/pcl/simple-streams/external-formats/utf-16-be.lisp b/src/pcl/simple-streams/external-formats/utf-16-be.lisp
index e2780ea..948a24e 100644
--- a/src/pcl/simple-streams/external-formats/utf-16-be.lisp
+++ b/src/pcl/simple-streams/external-formats/utf-16-be.lisp
@@ -21,10 +21,11 @@ By default, illegal inputs and illegal outputs are replaced by the
Unicode replacement character.")
()
- (octets-to-code (state input unput error c1 c2 code next)
+ (octets-to-code (state input unput error c1 c2 code wd next)
`(let* ((,c1 ,input)
(,c2 ,input)
- (,code (+ (* 256 ,c1) ,c2)))
+ (,code (+ (* 256 ,c1) ,c2))
+ (, wd 2))
(declare (type lisp:codepoint ,code))
(cond ((lisp::surrogatep ,code :low)
;; Got low surrogate. Combine with the state (high
@@ -55,7 +56,8 @@ Unicode replacement character.")
;; unput 2 so it'll be read as another character
;; next time around?
(if (lisp::surrogatep ,next :low)
- (setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400))
+ (setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400)
+ ,wd 4)
(setf ,code
(if ,error
(locally
@@ -74,7 +76,7 @@ Unicode replacement character.")
(funcall ,error "BOM is not valid within a UTF-16 stream" ,code 2))
+replacement-character-code+)))
(t (setf ,state nil)))
- (values ,code 2)))
+ (values ,code ,wd)))
(code-to-octets (code state output error c c1 c2)
`(flet ((output (code)
(,output (ldb (byte 8 8) code))
diff --git a/src/pcl/simple-streams/external-formats/utf-16-le.lisp b/src/pcl/simple-streams/external-formats/utf-16-le.lisp
index b7faa28..6fb4b65 100644
--- a/src/pcl/simple-streams/external-formats/utf-16-le.lisp
+++ b/src/pcl/simple-streams/external-formats/utf-16-le.lisp
@@ -22,10 +22,11 @@ By default, illegal inputs and illegal outputs are replaced by the
Unicode replacement character.")
()
- (octets-to-code (state input unput error c1 c2 code next)
+ (octets-to-code (state input unput error c1 c2 code wd next)
`(let* ((,c1 ,input)
(,c2 ,input)
- (,code (+ (* 256 ,c2) ,c1)))
+ (,code (+ (* 256 ,c2) ,c1))
+ (,wd 2))
(declare (type lisp:codepoint ,code))
(cond ((lisp::surrogatep ,code :low)
;; If possible combine this low surrogate with the
@@ -56,14 +57,15 @@ Unicode replacement character.")
;; unput 2 so it'll be read as another character
;; next time around?
(if (lisp::surrogatep ,next :low)
- (setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400))
+ (setq ,code (+ (ash (- ,code #xD800) 10) ,next #x2400)
+ ,wd 4)
(setq ,code
(if ,error
(locally
;; No warnings about fdefinition
(declare (optimize (ext:inhibit-warnings 3)))
(funcall ,error "High surrogate followed by #x~4,'0X ~
- instead of low surrogate" ,next 2))
+ instead of low surrogate" ,next ,wd))
+replacement-character-code+)))))
((= ,code #xFFFE)
;; replace with REPLACEMENT CHARACTER.
@@ -75,7 +77,7 @@ Unicode replacement character.")
(funcall ,error "BOM is not valid within a UTF-16 stream" ,code 2))
+replacement-character-code+)))
(t (setf ,state nil)))
- (values ,code 2)))
+ (values ,code ,wd)))
(code-to-octets (code state output error c c1 c2)
`(flet ((output (code)
(,output (ldb (byte 8 0) code))
-----------------------------------------------------------------------
Summary of changes:
src/code/stream.lisp | 1 +
src/general-info/release-20d.txt | 3 +++
.../simple-streams/external-formats/utf-16-be.lisp | 10 ++++++----
.../simple-streams/external-formats/utf-16-le.lisp | 12 +++++++-----
4 files changed, 17 insertions(+), 9 deletions(-)
hooks/post-receive
--
CMU Common Lisp
More information about the cmucl-commit
mailing list