CMUCL commit: RELEASE-20B-BRANCH src/code (4 files)

Sun Aug 15 17:07:51 CEST 2010

Date: Sunday, August 15, 2010 @ 11:07:51
  Author: rtoy
    Path: /project/cmucl/cvsroot/src/code
     Tag: RELEASE-20B-BRANCH

Modified: extfmts.lisp fd-stream.lisp stream.lisp struct.lisp

Merge fix from HEAD to fix trac #36: file-position wrong.


----------------+
 extfmts.lisp   |   71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fd-stream.lisp |   56 ++++++++++++++++++++++++++++++-------------
 stream.lisp    |   20 ++++++++-------
 struct.lisp    |   16 +++++++++---
 4 files changed, 132 insertions(+), 31 deletions(-)


Index: src/code/extfmts.lisp
diff -u src/code/extfmts.lisp:1.35.4.2 src/code/extfmts.lisp:1.35.4.3

--- src/code/extfmts.lisp:1.35.4.2	Sat Aug 14 19:51:08 2010
+++ src/code/extfmts.lisp	Sun Aug 15 11:07:51 2010
@@ -5,7 +5,7 @@
 ;;; domain.
 ;;; 
 (ext:file-comment
- "$Header: /project/cmucl/cvsroot/src/code/extfmts.lisp,v 1.35.4.2 2010-08-14 23:51:08 rtoy Exp $")
+ "$Header: /project/cmucl/cvsroot/src/code/extfmts.lisp,v 1.35.4.3 2010-08-15 15:07:51 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -920,6 +920,75 @@
 		 error)
       (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
 
+
+(def-ef-macro ef-octets-to-string-counted (extfmt lisp::lisp +ef-max+ +ef-os+)
+  `(lambda (octets ptr end state ocount string s-start s-end error
+	    &aux (pos s-start) (last-octet 0))
+     (declare (optimize (speed 3) (safety 0) #|(space 0) (debug 0)|#)
+	      (type (simple-array (unsigned-byte 8) (*)) octets ocount)
+	      (type kernel:index pos end last-octet s-start s-end)
+	      (type (integer -1 (#.array-dimension-limit)) ptr)
+	      (type simple-string string)
+	      (ignorable state))
+     (catch 'end-of-octets
+       (loop for k of-type fixnum from 0 
+	  while (< pos s-end)
+	  do (setf (schar string pos)
+		   (octets-to-char ,extfmt state (aref ocount k)
+				   (if (>= ptr end)
+				       (throw 'end-of-octets nil)
+				       (aref octets (incf ptr)))
+				   (lambda (n) (decf ptr n))
+				   error))
+	  (incf pos)
+	  (incf last-octet (aref ocount k))))
+     (values string pos last-octet state)))
+
+;; Like OCTETS-TO-STRING, but we take an extra argument which is an
+;; array which will contain the number of octets read for each
+;; character placed in the output string.
+(defun octets-to-string-counted (octets ocount
+				 &key (start 0) end (external-format :default)
+				 (string nil stringp)
+				 (s-start 0) (s-end nil s-end-p)
+				 (state nil)
+				 error)
+  "Octets-to-string converts an array of octets in Octets to a string
+  according to the specified External-format.  The array of octets is
+  bounded by Start (defaulting ot 0) and End (defaulting to the end of
+  the array.  If String is not given, a new string is created.  If
+  String is given, the converted octets are stored in String, starting
+  at S-Start (defaulting to the 0) and ending at S-End (defaulting to
+  the length of String).  If the string is not large enough to hold
+  all of characters, then some octets will not be converted.  A State
+  may also be specified; this is used as the state of the external
+  format.
+
+  In Ocount, the number of octets read for each character in the
+  string is saved
+
+  Four values are returned: the string, the number of characters read,
+  the number of octets actually consumed and the new state of the
+  external format."
+  (declare (type (simple-array (unsigned-byte 8) (*)) octets ocount)
+	   (type kernel:index start s-start)
+	   (type (or kernel:index null) end)
+	   (type (or simple-string null) string))
+  (let ((s-end (if s-end-p
+		   s-end
+		   (if stringp
+		       (length string)
+		       (length octets)))))
+    (multiple-value-bind (string pos last-octet new-state)
+	(funcall (ef-octets-to-string-counted external-format)
+		 octets (1- start) (1- (or end (length octets)))
+		 state
+		 ocount
+		 (or string (make-string (length octets)))
+		 s-start s-end
+		 error)
+      (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
+
 
 
 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
Index: src/code/fd-stream.lisp
diff -u src/code/fd-stream.lisp:1.114 src/code/fd-stream.lisp:1.114.2.1
--- src/code/fd-stream.lisp:1.114	Tue Jul 20 18:53:11 2010
+++ src/code/fd-stream.lisp	Sun Aug 15 11:07:51 2010
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/fd-stream.lisp,v 1.114 2010-07-20 22:53:11 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/fd-stream.lisp,v 1.114.2.1 2010-08-15 15:07:51 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -1450,6 +1450,8 @@
 		;; FAST-READ-CHAR.)
 		(setf (lisp-stream-string-buffer stream)
 		      (make-string (1+ in-buffer-length)))
+		(setf (fd-stream-octet-count stream)
+		      (make-array in-buffer-length :element-type '(unsigned-byte 8)))
 		(setf (lisp-stream-string-buffer-len stream) 0)
 		(setf (lisp-stream-string-index stream) 0)))))
 	(setf input-size size)
@@ -1706,13 +1708,18 @@
 		     ;; The string buffer contains Lisp characters,
 		     ;; not octets!  To figure out how many octets
 		     ;; have not been already supplied, we need to
-		     ;; convert them back to the encoded format and
-		     ;; count the number of octets.
-		     (decf posn
-			   (length (string-encode (fd-stream-string-buffer stream)
-						  (fd-stream-external-format stream)
-						  (fd-stream-string-index stream)
-						  (fd-stream-string-buffer-len stream))))
+		     ;; count how many octets were consumed for all
+		     ;; the characters in the string bbuffer that have
+		     ;; not been supplied.
+		     (let ((ocount (fd-stream-octet-count stream)))
+		       (when ocount
+			 ;; Note: string-index starts at 1 (because
+			 ;; index 0 is for the unread-char), but
+			 ;; octet-count doesn't use that.  Hence,
+			 ;; subtract one from string-index.
+			 (loop for k of-type fixnum from (1- (fd-stream-string-index stream))
+			    below (fd-stream-string-buffer-len stream)
+			    do (decf posn (aref ocount k)))))
 		     (decf posn (- (fd-stream-ibuf-tail stream)
 				   (fd-stream-ibuf-head stream))))
 		 (when (fd-stream-unread stream) ;;@@
@@ -1841,11 +1848,18 @@
 			  (d (cond ((characterp decoding-error)
 				    (constantly (char-code decoding-error)))
 				   ((eq t decoding-error)
+				    #+unicode
 				    #'(lambda (&rest args)
 					(apply 'cerror
 					       (intl:gettext "Use Unicode replacement character instead")
 					       args)
-					stream:+replacement-character-code+))
+					stream:+replacement-character-code+)
+				    #-unicode
+				    #'(lambda (&rest args)
+					(apply 'cerror
+					       (intl:gettext "Use question mark character instead")
+					       args)
+					#\?))
 				   (t
 				    decoding-error))))
 		      (%make-fd-stream :fd fd
@@ -2201,15 +2215,23 @@
    :if-does-not-exist - one of :error, :create or nil
    :external-format - an external format name
    :decoding-error - How to handle decoding errors from the external format.
-                       Should be a symbol or function of 3 arguments.  If it
-                       returns, it should return a code point to use as the
-                       replacment.  NIL means use the default replacement scheme
-                       specified by the external format.  The function arguments
-                       are a format message string, the offending octet, and the
-                       number of octets read in the current encoding.   
+                       If a character, then that character is used as
+                       the replacment character for all errors.  If T,
+                       then a continuable error is signaled.  If
+                       continued, the Unicode replacement character is
+                       used.  Otherwise, it should be a symbol or
+                       function of 3 arguments.  If it returns, it
+                       should return a code point to use as the
+                       replacment.  The function arguments are a
+                       format message string, the offending octet, and
+                       the number of octets read in the current
+                       encoding.
    :encoding-error - Like :decoding-error, but for errors when encoding the
-                       stream.  The function arguments are a format message
-                       string and the incorrect codepoint.
+                       stream.  If a character, that character is used
+                       as the replacment code point.  Otherwise, it
+                       should be a symbol or function oof two
+                       arguments: a format message string and the
+                       incorrect codepoint.
 
   See the manual for details."
   (declare (ignore element-type external-format input-handle output-handle
Index: src/code/stream.lisp
diff -u src/code/stream.lisp:1.94.4.1 src/code/stream.lisp:1.94.4.2
--- src/code/stream.lisp:1.94.4.1	Mon Aug  9 18:46:42 2010
+++ src/code/stream.lisp	Sun Aug 15 11:07:51 2010
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/stream.lisp,v 1.94.4.1 2010-08-09 22:46:42 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/stream.lisp,v 1.94.4.2 2010-08-15 15:07:51 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -802,14 +802,16 @@
 	       ;; Convert all the octets, including the ones that we
 	       ;; haven't processed yet and the ones we just read in.
 	       (multiple-value-bind (s char-count octet-count new-state)
-		   (octets-to-string ibuf
-				     :start 0
-				     :end (fd-stream-in-length stream)
-				     :state (fd-stream-oc-state stream)
-				     :string sbuf
-				     :s-start 1
-				     :external-format (fd-stream-external-format stream)
-				     :error (fd-stream-octets-to-char-error stream))
+		   (stream::octets-to-string-counted
+		    ibuf
+		    (fd-stream-octet-count stream)
+		    :start 0
+		    :end (fd-stream-in-length stream)
+		    :state (fd-stream-oc-state stream)
+		    :string sbuf
+		    :s-start 1
+		    :external-format (fd-stream-external-format stream)
+		    :error (fd-stream-octets-to-char-error stream))
 		 (declare (ignore s))
 		 (setf (fd-stream-oc-state stream) new-state)
 		 (setf (lisp-stream-string-buffer-len stream) char-count)
Index: src/code/struct.lisp
diff -u src/code/struct.lisp:1.24 src/code/struct.lisp:1.24.4.1
--- src/code/struct.lisp:1.24	Sun Apr 18 22:18:04 2010
+++ src/code/struct.lisp	Sun Aug 15 11:07:51 2010
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/struct.lisp,v 1.24 2010-04-19 02:18:04 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/struct.lisp,v 1.24.4.1 2010-08-15 15:07:51 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -41,11 +41,13 @@
   (misc #'do-nothing :type function)		; Less used methods
   ;;
   ;; A string to hold characters that have been converted from
-  ;; in-buffer.
+  ;; in-buffer.  The very first character is for unreading.  New
+  ;; characters are stored starting at index 1.
   #+unicode
   (string-buffer nil :type (or null simple-string))
   ;;
-  ;; Index into string-buffer where the next character should be read from
+  ;; Index into string-buffer where the next character should be read
+  ;; from.
   #+unicode
   (string-index 0 :type index)
   ;;
@@ -53,7 +55,13 @@
   ;; string-buffer, but the number of characters in the buffer, since
   ;; many octets may be consumed to produce one character.)
   #+unicode
-  (string-buffer-len 0 :type index))
+  (string-buffer-len 0 :type index)
+  ;;
+  ;; An array holding the number of octets consumed for each character
+  ;; in string-buffer.  This is basically unused, except by
+  ;; FILE-POSITION so that we can get the correct file position.
+  #+unicode
+  (octet-count nil :type (or null (simple-array (unsigned-byte 8) (*)))))
 
 (declaim (inline streamp))
 (defun streamp (stream)