CMUCL commit: src/code (4 files)

Raymond Toy rtoy at common-lisp.net
Sun Aug 15 14:04:44 CEST 2010


    Date: Sunday, August 15, 2010 @ 08:04:44
  Author: rtoy
    Path: /project/cmucl/cvsroot/src/code

Modified: extfmts.lisp fd-stream.lisp stream.lisp struct.lisp

Fix file-position bug in trac #36.  We add an array to keep track of
the octets consumed for each character.  This array is used to figure
out the file position.  Some tests comparing this scheme indicates a
very small slowdown of about 1%, so this seems not to hurt.

Use a cross-compile using the 2010-07 snapshot to build this.  (Same
procedure as used to build the 20b-pre1 release.)

struct.lisp:
o Add new slot OCTET-COUNT to LISP-STREAM to hold the array of octets
  per character.

extfmts.lisp:
o Add OCTETS-TO-STRING-COUNTED, which is like OCTETS-TO-STRING, except
  we need an array in which to store the number of octets consumed for
  each character processed.

fd-stream.lisp:
o Create the octet-count array creating the lisp stream string buffer.
o In FD-STREAM-FILE-POSITION, use the octet count to count the number
  of octets that have been read but not yet returned to the user.

stream.lisp:
o Use OCTETS-TO-STRING-COUNTED instead of OCTETS-TO-STRING so we keep
  track of octet length of each character processed.


----------------+
 extfmts.lisp   |   71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fd-stream.lisp |   23 +++++++++++------
 stream.lisp    |   20 ++++++++-------
 struct.lisp    |   16 +++++++++---
 4 files changed, 108 insertions(+), 22 deletions(-)


Index: src/code/extfmts.lisp
diff -u src/code/extfmts.lisp:1.37 src/code/extfmts.lisp:1.38
--- src/code/extfmts.lisp:1.37	Sat Aug 14 19:18:03 2010
+++ src/code/extfmts.lisp	Sun Aug 15 08:04:43 2010
@@ -5,7 +5,7 @@
 ;;; domain.
 ;;; 
 (ext:file-comment
- "$Header: /project/cmucl/cvsroot/src/code/extfmts.lisp,v 1.37 2010-08-14 23:18:03 rtoy Exp $")
+ "$Header: /project/cmucl/cvsroot/src/code/extfmts.lisp,v 1.38 2010-08-15 12:04:43 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -920,6 +920,75 @@
 		 error)
       (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
 
+
+(def-ef-macro ef-octets-to-string-counted (extfmt lisp::lisp +ef-max+ +ef-os+)
+  `(lambda (octets ptr end state ocount string s-start s-end error
+	    &aux (pos s-start) (last-octet 0))
+     (declare (optimize (speed 3) (safety 0) #|(space 0) (debug 0)|#)
+	      (type (simple-array (unsigned-byte 8) (*)) octets ocount)
+	      (type kernel:index pos end last-octet s-start s-end)
+	      (type (integer -1 (#.array-dimension-limit)) ptr)
+	      (type simple-string string)
+	      (ignorable state))
+     (catch 'end-of-octets
+       (loop for k of-type fixnum from 0 
+	  while (< pos s-end)
+	  do (setf (schar string pos)
+		   (octets-to-char ,extfmt state (aref ocount k)
+				   (if (>= ptr end)
+				       (throw 'end-of-octets nil)
+				       (aref octets (incf ptr)))
+				   (lambda (n) (decf ptr n))
+				   error))
+	  (incf pos)
+	  (incf last-octet (aref ocount k))))
+     (values string pos last-octet state)))
+
+;; Like OCTETS-TO-STRING, but we take an extra argument which is an
+;; array which will contain the number of octets read for each
+;; character placed in the output string.
+(defun octets-to-string-counted (octets ocount
+				 &key (start 0) end (external-format :default)
+				 (string nil stringp)
+				 (s-start 0) (s-end nil s-end-p)
+				 (state nil)
+				 error)
+  "Octets-to-string converts an array of octets in Octets to a string
+  according to the specified External-format.  The array of octets is
+  bounded by Start (defaulting ot 0) and End (defaulting to the end of
+  the array.  If String is not given, a new string is created.  If
+  String is given, the converted octets are stored in String, starting
+  at S-Start (defaulting to the 0) and ending at S-End (defaulting to
+  the length of String).  If the string is not large enough to hold
+  all of characters, then some octets will not be converted.  A State
+  may also be specified; this is used as the state of the external
+  format.
+
+  In Ocount, the number of octets read for each character in the
+  string is saved
+
+  Four values are returned: the string, the number of characters read,
+  the number of octets actually consumed and the new state of the
+  external format."
+  (declare (type (simple-array (unsigned-byte 8) (*)) octets ocount)
+	   (type kernel:index start s-start)
+	   (type (or kernel:index null) end)
+	   (type (or simple-string null) string))
+  (let ((s-end (if s-end-p
+		   s-end
+		   (if stringp
+		       (length string)
+		       (length octets)))))
+    (multiple-value-bind (string pos last-octet new-state)
+	(funcall (ef-octets-to-string-counted external-format)
+		 octets (1- start) (1- (or end (length octets)))
+		 state
+		 ocount
+		 (or string (make-string (length octets)))
+		 s-start s-end
+		 error)
+      (values (if stringp string (lisp::shrink-vector string pos)) pos last-octet new-state))))
+
 
 
 (def-ef-macro ef-encode (extfmt lisp::lisp +ef-max+ +ef-en+)
Index: src/code/fd-stream.lisp
diff -u src/code/fd-stream.lisp:1.115 src/code/fd-stream.lisp:1.116
--- src/code/fd-stream.lisp:1.115	Tue Aug  3 23:37:51 2010
+++ src/code/fd-stream.lisp	Sun Aug 15 08:04:43 2010
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/fd-stream.lisp,v 1.115 2010-08-04 03:37:51 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/fd-stream.lisp,v 1.116 2010-08-15 12:04:43 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -1450,6 +1450,8 @@
 		;; FAST-READ-CHAR.)
 		(setf (lisp-stream-string-buffer stream)
 		      (make-string (1+ in-buffer-length)))
+		(setf (fd-stream-octet-count stream)
+		      (make-array in-buffer-length :element-type '(unsigned-byte 8)))
 		(setf (lisp-stream-string-buffer-len stream) 0)
 		(setf (lisp-stream-string-index stream) 0)))))
 	(setf input-size size)
@@ -1706,13 +1708,18 @@
 		     ;; The string buffer contains Lisp characters,
 		     ;; not octets!  To figure out how many octets
 		     ;; have not been already supplied, we need to
-		     ;; convert them back to the encoded format and
-		     ;; count the number of octets.
-		     (decf posn
-			   (length (string-encode (fd-stream-string-buffer stream)
-						  (fd-stream-external-format stream)
-						  (fd-stream-string-index stream)
-						  (fd-stream-string-buffer-len stream))))
+		     ;; count how many octets were consumed for all
+		     ;; the characters in the string bbuffer that have
+		     ;; not been supplied.
+		     (let ((ocount (fd-stream-octet-count stream)))
+		       (when ocount
+			 ;; Note: string-index starts at 1 (because
+			 ;; index 0 is for the unread-char), but
+			 ;; octet-count doesn't use that.  Hence,
+			 ;; subtract one from string-index.
+			 (loop for k of-type fixnum from (1- (fd-stream-string-index stream))
+			    below (fd-stream-string-buffer-len stream)
+			    do (decf posn (aref ocount k)))))
 		     (decf posn (- (fd-stream-ibuf-tail stream)
 				   (fd-stream-ibuf-head stream))))
 		 (when (fd-stream-unread stream) ;;@@
Index: src/code/stream.lisp
diff -u src/code/stream.lisp:1.95 src/code/stream.lisp:1.96
--- src/code/stream.lisp:1.95	Mon Aug  9 18:45:14 2010
+++ src/code/stream.lisp	Sun Aug 15 08:04:43 2010
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/stream.lisp,v 1.95 2010-08-09 22:45:14 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/stream.lisp,v 1.96 2010-08-15 12:04:43 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -802,14 +802,16 @@
 	       ;; Convert all the octets, including the ones that we
 	       ;; haven't processed yet and the ones we just read in.
 	       (multiple-value-bind (s char-count octet-count new-state)
-		   (octets-to-string ibuf
-				     :start 0
-				     :end (fd-stream-in-length stream)
-				     :state (fd-stream-oc-state stream)
-				     :string sbuf
-				     :s-start 1
-				     :external-format (fd-stream-external-format stream)
-				     :error (fd-stream-octets-to-char-error stream))
+		   (stream::octets-to-string-counted
+		    ibuf
+		    (fd-stream-octet-count stream)
+		    :start 0
+		    :end (fd-stream-in-length stream)
+		    :state (fd-stream-oc-state stream)
+		    :string sbuf
+		    :s-start 1
+		    :external-format (fd-stream-external-format stream)
+		    :error (fd-stream-octets-to-char-error stream))
 		 (declare (ignore s))
 		 (setf (fd-stream-oc-state stream) new-state)
 		 (setf (lisp-stream-string-buffer-len stream) char-count)
Index: src/code/struct.lisp
diff -u src/code/struct.lisp:1.24 src/code/struct.lisp:1.25
--- src/code/struct.lisp:1.24	Sun Apr 18 22:18:04 2010
+++ src/code/struct.lisp	Sun Aug 15 08:04:44 2010
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/struct.lisp,v 1.24 2010-04-19 02:18:04 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/struct.lisp,v 1.25 2010-08-15 12:04:44 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -41,11 +41,13 @@
   (misc #'do-nothing :type function)		; Less used methods
   ;;
   ;; A string to hold characters that have been converted from
-  ;; in-buffer.
+  ;; in-buffer.  The very first character is for unreading.  New
+  ;; characters are stored starting at index 1.
   #+unicode
   (string-buffer nil :type (or null simple-string))
   ;;
-  ;; Index into string-buffer where the next character should be read from
+  ;; Index into string-buffer where the next character should be read
+  ;; from.
   #+unicode
   (string-index 0 :type index)
   ;;
@@ -53,7 +55,13 @@
   ;; string-buffer, but the number of characters in the buffer, since
   ;; many octets may be consumed to produce one character.)
   #+unicode
-  (string-buffer-len 0 :type index))
+  (string-buffer-len 0 :type index)
+  ;;
+  ;; An array holding the number of octets consumed for each character
+  ;; in string-buffer.  This is basically unused, except by
+  ;; FILE-POSITION so that we can get the correct file position.
+  #+unicode
+  (octet-count nil :type (or null (simple-array (unsigned-byte 8) (*)))))
 
 (declaim (inline streamp))
 (defun streamp (stream)



More information about the cmucl-commit mailing list