[cmucl-commit] [git] CMU Common Lisp branch file-attribute created. snapshot-2012-06-4-g81f65db

Raymond Toy rtoy at common-lisp.net
Tue Jun 26 03:47:33 UTC 2012


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMU Common Lisp".

The branch, file-attribute has been created
        at  81f65db452ae1c7c1b8907d10f5bb8bbd4cff37f (commit)

- Log -----------------------------------------------------------------
commit 81f65db452ae1c7c1b8907d10f5bb8bbd4cff37f
Merge: 7ed7451 8a9d1d8
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Mon Jun 25 20:46:20 2012 -0700

    Merge branch 'master' into file-attribute


commit 7ed745111ae7a478ba867db257a1888d2948b524
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Thu May 24 21:39:39 2012 -0700

    First cut at :file-attribute external format (mostly from Douglas.)

diff --git a/src/code/exports.lisp b/src/code/exports.lisp
index a46d5bb..d0cfe58 100644
--- a/src/code/exports.lisp
+++ b/src/code/exports.lisp
@@ -1511,6 +1511,7 @@
 		"DESCRIBE-EXTERNAL-FORMAT")
   ;; Unicode
   (:export "STRING-TO-OCTETS" "OCTETS-TO-STRING" "*DEFAULT-EXTERNAL-FORMAT*"
+	   "*DEFAULT-SOURCE-EXTERNAL-FORMAT*"
 	   "DESCRIBE-EXTERNAL-FORMAT"
 	   "LIST-ALL-EXTERNAL-FORMATS"
 	   "STRING-ENCODE" "STRING-DECODE"
diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp
index 45b5847..e854a73 100644
--- a/src/code/fd-stream.lisp
+++ b/src/code/fd-stream.lisp
@@ -1362,6 +1362,169 @@
 
 ;;;; Utility functions (misc routines, etc)
 
+(defvar *stream-encoding-file-attribute-translations*
+  '(;; Emacs specific codings.
+    (:iso8859-1 "latin-1" "latin-1-unix" "iso-latin-1-unix" "iso-8859-1-unix")
+    (:utf-8 "utf-8-unix")
+    (:euc-jp "euc-jp-unix")
+    )
+  "List of coding translations used by 'stream-encoding-file-attribute to map
+  the read file coding into a native external-format.  Each element is a list of
+  a native external-format followed byte a list of coding strings that are to be
+  mapped to this native format.")
+
+;;; stream-encoding-file-attribute  --  Internal
+;;;
+;;; Read the encoding file option from the stream 's which is expected to be a
+;;; character stream with an external-format of :iso8859-1.
+;;;
+(defun stream-encoding-file-attribute (s)
+  (let* ((initial-encoding nil)
+	 (declared-encoding nil)
+	 (buffer (make-array 1024 :element-type '(unsigned-byte 8)))
+	 (available (do ((i 0 (1+ i)))
+			((>= i 1024) i)
+		      (declare (fixnum i))
+		      (let ((ch (read-char s nil nil)))
+			(unless ch (return i))
+			(setf (aref buffer i) (char-code ch))))))
+    (labels ((decode-ascii (start size offset)
+	       (declare (type fixnum start)
+			(type (integer 1 4) size)
+			(type (integer 0 3) offset))
+	       (let ((ascii (make-array 64 :element-type 'character
+					:adjustable t :fill-pointer 0)))
+		 (do ()
+		     ((< available (+ start size)))
+		   (let* ((code (ecase size
+				  (1 (aref buffer start))
+				  (2 (let ((b0 (aref buffer start))
+					   (b1 (aref buffer (1+ start))))
+				       (ecase offset
+					 (0 (logior (ash b1 8) b0))
+					 (1 (logior (ash b0 8) b1)))))
+				  (4
+				   (let ((b0 (aref buffer start))
+					 (b1 (aref buffer (+ start 1)))
+					 (b2 (aref buffer (+ start 2)))
+					 (b3 (aref buffer (+ start 3))))
+				     (ecase offset
+				       (0 (logior (ash b3 24) (ash b2 16) (ash b1 8) b0))
+				       (1 (logior (ash b1 24) (ash b0 16) (ash b3 8) b2))
+				       (2 (logior (ash b2 24) (ash b3 16) (ash b0 8) b1))
+				       (3 (logior (ash b0 24) (ash b1 16) (ash b2 8) b3))))))))
+		     (incf start size)
+		     (let ((ch (if (< 0 code #x80) (code-char code) #\?)))
+		       (vector-push-extend ch ascii))))
+		 ascii))
+	     (parse-file-option (ascii)
+	       ;; Parse the file options.
+	       (let ((found (search "-*-" ascii))
+		     (options nil))
+		 (when found
+		   (block do-file-options
+		     (let* ((start (+ found 3))
+			    (end (search "-*-" ascii :start2 start)))
+		       (unless end
+			 (return-from do-file-options))
+		       (unless (find #\: ascii :start start :end end)
+			 (return-from do-file-options))
+		       (do ((opt-start start (1+ semi)) colon semi)
+			   (nil)
+			 (setf colon (position #\: ascii :start opt-start :end end))
+			 (unless colon
+			   (return-from do-file-options))
+			 (setf semi (or (position #\; ascii :start colon :end end) end))
+			 (let ((option (string-trim '(#\space #\tab)
+						    (subseq ascii opt-start colon)))
+			       (value (string-trim '(#\space #\tab)
+						   (subseq ascii (1+ colon) semi))))
+			   (push (cons option value) options)
+			   (when (= semi end) (return nil)))))))
+		 (setf declared-encoding
+		       (cond ((cdr (assoc "external-format" options :test 'equalp)))
+			     ((cdr (assoc "encoding" options :test 'equalp)))
+			     ((cdr (assoc "coding" options :test 'equalp))))))))
+      (cond ((>= available 4)
+	     (let ((b1 (aref buffer 0))
+		   (b2 (aref buffer 1))
+		   (b3 (aref buffer 2))
+		   (b4 (aref buffer 3)))
+	       (cond ((and (= b1 #x00) (= b2 #x00) (= b3 #xFE) (= b4 #xFF))
+		      (setf initial-encoding :ucs-4be)
+		      (parse-file-option (decode-ascii 4 4 3)))
+		     ((and (= b1 #xff) (= b2 #xfe))
+		      (cond ((and (= b3 #x00) (= b4 #x00))
+			     (setf initial-encoding :ucs-4le)
+			     (parse-file-option (decode-ascii 4 4 0)))
+			    (t
+			     (setf initial-encoding :utf-16le)
+			     (parse-file-option (decode-ascii 2 2 0)))))
+		     ((and (= b1 #x00) (= b2 #x00) (= b3 #xFF) (= b4 #xFE))
+		      (parse-file-option (decode-ascii 4 4 2)))
+		     ((and (= b1 #xfe) (= b2 #xff))
+		      (cond ((and (= b3 #x00) (= b4 #x00))
+			     (parse-file-option (decode-ascii 4 4 1)))
+			    (t
+			     (setf initial-encoding :utf-16be)
+			     (parse-file-option (decode-ascii 2 2 1)))))
+		     ((and (= b1 #xEF) (= b2 #xBB) (= b3 #xBF))
+		      (setf initial-encoding :utf-8))
+		     ((and (> b1 0) (= b2 0) (= b3 0) (= b4 0))
+		      (setf initial-encoding :ucs-4le)
+		      (parse-file-option (decode-ascii 0 4 0)))
+		     ((and (= b1 0) (> b2 0) (= b3 0) (= b4 0))
+		      (parse-file-option (decode-ascii 0 4 1)))
+		     ((and (= b1 0) (= b2 0) (> b3 0) (= b4 0))
+		      (parse-file-option (decode-ascii 0 4 2)))
+		     ((and (= b1 0) (= b2 0) (= b3 0) (> b4 0))
+		      (setf initial-encoding :ucs-4be)
+		      (parse-file-option (decode-ascii 0 4 3)))
+		     ((and (> b1 0) (= b2 0) (> b3 0) (= b4 0))
+		      (setf initial-encoding :utf-16le)
+		      (parse-file-option (decode-ascii 0 2 0)))
+		     ((and (= b1 0) (> b2 0) (= b3 0) (> b4 0))
+		      (setf initial-encoding :utf-16be)
+		      (parse-file-option (decode-ascii 0 2 1)))
+		     ((and (= b1 #x2B) (= b2 #x41)
+			   (or (= b3 #x43) (= b3 #x44)))
+		      (setf initial-encoding :utf-7))
+		     ((and (= b1 #x2F) (= b2 #x2B) (= b3 #x41))
+		      (setf initial-encoding :utf-7))
+		     (t
+		      (parse-file-option (decode-ascii 0 1 0))))))
+	    ((= available 3)
+	     (when (and (= (aref buffer 0) #xEF)
+			(= (aref buffer 1) #xBB)
+			(= (aref buffer 2) #xBF))
+	       (setf initial-encoding :utf-8)))
+	    ((= available 2)
+	     (let ((b1 (aref buffer 0))
+		   (b2 (aref buffer 1)))
+	       (cond ((and (= b1 #xff) (= b2 #xfe))
+		      (setf initial-encoding :utf-16le))
+		     ((and (= b1 #xfe) (= b2 #xff))
+		      (setf initial-encoding :utf-16be)))))))
+    ;;
+    ;;
+    (cond ((and (not initial-encoding) (not declared-encoding))
+	   :default)
+	  (t
+	   (let ((encoding (or declared-encoding initial-encoding)))
+	     (when (stringp encoding)
+	       (setf encoding (string-upcase encoding))
+	       (dolist (translations *stream-encoding-file-attribute-translations*)
+		 (when (member encoding (rest translations) :test 'equalp)
+		   (setf encoding (first translations))
+		   (return))))
+	     (let ((external-format
+		    (cond ((eq encoding :default) :default)
+			  ((stringp encoding)
+			   (intern encoding :keyword))
+			  (t
+			   encoding))))
+	       external-format))))))
+
 ;;; SET-ROUTINES -- internal
 ;;;
 ;;;   Fill in the various routine slots for the given type. Input-p and
@@ -1916,20 +2079,7 @@
 	   (setf (fd-stream-flags stream) #b001))
 	  (t
 	   (setf (fd-stream-flags stream) #b010)))
-
-    ;; FIXME: setting the external format here should be better
-    ;; integrated into set-routines.  We do it before so that
-    ;; set-routines can create an in-buffer if appropriate.  But we
-    ;; need to do it after to put the correct input routines for the
-    ;; external format.
     ;;
-    ;;#-unicode-bootstrap ; fails in stream-reinit otherwise
-    #+(and unicode (not unicode-bootstrap))
-    (%set-fd-stream-external-format stream external-format nil)
-    (set-routines stream element-type input output input-buffer-p
-		  :binary-stream-p binary-stream-p)
-    #+(and unicode (not unicode-bootstrap))
-    (%set-fd-stream-external-format stream external-format nil)
     (when (and auto-close (fboundp 'finalize))
       (finalize stream
 		#'(lambda ()
@@ -1937,6 +2087,46 @@
 		    (format *terminal-io* (intl:gettext "** Closed ~A~%") name)
 		    (when original
 		      (revert-file file original)))))
+    ;;
+    ;; FIXME: setting the external format here should be better
+    ;; integrated into set-routines.  We do it before so that
+    ;; set-routines can create an in-buffer if appropriate.  But we
+    ;; need to do it after to put the correct input routines for the
+    ;; external format.
+    ;;
+    ;;#-unicode-bootstrap ; fails in stream-reinit otherwise
+    #+(and unicode (not unicode-bootstrap))
+    (cond ((and (eq external-format :file-attribute) input)
+	   ;; Read the encoding file option with the external-format set to
+	   ;; :iso8859-1, and then change the external-format if necessary.
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :iso8859-1 nil)
+	   (set-routines stream element-type input output input-buffer-p
+			 :binary-stream-p binary-stream-p)
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :iso8859-1 nil)
+	   (let ((encoding (stream-encoding-file-attribute stream)))
+	     (unless (file-position stream :start)
+	       (error (intl:gettext "The ~A external-format requires a file stream.")
+		      external-format))
+	     (unless (member encoding '(:iso8859-1 :iso-8859-1))
+	       (setf (stream-external-format stream) (or encoding :default)))))
+	  ((eq external-format :file-attribute)
+	   ;; Non-input stream, so can not read the file attributes, so use the
+	   ;; :default.
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :default nil)
+	   (set-routines stream element-type input output input-buffer-p
+			 :binary-stream-p binary-stream-p)
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :default nil))
+	  (t
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream external-format nil)
+	   (set-routines stream element-type input output input-buffer-p
+			 :binary-stream-p binary-stream-p)
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream external-format nil)))
     stream))
 
 
diff --git a/src/code/load.lisp b/src/code/load.lisp
index 832853b..89f7705 100644
--- a/src/code/load.lisp
+++ b/src/code/load.lisp
@@ -19,7 +19,7 @@
 
 (in-package "EXTENSIONS")
 (export '(*load-if-source-newer* *load-source-types* *load-object-types*
-	  invalid-fasl))
+	  invalid-fasl *default-source-external-format*))
 
 (in-package "SYSTEM")
 (export '(foreign-symbol-address alternate-get-global-address))
@@ -94,6 +94,12 @@
 	     (invalid-fasl-pathname condition)
 	     (invalid-fasl-version condition)
 	     (invalid-fasl-expected-version condition)))))
+
+(defvar *default-source-external-format* :default
+  "The external-format that 'load and 'compile-file use when given an
+  external-format of :default.  The default value is :default which will open
+  the file using the 'ext:*default-external-format*")
+
 
 ;;; LOAD-FRESH-LINE -- internal.
 ;;;
@@ -523,6 +529,10 @@
        defaulting.  Probably only necessary if you have source files with a
        \"fasl\" type. 
 
+   :EXTERNAL-FORMAT
+       The external-format to use when opening the FILENAME. The default is
+       :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*.
+
    The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER*
    determine the defaults for the corresponding keyword arguments.  These
    variables are also bound to the specified argument values, so specifying a
@@ -604,6 +614,8 @@
 	(*load-pathname* pathname))
     (case contents
       (:source
+       (when (eq external-format :default)
+	 (setf external-format *default-source-external-format*))
        (with-open-file (file truename :external-format external-format
 			     :direction :input
 			     :if-does-not-exist if-does-not-exist)
diff --git a/src/compiler/main.lisp b/src/compiler/main.lisp
index 544ccb0..066ff2d 100644
--- a/src/compiler/main.lisp
+++ b/src/compiler/main.lisp
@@ -738,12 +738,12 @@
 				     :write-date (file-write-date x)
 				     :language :lisp))
 		 files)))
-
+    (when (eq external-format :default)
+      (setf external-format *default-source-external-format*))
     (make-source-info :files file-info
 		      :current-file file-info
 		      #+unicode :external-format
-		      #+unicode (stream::ef-name
-				 (stream::find-external-format external-format))
+		      #+unicode external-format
 		      #+unicode :decoding-error
 		      #+unicode decoding-error)))
 
diff --git a/src/i18n/locale/cmucl.pot b/src/i18n/locale/cmucl.pot
index 5b38108..bb807f4 100644
--- a/src/i18n/locale/cmucl.pot
+++ b/src/i18n/locale/cmucl.pot
@@ -9187,6 +9187,16 @@ msgid "Error reading ~S: ~A"
 msgstr ""
 
 #: src/code/fd-stream.lisp
+msgid ""
+"List of coding translations used by 'stream-encoding-file-attribute to map\n"
+"  the read file coding into a native external-format.  Each element is a "
+"list of\n"
+"  a native external-format followed byte a list of coding strings that are "
+"to be\n"
+"  mapped to this native format."
+msgstr ""
+
+#: src/code/fd-stream.lisp
 msgid "Could not find any input routine for ~S"
 msgstr ""
 
@@ -9263,6 +9273,10 @@ msgid "** Closed ~A~%"
 msgstr ""
 
 #: src/code/fd-stream.lisp
+msgid "The ~A external-format requires a file stream."
+msgstr ""
+
+#: src/code/fd-stream.lisp
 msgid ""
 "This is a string that OPEN tacks on the end of a file namestring to produce\n"
 "   a name for the :if-exists :rename-and-delete and :rename options.  Also,\n"
@@ -10064,6 +10078,14 @@ msgid ""
 msgstr ""
 
 #: src/code/load.lisp
+msgid ""
+"The external-format that 'load and 'compile-file use when given an\n"
+"  external-format of :default.  The default value is :default which will "
+"open\n"
+"  the file using the 'ext:*default-external-format*"
+msgstr ""
+
+#: src/code/load.lisp
 msgid "List of free fop tables for the fasloader."
 msgstr ""
 
@@ -10133,6 +10155,10 @@ msgid ""
 "       defaulting.  Probably only necessary if you have source files with a\n"
 "       \"fasl\" type. \n"
 "\n"
+"   :EXTERNAL-FORMAT\n"
+"       The external-format to use when opening the FILENAME. The default is\n"
+"       :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*.\n"
+"\n"
 "   The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER"
 "*\n"
 "   determine the defaults for the corresponding keyword arguments.  These\n"

-----------------------------------------------------------------------


hooks/post-receive
-- 
CMU Common Lisp


More information about the cmucl-commit mailing list