[cmucl-commit] [git] CMU Common Lisp branch master updated. snapshot-2012-05-14-g0653c41

Raymond Toy rtoy at common-lisp.net
Mon May 28 22:49:41 UTC 2012


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMU Common Lisp".

The branch, master has been updated
       via  0653c4136d09939153698e149b55f493a42b349a (commit)
       via  4ffd5ed1ea819525878370bc988cd46386b8c1b2 (commit)
       via  7514638ff9ecc8d1f61c425feea45edf125d9e62 (commit)
       via  fbced7f5db441d632c6ceb8eb53f598e0588551b (commit)
       via  38b90775ff86c404a507fda46eda01dc9bb2adab (commit)
       via  4bbfd80236161221c3271734cea742f11b25e334 (commit)
       via  9a2c833786572d84ca98de5e938e5ac4c42b4618 (commit)
       via  164dd1e02ab53a665e561b5e0a2369f19f73c3b0 (commit)
      from  afd3451aeb9bb26e076e235c502c5b1a106081cf (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 0653c4136d09939153698e149b55f493a42b349a
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Mon May 28 15:49:32 2012 -0700

    Update with new :FILE-ATTRIBUTE external format, contributed by
    Douglas Crosher.

diff --git a/src/general-info/release-20d.txt b/src/general-info/release-20d.txt
index af8619d..f176b8b 100644
--- a/src/general-info/release-20d.txt
+++ b/src/general-info/release-20d.txt
@@ -28,6 +28,10 @@ New in this release:
       double-float) numbers.  Utility functions are provided to set
       and access these packed numbers.
     * Added external format for EUC-KR.
+    * Added new external format, :FILE-ATTRIBUTE, which looks for an
+      emacs mode-line to determine the encoding to use for reading a
+      file.  The end-of-line sequence is also determined from reading
+      the file.
  
   * Changes
     * ASDF2 updated to version 2.21.
@@ -50,6 +54,11 @@ New in this release:
       enabling a trap when the current exception also listed that trap
       caused the exception to be immediately signaled.  This no longer
       happens and now matches how ppc and sparc behave.
+    * The default external-format for COMPILE-FILE and LOAD is now
+      given by *DEFAULT-SOURCE-EXTERNAL-FORMAT*, instead of
+      *DEFAULT-EXTERNAL-FORMAT*.  However, the default value of
+      *DEFAULT-SOURCE-EXTERNAL-FORMAT* is :DEFAULT, which means the
+      value of *DEFAULT-EXTERNAL-FORMAT* will be used.
       
   * ANSI compliance fixes:
     * CMUCL was not printing pathnames like (make-pathname :directory
@@ -82,6 +91,10 @@ New in this release:
     * EXPORT and friends should not EVAL the form when compiling.
       This was probably a leftover from the time when CMUCL did not
       have DEFPACKAGE.  (See ticket:60.)
+    * The debugger was not always opening the file in the correct
+      external format.  It defaulted to using
+      *DEFAULT-EXTERNAL-FORMAT* instead of the format used when
+      compiling the file.
 
   * Trac Tickets:
     * #50: Print/read error with make-pathname.

commit 4ffd5ed1ea819525878370bc988cd46386b8c1b2
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Mon May 28 09:47:36 2012 -0700

    Remove extra closing parenthesis.

diff --git a/src/tools/hemcom.lisp b/src/tools/hemcom.lisp
index 949f3e0..c841aae 100644
--- a/src/tools/hemcom.lisp
+++ b/src/tools/hemcom.lisp
@@ -118,7 +118,7 @@
    "WINDOW-DISPLAY-END" "WINDOW-DISPLAY-RECENTERING" "WINDOW-DISPLAY-START"
    "WINDOW-FONT" "WINDOW-HEIGHT" "WINDOW-POINT" "WINDOW-WIDTH" "WINDOWP"
    "WITH-INPUT-FROM-REGION" "WITH-MARK" "WITH-OUTPUT-TO-MARK"
-   "WITH-POP-UP-DISPLAY" "WITH-WRITABLE-BUFFER" "WRITE-FILE")))
+   "WITH-POP-UP-DISPLAY" "WITH-WRITABLE-BUFFER" "WRITE-FILE"))
 
 (unless (find-package "HEMLOCK")
   (make-package "HEMLOCK"

commit 7514638ff9ecc8d1f61c425feea45edf125d9e62
Merge: fbced7f afd3451
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Mon May 28 09:19:17 2012 -0700

    Merge branch 'master' into ext-format-file-attribute


commit fbced7f5db441d632c6ceb8eb53f598e0588551b
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Sat May 26 11:13:01 2012 -0700

    Fix so this can build on 8-bit cmucl.
    
    o Clean up reader conditionals in MAKE-FD-STREAM.
    o Add dummy %SET-FD-STREAM-EXTERNAL-FORMAT for non-unicode.

diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp
index 5f194a0..062a0d3 100644
--- a/src/code/fd-stream.lisp
+++ b/src/code/fd-stream.lisp
@@ -2136,6 +2136,12 @@
 
 ;;;; Creation routines (MAKE-FD-STREAM and OPEN)
 
+;; The unicode version of this is in fd-stream-extfmt.lisp
+#-(and unicode (not unicode-boootstrap))
+(defun %set-fd-stream-external-format (stream extfmt &optional (updatep t))
+  (declare (ignore stream extfmt updatep))
+  (values))
+
 ;;; MAKE-FD-STREAM -- Public.
 ;;;
 ;;; Returns a FD-STREAM on the given file.
@@ -2246,15 +2252,12 @@
     ;; external format.
     ;;
     ;;#-unicode-bootstrap ; fails in stream-reinit otherwise
-    #+(and unicode (not unicode-bootstrap))
     (cond ((and (eq external-format :file-attribute) input)
 	   ;; Read the encoding file option with the external-format set to
 	   ;; :iso8859-1, and then change the external-format if necessary.
-	   #+(and unicode (not unicode-bootstrap))
 	   (%set-fd-stream-external-format stream :iso8859-1 nil)
 	   (set-routines stream element-type input output input-buffer-p
 			 :binary-stream-p binary-stream-p)
-	   #+(and unicode (not unicode-bootstrap))
 	   (%set-fd-stream-external-format stream :iso8859-1 nil)
 	   (multiple-value-bind (encoding eol-mode)
 	       (stream-encoding-file-attribute stream)
@@ -2271,18 +2274,14 @@
 	  ((eq external-format :file-attribute)
 	   ;; Non-input stream, so can not read the file attributes, so use the
 	   ;; :default.
-	   #+(and unicode (not unicode-bootstrap))
 	   (%set-fd-stream-external-format stream :default nil)
 	   (set-routines stream element-type input output input-buffer-p
 			 :binary-stream-p binary-stream-p)
-	   #+(and unicode (not unicode-bootstrap))
 	   (%set-fd-stream-external-format stream :default nil))
 	  (t
-	   #+(and unicode (not unicode-bootstrap))
 	   (%set-fd-stream-external-format stream external-format nil)
 	   (set-routines stream element-type input output input-buffer-p
 			 :binary-stream-p binary-stream-p)
-	   #+(and unicode (not unicode-bootstrap))
 	   (%set-fd-stream-external-format stream external-format nil)))
     stream))
 

commit 38b90775ff86c404a507fda46eda01dc9bb2adab
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Sat May 26 09:13:34 2012 -0700

    Use concatenate instead of format because format isn't available when
    compiling.

diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp
index 85aa0b8..5f194a0 100644
--- a/src/code/fd-stream.lisp
+++ b/src/code/fd-stream.lisp
@@ -1367,7 +1367,8 @@
 	   (flet ((add-suffix (list suffix)
 		    (let ((list* nil))
 		      (dolist (coding list)
-			(push (format nil "~A-~A" coding suffix) list*))
+			(push (concatenate 'simple-string coding "-" suffix)
+			      list*))
 		      (nreverse list*))))
 	     `((,target , at list)
 	       ((,target :unix) ,@(add-suffix list "unix"))

commit 4bbfd80236161221c3271734cea742f11b25e334
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Sat May 26 08:24:55 2012 -0700

    Debugger needs to open file with the appropriate external format.
    Patch from Douglas.

diff --git a/src/code/debug-int.lisp b/src/code/debug-int.lisp
index a3a3dfb..014382e 100644
--- a/src/code/debug-int.lisp
+++ b/src/code/debug-int.lisp
@@ -4943,8 +4943,10 @@ The result is a symbol or nil if the routine cannot be found."
 		 (aref (or (debug-source-start-positions d-source)
 			   (error (intl:gettext "Cannot set breakpoints for editor when ~
 				   there is no start positions map.")))
-		       local-tlf-offset)))
-	   (with-open-file (f name)
+		       local-tlf-offset))
+		(external-format (or (c::debug-source-info d-source)
+				     ext:*default-source-external-format*)))
+	   (with-open-file (f name :external-format external-format)
 	     (cond
 	      ((= (debug-source-created d-source) (file-write-date name))
 	       (file-position f char-offset))
diff --git a/src/code/debug.lisp b/src/code/debug.lisp
index 825ed0b..62bf986 100644
--- a/src/code/debug.lisp
+++ b/src/code/debug.lisp
@@ -1486,7 +1486,8 @@ See the CMU Common Lisp User's Manual for more information.
 	(when *cached-source-stream* (close *cached-source-stream*))
 	(setq *cached-source-stream*
 	      (open name :if-does-not-exist nil
-		    :external-format (or (c::debug-source-info d-source) :default)))
+		    :external-format (or (c::debug-source-info d-source)
+					 ext:*default-source-external-format*)))
 	(unless *cached-source-stream*
 	  (error (intl:gettext "Source file no longer exists:~%  ~A.") (namestring name)))
 	(format t (intl:gettext "~%; File: ~A~%") (namestring name)))

commit 9a2c833786572d84ca98de5e938e5ac4c42b4618
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Sat May 26 08:24:22 2012 -0700

    Add all the emacs format encodings.  From Douglas.

diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp
index 820c22d..85aa0b8 100644
--- a/src/code/fd-stream.lisp
+++ b/src/code/fd-stream.lisp
@@ -1363,20 +1363,101 @@
 ;;;; Utility functions (misc routines, etc)
 
 (defparameter *stream-encoding-file-attribute-translations*
-  '(;; Emacs specific codings.
-    ((:iso-8859-1 :unix)
-     "latin-1" "latin-1-unix" "iso-latin-1-unix" "iso-8859-1-unix")
-    ((:iso-8859-1 :dos)
-     "latin-1" "latin-1-dos" "iso-latin-1-dos" "iso-8859-1-dos")
-    ((:iso-8859-1 :max)
-     "latin-1" "latin-1-mac" "iso-latin-1-mac" "iso-8859-1-mac")
-    ((:utf-8 :unix) "utf-8-unix")
-    ((:utf-8 :dos) "utf-8-dos")
-    ((:utf-8 :mac) "utf-8-mac")
-    ((:euc-jp :unix) "euc-jp-unix")
-    ((:euc-jp :dos) "euc-jp-dos")
-    ((:euc-jp :mac) "euc-jp-mac")
-    )
+  (flet ((emacs-coding (target &rest list)
+	   (flet ((add-suffix (list suffix)
+		    (let ((list* nil))
+		      (dolist (coding list)
+			(push (format nil "~A-~A" coding suffix) list*))
+		      (nreverse list*))))
+	     `((,target , at list)
+	       ((,target :unix) ,@(add-suffix list "unix"))
+	       ((,target :dos) ,@(add-suffix list "dos"))
+	       ((,target :mac) ,@(add-suffix list "mac"))))))
+    `(;; Emacs specific codings.
+      ,@(emacs-coding :utf-8 "utf-8" "utf-8-with-signature" "mule-utf-8")
+      ,@(emacs-coding :utf-16le "utf-16le" "utf-16-le")
+      ,@(emacs-coding :utf-16be "utf-16be" "utf-16-be")
+      ,@(emacs-coding :utf-16 "utf-16" "utf-16le-with-signature" "utf-16be-with-signature")
+      ,@(emacs-coding :us-ascii "us-ascii" "iso-safe")
+      ,@(emacs-coding :iso-8859-1 "iso-8859-1" "latin-1" "iso-latin-1")
+      ,@(emacs-coding :iso-8859-1 "binary" "no-conversion" "raw-text")
+      ,@(emacs-coding :iso-8859-2 "iso-8859-2" "latin-2" "iso-latin-2")
+      ,@(emacs-coding :iso-8859-3 "iso-8859-3" "latin-3" "iso-latin-3")
+      ,@(emacs-coding :iso-8859-4 "iso-8859-4" "latin-4" "iso-latin-4")
+      ,@(emacs-coding :iso-8859-5 "iso-8859-5" "cyrillic-iso-8bit")
+      ,@(emacs-coding :iso-8859-6 "iso-8859-6")
+      ,@(emacs-coding :iso-8859-7 "iso-8859-7" "greek-iso-8bit")
+      ,@(emacs-coding :iso-8859-8 "iso-8859-8" "hebrew-iso-8bit")
+      ,@(emacs-coding :iso-8859-9 "iso-8859-9" "latin-5" "iso-latin-5")
+      ,@(emacs-coding :iso-8859-10 "iso-8859-10" "latin-6" "iso-latin-6")
+      ,@(emacs-coding :iso-8859-11 "iso-8859-11")
+      ,@(emacs-coding :iso-8859-13 "iso-8859-13" "latin-7" "iso-latin-7")
+      ,@(emacs-coding :iso-8859-14 "iso-8859-14" "latin-8" "iso-latin-8")
+      ,@(emacs-coding :iso-8859-15 "iso-8859-15" "latin-9" "iso-latin-9" "latin-0")
+      ,@(emacs-coding :iso-8859-16 "iso-8859-16" "latin-10" "iso-latin-10")
+      ,@(emacs-coding :cp437 "cp437" "ibm437")
+      ,@(emacs-coding :cp850 "cp850" "ibm850")
+      ,@(emacs-coding :cp852 "cp852" "ibm852")
+      ,@(emacs-coding :cp857 "cp857" "ibm857")
+      ,@(emacs-coding :cp858 "cp858")
+      ,@(emacs-coding :cp860 "cp860" "ibm860")
+      ,@(emacs-coding :cp861 "cp861" "ibm861")
+      ,@(emacs-coding :cp862 "cp862" "ibm862")
+      ,@(emacs-coding :cp863 "cp863" "ibm863")
+      ,@(emacs-coding :cp865 "cp865" "ibm865")
+      ,@(emacs-coding :roman8 "roman8" "hp-roman8")
+      ,@(emacs-coding :macintosh "mac-roman")
+      ,@(emacs-coding :utf-7 "utf-7")
+      ,@(emacs-coding :cp1250 "cp1250" "windows-1250")
+      ,@(emacs-coding :cp1251 "cp1251" "windows-1251")
+      ,@(emacs-coding :cp1252 "cp1252" "windows-1252")
+      ,@(emacs-coding :cp1253 "cp1253" "windows-1253")
+      ,@(emacs-coding :cp1254 "cp1254" "windows-1254")
+      ,@(emacs-coding :cp1255 "cp1255" "windows-1255")
+      ,@(emacs-coding :cp1256 "cp1256" "windows-1256")
+      ,@(emacs-coding :cp1257 "cp1257" "windows-1257")
+      ,@(emacs-coding :cp1258 "cp1258" "windows-1258")
+      ,@(emacs-coding :cp851 "cp851" "ibm851")
+      ,@(emacs-coding :cp737 "cp737")
+      ,@(emacs-coding :cp869 "cp869" "ibm869")
+      ,@(emacs-coding :cp866 "cp866")
+      ,@(emacs-coding :koi8 "koi8" "koi8-r" "cyrillic-koi8" "cp878")
+      ,@(emacs-coding :koi8-u "koi8-u")
+      ,@(emacs-coding :koi8-t "koi8-t")
+      ,@(emacs-coding :cp1125 "cp1125" "ruscii" "cp866u")
+      ,@(emacs-coding :cp855 "cp855" "ibm855")
+      ,@(emacs-coding :mik "mik")
+      ,@(emacs-coding :pt154 "pt154")
+      ,@(emacs-coding :ebcdic-us "ebcdic-us")
+      ,@(emacs-coding :ebcdic-uk "ebcdic-uk")
+      ,@(emacs-coding :cp1047 "cp1047" "ibm1047")
+      ,@(emacs-coding :iso-2022-cn "iso-2022-cn" "chinese-iso-7bit")
+      ,@(emacs-coding :iso-2022-cn-ext "iso-2022-cn-ext")
+      ,@(emacs-coding :gb2312 "gb2312" "cn-gb" "euc-cn" "euc-china"
+		      "cn-gb-2312" "chinese-iso-8bit")
+      ,@(emacs-coding :big5 "big5" "cp950" "cn-big5" "chinese-big5")
+      ,@(emacs-coding :big5hkscs "big5-hkscs" "cn-big5-hkscs" "chinese-big5-hkscs")
+      ,@(emacs-coding :euc-tw "euc-tw" "euc-taiwan")
+      ,@(emacs-coding :cp936 "cp936" "windows-936" "gbk" "chinese-gbk")
+      ,@(emacs-coding :gb18030 "gb18030" "chinese-gb18003")
+      ,@(emacs-coding :cp874 "ibm874" "cp874")
+      ,@(emacs-coding :tis-620 "tis-620" "tis620" "th-tis620" "thai-tis620")
+      ,@(emacs-coding :viscii "viscii" "vietnamese-viscii")
+      ,@(emacs-coding :tcvn "tcvn-5712" "tcvn" "vietnamese-tcvn")
+      ,@(emacs-coding :georgian-ps "georgian-ps")
+      ,@(emacs-coding :georgian-academy "georgian-academy")
+      ,@(emacs-coding :iso-2022-jp "iso-2022-jp" "junet")
+      ,@(emacs-coding :iso-2022-jp-2 "iso-2022-jp-2")
+      ,@(emacs-coding :shift-jis "shift_jis" "sjis" "japanese-shift-jis")
+      ,@(emacs-coding :cp932 "cp932" "japanese-cp932")
+      ,@(emacs-coding :euc-jp "euc-jp" "euc-japan" "euc-japan-1990" "japanese-iso-8bit")
+      ,@(emacs-coding :euc-ms "eucjp-ms")
+      ,@(emacs-coding :iso-2022-jp-3 "iso-2022-jp-3" "iso-2022-jp-2004")
+      ,@(emacs-coding :euc-jisx0213 "euc-jisx0213" "euc-jis-2004")
+      ,@(emacs-coding :euc-korea "euc-korea" "euc-kr" "korean-iso-8bit")
+      ,@(emacs-coding :iso-2022-kr "iso-2022-kr" "korean-iso-7bit-lock")
+      ,@(emacs-coding :cp949 "cp949" "korean-cp949")
+      ))
   "List of coding translations used by 'stream-encoding-file-attribute to map
   the read file coding into a native external-format.  Each element is a list of
   a native external-format followed by a list of coding strings that are to be

commit 164dd1e02ab53a665e561b5e0a2369f19f73c3b0
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Fri May 25 22:49:18 2012 -0700

    First cut at :file-attribute external-format that determines the
    format from the file contents ala emacs.

diff --git a/src/code/exports.lisp b/src/code/exports.lisp
index a46d5bb..d0cfe58 100644
--- a/src/code/exports.lisp
+++ b/src/code/exports.lisp
@@ -1511,6 +1511,7 @@
 		"DESCRIBE-EXTERNAL-FORMAT")
   ;; Unicode
   (:export "STRING-TO-OCTETS" "OCTETS-TO-STRING" "*DEFAULT-EXTERNAL-FORMAT*"
+	   "*DEFAULT-SOURCE-EXTERNAL-FORMAT*"
 	   "DESCRIBE-EXTERNAL-FORMAT"
 	   "LIST-ALL-EXTERNAL-FORMATS"
 	   "STRING-ENCODE" "STRING-DECODE"
diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp
index 45b5847..820c22d 100644
--- a/src/code/fd-stream.lisp
+++ b/src/code/fd-stream.lisp
@@ -1362,6 +1362,238 @@
 
 ;;;; Utility functions (misc routines, etc)
 
+(defparameter *stream-encoding-file-attribute-translations*
+  '(;; Emacs specific codings.
+    ((:iso-8859-1 :unix)
+     "latin-1" "latin-1-unix" "iso-latin-1-unix" "iso-8859-1-unix")
+    ((:iso-8859-1 :dos)
+     "latin-1" "latin-1-dos" "iso-latin-1-dos" "iso-8859-1-dos")
+    ((:iso-8859-1 :max)
+     "latin-1" "latin-1-mac" "iso-latin-1-mac" "iso-8859-1-mac")
+    ((:utf-8 :unix) "utf-8-unix")
+    ((:utf-8 :dos) "utf-8-dos")
+    ((:utf-8 :mac) "utf-8-mac")
+    ((:euc-jp :unix) "euc-jp-unix")
+    ((:euc-jp :dos) "euc-jp-dos")
+    ((:euc-jp :mac) "euc-jp-mac")
+    )
+  "List of coding translations used by 'stream-encoding-file-attribute to map
+  the read file coding into a native external-format.  Each element is a list of
+  a native external-format followed by a list of coding strings that are to be
+  mapped to this native format.  The first element is the target encoding,
+  may be a list with the first element being the encoding and the second the
+  line termination style: :unix (linefeed), :dos (CR-LF), or :mac (CR).")
+
+
+;;; stream-encoding-file-attribute  --  Internal
+;;;
+;;; Read the encoding file option from the stream 's which is expected to be a
+;;; character stream with an external-format of :iso8859-1.
+;;;
+(defun stream-encoding-file-attribute (s)
+  (let* ((initial-encoding nil)
+	 (declared-encoding nil)
+	 (eol-mode nil)
+	 (buffer (make-array 1024 :element-type '(unsigned-byte 8)))
+	 (available (do ((i 0 (1+ i)))
+			((>= i 1024) i)
+		      (declare (fixnum i))
+		      (let ((ch (read-char s nil nil)))
+			(unless ch (return i))
+			(setf (aref buffer i) (char-code ch))))))
+    (labels ((decode-ascii (start size offset)
+	       (declare (type fixnum start)
+			(type (integer 1 4) size)
+			(type (integer 0 3) offset))
+	       (let ((ascii (make-array 64 :element-type 'character
+					:adjustable t :fill-pointer 0)))
+		 (do ()
+		     ((< available (+ start size)))
+		   (let* ((code (ecase size
+				  (1 (aref buffer start))
+				  (2 (let ((b0 (aref buffer start))
+					   (b1 (aref buffer (1+ start))))
+				       (ecase offset
+					 (0 (logior (ash b1 8) b0))
+					 (1 (logior (ash b0 8) b1)))))
+				  (4
+				   (let ((b0 (aref buffer start))
+					 (b1 (aref buffer (+ start 1)))
+					 (b2 (aref buffer (+ start 2)))
+					 (b3 (aref buffer (+ start 3))))
+				     (ecase offset
+				       (0 (logior (ash b3 24) (ash b2 16) (ash b1 8) b0))
+				       (1 (logior (ash b1 24) (ash b0 16) (ash b3 8) b2))
+				       (2 (logior (ash b2 24) (ash b3 16) (ash b0 8) b1))
+				       (3 (logior (ash b0 24) (ash b1 16) (ash b2 8) b3))))))))
+		     (incf start size)
+		     (let ((ch (if (< 0 code #x80) (code-char code) #\?)))
+		       (vector-push-extend ch ascii))))
+		 ascii))
+	     (parse-file-option (ascii)
+	       ;; Parse the file options.
+	       (let ((found (search "-*-" ascii))
+		     (options nil))
+		 (when found
+		   (block do-file-options
+		     (let* ((start (+ found 3))
+			    (end (search "-*-" ascii :start2 start)))
+		       (unless end
+			 (return-from do-file-options))
+		       (unless (find #\: ascii :start start :end end)
+			 (return-from do-file-options))
+		       (do ((opt-start start (1+ semi)) colon semi)
+			   (nil)
+			 (setf colon (position #\: ascii :start opt-start :end end))
+			 (unless colon
+			   (return-from do-file-options))
+			 (setf semi (or (position #\; ascii :start colon :end end) end))
+			 (let ((option (string-trim '(#\space #\tab)
+						    (subseq ascii opt-start colon)))
+			       (value (string-trim '(#\space #\tab)
+						   (subseq ascii (1+ colon) semi))))
+			   (push (cons option value) options)
+			   (when (= semi end) (return nil)))))))
+		 (setf declared-encoding
+		       (cond ((cdr (assoc "external-format" options :test 'equalp)))
+			     ((cdr (assoc "encoding" options :test 'equalp)))
+			     ((cdr (assoc "coding" options :test 'equalp)))))))
+	     (detect-line-termination (ascii)
+	       ;; Look for the first line termination and check the style.
+	       (let ((p (position-if #'(lambda (c) (member c '(#\linefeed #\return)))
+				     ascii)))
+		 (when p
+		   (let ((c1 (char ascii p)))
+		     (cond ((char= c1 #\linefeed)
+			    (setf eol-mode :unix))
+			   ((< (1+ p) (length ascii))
+			    (assert (char= c1 #\return))
+			    (let ((c2 (char ascii (1+ p))))
+			      (cond ((eql c2 #\linefeed)
+				     (setf eol-mode :dos))
+				    (t
+				     (setf eol-mode :mac)))))))))))
+      (cond ((>= available 4)
+	     (let ((b1 (aref buffer 0))
+		   (b2 (aref buffer 1))
+		   (b3 (aref buffer 2))
+		   (b4 (aref buffer 3)))
+	       (cond ((and (= b1 #x00) (= b2 #x00) (= b3 #xFE) (= b4 #xFF))
+		      (setf initial-encoding :ucs-4be)
+		      (let ((ascii (decode-ascii 4 4 3)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ((and (= b1 #xff) (= b2 #xfe))
+		      (cond ((and (= b3 #x00) (= b4 #x00))
+			     (setf initial-encoding :ucs-4le)
+			     (let ((ascii (decode-ascii 4 4 0)))
+			       (parse-file-option ascii)
+			       (detect-line-termination ascii)))
+			    (t
+			     (setf initial-encoding :utf-16)
+			     (let ((ascii (decode-ascii 2 2 0)))
+			       (parse-file-option ascii)
+			       (detect-line-termination ascii)))))
+		     ((and (= b1 #x00) (= b2 #x00) (= b3 #xFF) (= b4 #xFE))
+		      (let ((ascii (decode-ascii 4 4 2)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ((and (= b1 #xfe) (= b2 #xff))
+		      (cond ((and (= b3 #x00) (= b4 #x00))
+			     (let ((ascii (decode-ascii 4 4 1)))
+			       (parse-file-option ascii)
+			       (detect-line-termination ascii)))
+			    (t
+			     (setf initial-encoding :utf-16)
+			     (let ((ascii (decode-ascii 2 2 1)))
+			       (parse-file-option ascii)
+			       (detect-line-termination ascii)))))
+		     ;;
+		     ((and (= b1 #xEF) (= b2 #xBB) (= b3 #xBF))
+		      (setf initial-encoding :utf-8)
+		      (let ((ascii (decode-ascii 3 1 0)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ;;
+		     ((and (> b1 0) (= b2 0) (= b3 0) (= b4 0))
+		      (setf initial-encoding :ucs-4le)
+		      (let ((ascii (decode-ascii 0 4 0)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ((and (= b1 0) (> b2 0) (= b3 0) (= b4 0))
+		      (let ((ascii (decode-ascii 0 4 1)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ((and (= b1 0) (= b2 0) (> b3 0) (= b4 0))
+		      (let ((ascii (decode-ascii 0 4 2)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ((and (= b1 0) (= b2 0) (= b3 0) (> b4 0))
+		      (setf initial-encoding :ucs-4be)
+		      (let ((ascii (decode-ascii 0 4 3)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ;;
+		     ((and (> b1 0) (= b2 0) (> b3 0) (= b4 0))
+		      (setf initial-encoding :utf-16le)
+		      (let ((ascii (decode-ascii 0 2 0)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ((and (= b1 0) (> b2 0) (= b3 0) (> b4 0))
+		      (setf initial-encoding :utf-16be)
+		      (let ((ascii (decode-ascii 0 2 1)))
+			(parse-file-option ascii)
+			(detect-line-termination ascii)))
+		     ;;
+		     ((and (= b1 #x2B) (= b2 #x41)
+			   (or (= b3 #x43) (= b3 #x44)))
+		      (setf initial-encoding :utf-7)
+		      (let ((ascii (decode-ascii 0 1 0)))
+			(detect-line-termination ascii)))
+		     ((and (= b1 #x2F) (= b2 #x2B) (= b3 #x41))
+		      (setf initial-encoding :utf-7)
+		      (let ((ascii (decode-ascii 0 1 0)))
+			(detect-line-termination ascii)))
+		     (t
+		      (let ((ascii (decode-ascii 0 1 0)))
+			(when (parse-file-option ascii)
+			  (detect-line-termination ascii)))))))
+	    ((= available 3)
+	     (when (and (= (aref buffer 0) #xEF)
+			(= (aref buffer 1) #xBB)
+			(= (aref buffer 2) #xBF))
+	       (setf initial-encoding :utf-8)))
+	    ((= available 2)
+	     (let ((b1 (aref buffer 0))
+		   (b2 (aref buffer 1)))
+	       (cond ((or (and (= b1 #xff) (= b2 #xfe))
+			  (and (= b1 #xfe) (= b2 #xff)))
+		      (setf initial-encoding :utf-16)))))))
+    ;;
+    ;;
+    (cond ((and (not initial-encoding) (not declared-encoding))
+	   (values :default eol-mode))
+	  (t
+	   (let ((encoding (or declared-encoding initial-encoding)))
+	     (when (stringp encoding)
+	       (setf encoding (string-upcase encoding))
+	       (dolist (translations *stream-encoding-file-attribute-translations*)
+		 (when (member encoding (rest translations) :test 'equalp)
+		   (let ((target (first translations)))
+		     (cond ((consp target)
+			    (setf encoding (first target))
+			    (setf eol-mode (second target)))
+			   (t
+			    (setf encoding (first translations)))))
+		   (return))))
+	     (let ((external-format
+		    (cond ((eq encoding :default) :default)
+			  ((stringp encoding)
+			   (intern encoding :keyword))
+			  (t
+			   encoding))))
+	       (values external-format eol-mode)))))))
+
 ;;; SET-ROUTINES -- internal
 ;;;
 ;;;   Fill in the various routine slots for the given type. Input-p and
@@ -1916,20 +2148,7 @@
 	   (setf (fd-stream-flags stream) #b001))
 	  (t
 	   (setf (fd-stream-flags stream) #b010)))
-
-    ;; FIXME: setting the external format here should be better
-    ;; integrated into set-routines.  We do it before so that
-    ;; set-routines can create an in-buffer if appropriate.  But we
-    ;; need to do it after to put the correct input routines for the
-    ;; external format.
     ;;
-    ;;#-unicode-bootstrap ; fails in stream-reinit otherwise
-    #+(and unicode (not unicode-bootstrap))
-    (%set-fd-stream-external-format stream external-format nil)
-    (set-routines stream element-type input output input-buffer-p
-		  :binary-stream-p binary-stream-p)
-    #+(and unicode (not unicode-bootstrap))
-    (%set-fd-stream-external-format stream external-format nil)
     (when (and auto-close (fboundp 'finalize))
       (finalize stream
 		#'(lambda ()
@@ -1937,6 +2156,52 @@
 		    (format *terminal-io* (intl:gettext "** Closed ~A~%") name)
 		    (when original
 		      (revert-file file original)))))
+    ;;
+    ;; FIXME: setting the external format here should be better
+    ;; integrated into set-routines.  We do it before so that
+    ;; set-routines can create an in-buffer if appropriate.  But we
+    ;; need to do it after to put the correct input routines for the
+    ;; external format.
+    ;;
+    ;;#-unicode-bootstrap ; fails in stream-reinit otherwise
+    #+(and unicode (not unicode-bootstrap))
+    (cond ((and (eq external-format :file-attribute) input)
+	   ;; Read the encoding file option with the external-format set to
+	   ;; :iso8859-1, and then change the external-format if necessary.
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :iso8859-1 nil)
+	   (set-routines stream element-type input output input-buffer-p
+			 :binary-stream-p binary-stream-p)
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :iso8859-1 nil)
+	   (multiple-value-bind (encoding eol-mode)
+	       (stream-encoding-file-attribute stream)
+	     (unless (file-position stream :start)
+	       (error (intl:gettext "The ~A external-format requires a file stream.")
+		      external-format))
+	     (unless (and (member encoding '(:iso8859-1 :iso-8859-1))
+			  (member eol-mode '(nil :unix)))
+	       (setf (stream-external-format stream)
+		     (cond ((member eol-mode '(nil :unix))
+			    (or encoding :default))
+			   (t
+			    (list (or encoding :default) eol-mode)))))))
+	  ((eq external-format :file-attribute)
+	   ;; Non-input stream, so can not read the file attributes, so use the
+	   ;; :default.
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :default nil)
+	   (set-routines stream element-type input output input-buffer-p
+			 :binary-stream-p binary-stream-p)
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream :default nil))
+	  (t
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream external-format nil)
+	   (set-routines stream element-type input output input-buffer-p
+			 :binary-stream-p binary-stream-p)
+	   #+(and unicode (not unicode-bootstrap))
+	   (%set-fd-stream-external-format stream external-format nil)))
     stream))
 
 
diff --git a/src/code/load.lisp b/src/code/load.lisp
index 832853b..89f7705 100644
--- a/src/code/load.lisp
+++ b/src/code/load.lisp
@@ -19,7 +19,7 @@
 
 (in-package "EXTENSIONS")
 (export '(*load-if-source-newer* *load-source-types* *load-object-types*
-	  invalid-fasl))
+	  invalid-fasl *default-source-external-format*))
 
 (in-package "SYSTEM")
 (export '(foreign-symbol-address alternate-get-global-address))
@@ -94,6 +94,12 @@
 	     (invalid-fasl-pathname condition)
 	     (invalid-fasl-version condition)
 	     (invalid-fasl-expected-version condition)))))
+
+(defvar *default-source-external-format* :default
+  "The external-format that 'load and 'compile-file use when given an
+  external-format of :default.  The default value is :default which will open
+  the file using the 'ext:*default-external-format*")
+
 
 ;;; LOAD-FRESH-LINE -- internal.
 ;;;
@@ -523,6 +529,10 @@
        defaulting.  Probably only necessary if you have source files with a
        \"fasl\" type. 
 
+   :EXTERNAL-FORMAT
+       The external-format to use when opening the FILENAME. The default is
+       :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*.
+
    The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER*
    determine the defaults for the corresponding keyword arguments.  These
    variables are also bound to the specified argument values, so specifying a
@@ -604,6 +614,8 @@
 	(*load-pathname* pathname))
     (case contents
       (:source
+       (when (eq external-format :default)
+	 (setf external-format *default-source-external-format*))
        (with-open-file (file truename :external-format external-format
 			     :direction :input
 			     :if-does-not-exist if-does-not-exist)
diff --git a/src/compiler/main.lisp b/src/compiler/main.lisp
index 544ccb0..066ff2d 100644
--- a/src/compiler/main.lisp
+++ b/src/compiler/main.lisp
@@ -738,12 +738,12 @@
 				     :write-date (file-write-date x)
 				     :language :lisp))
 		 files)))
-
+    (when (eq external-format :default)
+      (setf external-format *default-source-external-format*))
     (make-source-info :files file-info
 		      :current-file file-info
 		      #+unicode :external-format
-		      #+unicode (stream::ef-name
-				 (stream::find-external-format external-format))
+		      #+unicode external-format
 		      #+unicode :decoding-error
 		      #+unicode decoding-error)))
 
diff --git a/src/i18n/locale/cmucl.pot b/src/i18n/locale/cmucl.pot
index 5b38108..bb807f4 100644
--- a/src/i18n/locale/cmucl.pot
+++ b/src/i18n/locale/cmucl.pot
@@ -9187,6 +9187,16 @@ msgid "Error reading ~S: ~A"
 msgstr ""
 
 #: src/code/fd-stream.lisp
+msgid ""
+"List of coding translations used by 'stream-encoding-file-attribute to map\n"
+"  the read file coding into a native external-format.  Each element is a "
+"list of\n"
+"  a native external-format followed byte a list of coding strings that are "
+"to be\n"
+"  mapped to this native format."
+msgstr ""
+
+#: src/code/fd-stream.lisp
 msgid "Could not find any input routine for ~S"
 msgstr ""
 
@@ -9263,6 +9273,10 @@ msgid "** Closed ~A~%"
 msgstr ""
 
 #: src/code/fd-stream.lisp
+msgid "The ~A external-format requires a file stream."
+msgstr ""
+
+#: src/code/fd-stream.lisp
 msgid ""
 "This is a string that OPEN tacks on the end of a file namestring to produce\n"
 "   a name for the :if-exists :rename-and-delete and :rename options.  Also,\n"
@@ -10064,6 +10078,14 @@ msgid ""
 msgstr ""
 
 #: src/code/load.lisp
+msgid ""
+"The external-format that 'load and 'compile-file use when given an\n"
+"  external-format of :default.  The default value is :default which will "
+"open\n"
+"  the file using the 'ext:*default-external-format*"
+msgstr ""
+
+#: src/code/load.lisp
 msgid "List of free fop tables for the fasloader."
 msgstr ""
 
@@ -10133,6 +10155,10 @@ msgid ""
 "       defaulting.  Probably only necessary if you have source files with a\n"
 "       \"fasl\" type. \n"
 "\n"
+"   :EXTERNAL-FORMAT\n"
+"       The external-format to use when opening the FILENAME. The default is\n"
+"       :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*.\n"
+"\n"
 "   The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER"
 "*\n"
 "   determine the defaults for the corresponding keyword arguments.  These\n"

-----------------------------------------------------------------------

Summary of changes:
 src/code/debug-int.lisp          |    6 +-
 src/code/debug.lisp              |    3 +-
 src/code/exports.lisp            |    1 +
 src/code/fd-stream.lisp          |  372 ++++++++++++++++++++++++++++++++++++--
 src/code/load.lisp               |   14 ++-
 src/compiler/main.lisp           |    6 +-
 src/general-info/release-20d.txt |   13 ++
 src/i18n/locale/cmucl.pot        |   26 +++
 src/tools/hemcom.lisp            |    2 +-
 9 files changed, 422 insertions(+), 21 deletions(-)


hooks/post-receive
-- 
CMU Common Lisp


More information about the cmucl-commit mailing list