CMUCL commit: src/code (string.lisp)
Raymond Toy
rtoy at common-lisp.net
Mon Sep 13 23:27:04 CEST 2010
Date: Monday, September 13, 2010 @ 17:27:04
Author: rtoy
Path: /project/cmucl/cvsroot/src/code
Modified: string.lisp
Add function to convert a sequence of codepoints to a string and a
function to convert a string to a list of codepoints.
-------------+
string.lisp | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
Index: src/code/string.lisp
diff -u src/code/string.lisp:1.24 src/code/string.lisp:1.25
--- src/code/string.lisp:1.24 Tue Apr 20 13:57:45 2010
+++ src/code/string.lisp Mon Sep 13 17:27:04 2010
@@ -5,7 +5,7 @@
;;; Carnegie Mellon University, and has been placed in the public domain.
;;;
(ext:file-comment
- "$Header: /project/cmucl/cvsroot/src/code/string.lisp,v 1.24 2010-04-20 17:57:45 rtoy Exp $")
+ "$Header: /project/cmucl/cvsroot/src/code/string.lisp,v 1.25 2010-09-13 21:27:04 rtoy Exp $")
;;;
;;; **********************************************************************
;;;
@@ -1684,3 +1684,33 @@
result :start (1+ start) :end next)))
(write-string string result :start end :end offset-slen))))))
+
+;; Some utilities
+(defun codepoints-string (seq)
+ "Convert a sequence of codepoints to a string. Codepoints outside
+ the basic multilingual plane (BMP) are converted into the
+ corresponding surrogate pairs."
+ (with-output-to-string (s)
+ (map nil #'(lambda (c)
+ (multiple-value-bind (hi lo)
+ (surrogates c)
+ (write-char hi s)
+ (when lo (write-char lo s))))
+ seq)))
+
+(defun string-codepoints (s)
+ "Convert a string to a list of corresponding code points. Surrogate
+ pairs in the string are converted into the correspoinding
+ codepoint."
+ (declare (type simple-string s))
+ (let ((len (length s))
+ cp)
+ (do ((idx 0))
+ ((>= idx len))
+ (multiple-value-bind (c widep)
+ (codepoint s idx)
+ (if widep
+ (incf idx 2)
+ (incf idx))
+ (push c cp)))
+ (nreverse cp)))
\ No newline at end of file
More information about the cmucl-commit
mailing list