[cmucl-commit] [git] CMU Common Lisp branch master updated. snapshot-2012-08-9-gff56940

Raymond Toy rtoy at common-lisp.net
Tue Aug 28 05:56:39 UTC 2012


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMU Common Lisp".

The branch, master, has been updated
       via  ff569406a77867b99256fc829d233478334aaf46 (commit)
      from  0dae48842681ded2440ebf34339e1a6851f3f80c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit ff569406a77867b99256fc829d233478334aaf46
Author: Raymond Toy <toy.raymond at gmail.com>
Date:   Mon Aug 27 22:47:36 2012 -0700

    Clean up debug_print.  Surrogate pairs are always high surrogate
    followed by low; anything else is invalid.

diff --git a/src/lisp/interr.c b/src/lisp/interr.c
index f5bc61f..a203d57 100644
--- a/src/lisp/interr.c
+++ b/src/lisp/interr.c
@@ -226,53 +226,38 @@ surrogatep(int code, int *type)
 static int
 utf16_codepoint(unsigned short int* utf16, int len, int* consumed)
 {
-    int code = *utf16;
-    int read = 1;
-    
+    int codepoint = REPLACEMENT_CODE;
+    int code_unit = *utf16;
     int code_type;
+    int read = 1;
 
     /*
      * If the current code unit is not a surrogate, we're done.
-     * Otherwise process the surrogate.
+     * Otherwise process the surrogate.  If this is a high (leading)
+     * surrogate and the next code unit is a low (trailing) surrogate,
+     * compute the code point.  Otherwise we have a bare surrogate or
+     * an invalid surrogate sequence, so just return the replacement
+     * character.
      */
     
-    if (surrogatep(code, &code_type)) {
-        /*
-         * Try to get the following surrogate, if there are still code
-         * units left.  If not, we have a bare surrogate, so just
-         * return the replacement character.
-         */
-        if (len > 0) {
-            int next = utf16[1];
+    if (surrogatep(code_unit, &code_type)) {
+        if (code_type == 0 && len > 0) {
+            int next_unit = utf16[1];
             int next_type;
-            if (surrogatep(next, &next_type)) {
-                /* Got the following surrogate, so combine them if possible */
-                if ((code_type == 0) && (next_type == 1)) {
+            if (surrogatep(next_unit, &next_type)) {
+                if (next_type == 1) {
                     /* High followed by low surrogate */
-                    code = ((code - 0xd800) << 10) + next + 0x2400;
-                    ++read;
-                } else if ((code_type == 1) && (next_type == 0)) {
-                    /*
-                     * Low followed by high surrogate.  Not sure if we
-                     * really need to handle this case.
-                     */
-                    code = ((code - 0xd800) << 10) + next + 0x2400;;
+                    codepoint = ((code_unit - 0xd800) << 10) + next_unit + 0x2400;
                     ++read;
-                } else {
-                    /* Give up */
-                    code = REPLACEMENT_CODE;
                 }
-            } else {
-                /* Surrogate followed by non-surrogate. Give up */
-                code = REPLACEMENT_CODE;
             }
-        } else {
-            code = REPLACEMENT_CODE;
         }
+    } else {
+        codepoint = code_unit;
     }
 
     *consumed = read;
-    return code;
+    return codepoint;
 }
 
 /*
@@ -340,8 +325,8 @@ debug_print(lispobj object)
         }
     } else {
         /*
-         * We should actually ever get here because %primitive print
-         * is only supposed to take strings.  But if we do, it's
+         * We shouldn't actually ever get here because %primitive
+         * print is only supposed to take strings.  But if we do, it's
          * useful to print something out anyway.
          */
 #if 1

-----------------------------------------------------------------------

Summary of changes:
 src/lisp/interr.c |   53 +++++++++++++++++++----------------------------------
 1 files changed, 19 insertions(+), 34 deletions(-)


hooks/post-receive
-- 
CMU Common Lisp


More information about the cmucl-commit mailing list