CMUCL commit: src/lisp (gencgc.c x86-assem.S)

Raymond Toy rtoy at common-lisp.net
Thu Apr 1 16:05:46 CEST 2010


    Date: Thursday, April 1, 2010 @ 10:05:46
  Author: rtoy
    Path: /project/cmucl/cvsroot/src/lisp

Modified: gencgc.c x86-assem.S

Fix SSE2 bug when running

(defun testfn ()
  (let* ((i-gc-ed-u nil)
         (ext:*gc-notify-before* (lambda (a) (/ a 0.34d0))))
    (dotimes (i 100000)
      (setf i-gc-ed-u nil)
      (let* ((v1 (list (* 1d0 (random 10)) (* 1d0 (random 10))))
             (v2 (list (* 1d0 (random 10)) (* 1d0 (random 10))))
             (dot1 (reduce #'+ (mapcar #'* v1 v2)))
             (dot2 (reduce #'+ (mapcar #'* v1 v2))))
        (when (/= dot1 dot2)
          (print `(gc ,i-gc-ed-u v1 ,v1 v2 ,v2 dot1 ,dot1 dot2 ,dot2)))))))

Running this with sse2 would cause dot1 and dot2 to sometimes be
different.  We forgot to save the SSE2 state in call_into_lisp.

(This bug was mentioned on comp.lang.lisp: http://groups.google.com/group/comp.lang.lisp/browse_thread/thread/828371aa4800272c?hl=en# )

x86-assem.S:
o Save and restore SSE2 state when running with SSE2 core.  (This
  might need more tweaking.  Should we use cpuid instead of looking at
  fpu_mode?)

gencgc.c:
o Save the SSE2 state, along with X87 state for SSE2 cores.


-------------+
 gencgc.c    |   20 ++++++++++++++++++--
 x86-assem.S |   46 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 61 insertions(+), 5 deletions(-)


Index: src/lisp/gencgc.c
diff -u src/lisp/gencgc.c:1.106 src/lisp/gencgc.c:1.107
--- src/lisp/gencgc.c:1.106	Mon Dec 21 06:36:43 2009
+++ src/lisp/gencgc.c	Thu Apr  1 10:05:45 2010
@@ -7,7 +7,7 @@
  *
  * Douglas Crosher, 1996, 1997, 1998, 1999.
  *
- * $Header: /project/cmucl/cvsroot/src/lisp/gencgc.c,v 1.106 2009-12-21 11:36:43 rswindells Exp $
+ * $Header: /project/cmucl/cvsroot/src/lisp/gencgc.c,v 1.107 2010-04-01 14:05:45 rtoy Exp $
  *
  */
 
@@ -662,7 +662,14 @@
 
 #if defined(i386) || defined(__x86_64)
 #define FPU_STATE_SIZE 27
+    /* 
+     * Need 512 byte area, aligned on a 16-byte boundary.  So allocate
+     * 512+16 bytes of space and let the routine adjust the pointer
+     * to the appropriate alignment.
+     */
+#define SSE_STATE_SIZE ((512+16)/4)
     int fpu_state[FPU_STATE_SIZE];
+    int sse_state[SSE_STATE_SIZE];
 #elif defined(sparc)
     /*
      * 32 (single-precision) FP registers, and the FP state register.
@@ -683,7 +690,11 @@
      */
 
     fpu_save(fpu_state);
-
+#if defined(i386) || defined(__x86_64)
+    if (fpu_mode == SSE2) {
+      sse_save(sse_state);
+    }
+#endif    
 
     /* Number of generations to print out. */
     if (verbose)
@@ -738,6 +749,11 @@
     fprintf(stderr, "   Total bytes alloc=%ld\n", bytes_allocated);
 
     fpu_restore(fpu_state);
+#if defined(i386) || defined(__x86_64)
+    if (fpu_mode == SSE2) {
+      sse_restore(sse_state);
+    }
+#endif
 }
 
 /* Get statistics that are kept "on the fly" out of the generation
Index: src/lisp/x86-assem.S
diff -u src/lisp/x86-assem.S:1.32 src/lisp/x86-assem.S:1.33
--- src/lisp/x86-assem.S:1.32	Tue Dec 23 23:36:40 2008
+++ src/lisp/x86-assem.S	Thu Apr  1 10:05:45 2010
@@ -1,6 +1,6 @@
 ### x86-assem.S -*- Mode: Asm; -*-
 /**
- * $Header: /project/cmucl/cvsroot/src/lisp/x86-assem.S,v 1.32 2008-12-24 04:36:40 rtoy Rel $
+ * $Header: /project/cmucl/cvsroot/src/lisp/x86-assem.S,v 1.33 2010-04-01 14:05:45 rtoy Exp $
  *
  * Authors:	Paul F. Werkowski <pw at snoopy.mv.com>
  *		Douglas T. Crosher
@@ -134,9 +134,22 @@
 
 /* Save the NPX state */
 	fwait			# Catch any pending NPX exceptions.
+	/* Save the SSE2 or X87 state */
+	mov	GNAME(fpu_mode), %eax
+	cmp	$2, %eax	# SSE2 mode?
+	jne	x87_save
+	movl	%esp, %eax	# Remember the current stack pointer
+	subl	$512,%esp	# Make room for the SSE state
+	andl	$-16, %esp	# fxsave needs 16-byte alignment
+	fxsave	(%esp)		
+	pushl	%eax		# Save the old stack pointer
+	fninit			# Reset fpu, just in case
+	jmp	npx_save_done
+
+x87_save:
 	subl	$108,%esp	# Make room for the NPX state.
 	fnsave	(%esp)		# Resets NPX
-
+		
 	movl	(%esp),%eax	# Load NPX control word
 	andl	$0xfffff3ff,%eax	# Set rounding mode to nearest
 #ifdef type_LongFloat
@@ -147,6 +160,7 @@
 	pushl	%eax
 	fldcw	(%esp)		# Recover modes
 	popl	%eax
+npx_save_done:			
 
 	fldz			# insure no FP regs are empty
 	fldz
@@ -224,9 +238,19 @@
 	popl	%ebx
 
 /* Restore the NPX state */
+	/* Restore SSE2 state? */
+	mov	GNAME(fpu_mode), %eax
+	cmp	$2, %eax	# SSE2 mode?
+	jne	x87_restore
+	popl	%eax		# Get the old stack pointer
+	fxrstor	(%esp)		# Restore the SSE state
+	movl	%eax, %esp	# Now really restore the old stack pointer
+	jmp	npx_restore_done
+x87_restore:
 	frstor  (%esp)
 	addl	$108, %esp
-	
+npx_restore_done:	
+			
 	popl	%ebp		# c-sp
 	movl	%edx,%eax	# c-val
 	ret
@@ -244,6 +268,22 @@
 	frstor	(%eax)		# Restore the NPX state.
 	ret
 ENDFUNC(fpu_restore)
+
+FUNCDEF(sse_save)
+	movl	4(%esp),%eax
+	addl	$16, %eax	# Make sure eax is on a 16-byte boundary
+	and	$-16, %eax
+	fxsave	(%eax)
+	ret
+ENDFUNC(sse_save)
+		
+FUNCDEF(sse_restore)
+	movl	4(%esp),%eax
+	addl	$16, %eax	# Make sure eax is on a 16-byte boundary
+	and	$-16, %eax
+	fxrstor	(%eax)
+	ret
+ENDFUNC(sse_restore)
 
 
 /*



More information about the cmucl-commit mailing list