CMUCL commit: src/lisp (gencgc.c x86-assem.S)
Raymond Toy
rtoy at common-lisp.net
Thu Apr 1 16:05:46 CEST 2010
Date: Thursday, April 1, 2010 @ 10:05:46
Author: rtoy
Path: /project/cmucl/cvsroot/src/lisp
Modified: gencgc.c x86-assem.S
Fix SSE2 bug when running the following test:
(defun testfn ()
(let* ((i-gc-ed-u nil)
(ext:*gc-notify-before* (lambda (a) (/ a 0.34d0))))
(dotimes (i 100000)
(setf i-gc-ed-u nil)
(let* ((v1 (list (* 1d0 (random 10)) (* 1d0 (random 10))))
(v2 (list (* 1d0 (random 10)) (* 1d0 (random 10))))
(dot1 (reduce #'+ (mapcar #'* v1 v2)))
(dot2 (reduce #'+ (mapcar #'* v1 v2))))
(when (/= dot1 dot2)
(print `(gc ,i-gc-ed-u v1 ,v1 v2 ,v2 dot1 ,dot1 dot2 ,dot2)))))))
Running this with sse2 would cause dot1 and dot2 to sometimes be
different. We forgot to save the SSE2 state in call_into_lisp.
This bug was mentioned on comp.lang.lisp; see http://groups.google.com/group/comp.lang.lisp/browse_thread/thread/828371aa4800272c?hl=en#
x86-assem.S:
o Save and restore SSE2 state when running with SSE2 core. (This
might need more tweaking. Should we use cpuid instead of looking at
fpu_mode?)
gencgc.c:
o Save the SSE2 state, along with X87 state for SSE2 cores.
-------------+
gencgc.c | 20 ++++++++++++++++++--
x86-assem.S | 46 +++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 61 insertions(+), 5 deletions(-)
Index: src/lisp/gencgc.c
diff -u src/lisp/gencgc.c:1.106 src/lisp/gencgc.c:1.107
--- src/lisp/gencgc.c:1.106 Mon Dec 21 06:36:43 2009
+++ src/lisp/gencgc.c Thu Apr 1 10:05:45 2010
@@ -7,7 +7,7 @@
*
* Douglas Crosher, 1996, 1997, 1998, 1999.
*
- * $Header: /project/cmucl/cvsroot/src/lisp/gencgc.c,v 1.106 2009-12-21 11:36:43 rswindells Exp $
+ * $Header: /project/cmucl/cvsroot/src/lisp/gencgc.c,v 1.107 2010-04-01 14:05:45 rtoy Exp $
*
*/
@@ -662,7 +662,14 @@
#if defined(i386) || defined(__x86_64)
#define FPU_STATE_SIZE 27
+ /*
+ * Need 512 byte area, aligned on a 16-byte boundary. So allocate
+ * 512+16 bytes of space and let the routine adjust use the
+ * appropriate alignment.
+ */
+#define SSE_STATE_SIZE ((512+16)/4)
int fpu_state[FPU_STATE_SIZE];
+ int sse_state[SSE_STATE_SIZE];
#elif defined(sparc)
/*
* 32 (single-precision) FP registers, and the FP state register.
@@ -683,7 +690,11 @@
*/
fpu_save(fpu_state);
-
+#if defined(i386) || defined(__x86_64)
+ if (fpu_mode == SSE2) {
+ sse_save(sse_state);
+ }
+#endif
/* Number of generations to print out. */
if (verbose)
@@ -738,6 +749,11 @@
fprintf(stderr, " Total bytes alloc=%ld\n", bytes_allocated);
fpu_restore(fpu_state);
+#if defined(i386) || defined(__x86_64)
+ if (fpu_mode == SSE2) {
+ sse_restore(sse_state);
+ }
+#endif
}
/* Get statistics that are kept "on the fly" out of the generation
Index: src/lisp/x86-assem.S
diff -u src/lisp/x86-assem.S:1.32 src/lisp/x86-assem.S:1.33
--- src/lisp/x86-assem.S:1.32 Tue Dec 23 23:36:40 2008
+++ src/lisp/x86-assem.S Thu Apr 1 10:05:45 2010
@@ -1,6 +1,6 @@
### x86-assem.S -*- Mode: Asm; -*-
/**
- * $Header: /project/cmucl/cvsroot/src/lisp/x86-assem.S,v 1.32 2008-12-24 04:36:40 rtoy Rel $
+ * $Header: /project/cmucl/cvsroot/src/lisp/x86-assem.S,v 1.33 2010-04-01 14:05:45 rtoy Exp $
*
* Authors: Paul F. Werkowski <pw at snoopy.mv.com>
* Douglas T. Crosher
@@ -134,9 +134,22 @@
/* Save the NPX state */
fwait # Catch any pending NPX exceptions.
+ /* Save the SSE2 for X87 state */
+ mov GNAME(fpu_mode), %eax
+ cmp $2, %eax # SSE2 mode?
+ jne x87_save
+ movl %esp, %eax # Remember the current stack pointer
+ subl $512,%esp # Make room for the SSE state
+ andl $-16, %esp # fxsave needs 16-byte alignment
+ fxsave (%esp)
+ pushl %eax # Save the old stack pointer
+ fninit # Reset fpu, just in case
+ jmp npx_save_done
+
+x87_save:
subl $108,%esp # Make room for the NPX state.
fnsave (%esp) # Resets NPX
-
+
movl (%esp),%eax # Load NPX control word
andl $0xfffff3ff,%eax # Set rounding mode to nearest
#ifdef type_LongFloat
@@ -147,6 +160,7 @@
pushl %eax
fldcw (%esp) # Recover modes
popl %eax
+npx_save_done:
fldz # insure no FP regs are empty
fldz
@@ -224,9 +238,19 @@
popl %ebx
/* Restore the NPX state */
+ /* Restore SSE2 state? */
+ mov GNAME(fpu_mode), %eax
+ cmp $2, %eax # SSE2 mode?
+ jne x87_restore
+ popl %eax # Get the old stack pointer
+ fxrstor (%esp) # Restore the SSE state
+ movl %eax, %esp # Now really restore the old stack pointer
+ jmp npx_restore_done
+x87_restore:
frstor (%esp)
addl $108, %esp
-
+npx_restore_done:
+
popl %ebp # c-sp
movl %edx,%eax # c-val
ret
@@ -244,6 +268,22 @@
frstor (%eax) # Restore the NPX state.
ret
ENDFUNC(fpu_restore)
+
+FUNCDEF(sse_save)
+ movl 4(%esp),%eax
+ addl $16, %eax # Make sure eax is on a 16-byte boundary
+ and $-16, %eax
+ fxsave (%eax)
+ ret
+ENDFUNC(sse_save)
+
+FUNCDEF(sse_restore)
+ movl 4(%esp),%eax
+ addl $16, %eax # Make sure eax is on a 16-byte boundary
+ and $-16, %eax
+ fxrstor (%eax)
+ ret
+ENDFUNC(sse_restore)
/*
More information about the cmucl-commit
mailing list