Patchwork xmmstack fixes

login
register
about
Submitter Rudolf Marek
Date 2009-10-09 17:47:12
Message ID <4ACF7720.4080301@assembler.cz>
Download mbox | patch
Permalink /patch/379/
State Not Applicable
Headers show

Comments

Rudolf Marek - 2009-10-09 17:47:12
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Hi all,

I added support for pushl xxx(%esp) instruction and also I managed to create
smarter wrapper code which means 10% drop in size (measured if -O0 is used).

Signed-off-by: Rudolf Marek <r.marek@assembler.cz>

Rudolf
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iEYEARECAAYFAkrPdyAACgkQ3J9wPJqZRNWhGwCeJcqRNRfZJf889PcP/RFhychk
SUoAn2lcBzedRFFpxOcBwDg34VluHGUs
=D22D
-----END PGP SIGNATURE-----
Kevin O'Connor - 2009-10-09 19:34:25
On Fri, Oct 09, 2009 at 07:47:12PM +0200, Rudolf Marek wrote:
> I added support for pushl xxx(%esp) instruction and also I managed to create
> smarter wrapper code which means 10% drop in size (measured if -O0 is used).

Nice!

BTW, I saw some further ways to enhance xmmstack:

It looks like either the xmm or mmx space is used.  However, my read
of the docs says both xmm and mmx can be used at the same time - this
could increase the space from 128 bytes to 192 bytes.

The code only fixes pointers relative to %esp, but I don't think
anything stops gcc from using non-esp pointers when referring to the
stack.  I think the code could be extended to fixup all memory
accesses.  Unfortunately, this would break those places where the code
needs to access real-memory (eg, pci space or constants in flash).  To
get around this, the wrapper functions could check the absolute
address of the access and only use xmm/mmx for those accesses between
say 0x0040-0x00c0 (where the stack could be located).  In order to test
the address, instructions that set flags would be needed, but backing
up and restoring the flags doesn't sound too hard (with lahf/"seto
%al").

Finally, it appears much of xmmstack is static code inserted at the
top of the file.  I think moving the static code into its own
assembler file that is included might make what's going on a little
more clear.

Just some random thoughts..
-Kevin
Rudolf Marek - 2009-10-09 20:24:57
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Hi Kevin,
> 
> It looks like either the xmm or mxx space is used.  However, my read
> of the docs says both xmm and mxx can be used at the same time - this
> could increase the space from 128 bytes to 192 bytes.

Yes I think one must have SSE2 and not SSE for this.

> The code only fixes pointers relative to %esp, but I don't think
> anything stops gcc from using non-esp pointers when referring to the
> stack. 

Well I believe if -fomit-frame pointer is used all is addressed relatively to esp.

> I think the code could be extended to fixup all memory
> accesses.  Unfortunately, this would break those places where the code
> needs to access real-memory (eg, pci space or constants in flash).  To
> get around this, the wrapper functions could check the absolute
> address of the access and only use xmm/mmx for those accesses between
> say 0x0040-0x00c0 (where the stack could be located).  In order to test
> the address, instructions that set flags would be needed, but backing
> up and restoring the flags doesn't sound too hard (with lahf/"seto
> %al").

Yes looks feasible.

> Finally, it appears much of xmmstack is static code inserted at the
> top of the file.  I think moving the static code into it's own
> assembler file that is included might make what's going on a little
> more clear.

Yes :)

Rudolf
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iEYEARECAAYFAkrPnBgACgkQ3J9wPJqZRNV0MgCgmvHuwklESvnfeMLn/sHE2M4m
r6UAmwUGm2M5SnxMSr88tgpTTKDLMhlt
=IuxC
-----END PGP SIGNATURE-----

Patch

Index: xmmstack.c
===================================================================
--- xmmstack.c	(revision 10)
+++ xmmstack.c	(working copy)
@@ -162,6 +162,30 @@ 
 //bswap and xchg also not affect flags.
 //dr2 must be initially 0x7f
 
+    fprintf(w,"docall:\nmovl %%eax,%%dr0 #NOSTACK\n\
+    ovl %%dr2,%%eax #NOSTACK\n\
+    movzbl %%al,%%eax #NOSTACK\n\
+    leal -4(%%eax),%%eax #NOSTACK\n\
+    movb %%al,%%ah #NOSTACK\n\
+    movl %%eax,%%dr2 #NOSTACK\n\
+    leal 8(%%esp),%%eax #NOSTACK\n\
+    jmp setxmml\n\
+    dopop:\n movl %%eax,%%dr0 #NOSTACK\n\
+    movl %%dr2,%%eax #NOSTACK\n\
+    movzbl %%al,%%eax #NOSTACK\n\
+    movb %%al,%%ah #NOSTACK\n\
+    leal 4(%%eax),%%eax #NOSTACK\n\
+    movl %%eax,%%dr2 #NOSTACK\n\
+    jmp getxmml\n\
+    dopush:\n movl %%eax,%%dr3 #NOSTACK\n\
+    movl %%dr2,%%eax #NOSTACK\n\
+    movzbl %%al,%%eax #NOSTACK\n\
+    leal -4(%%eax),%%eax #NOSTACK\n\
+    movb %%al,%%ah #NOSTACK\n\
+    movl %%eax,%%dr2 #NOSTACK\n\
+    movl %%dr3,%%eax\n\
+    jmp setxmml\n");
+
     //16 bytes table xmm/12 mmx, pextrw and pinsrw consumes 5 bytes, and jmp *esp consumes 2, nop consumes 1 byte, movb consumes 2 bytes
     //Align of each,must 16 bytes, for use 1,2,4,8 escalar multiple.
     fprintf(w,"setxmm:\n");
@@ -550,6 +574,23 @@ 
 
 	fprintf(w,"#%s #ORIG\n", buf);
 
+	/* handle pushl xxx(%esp) */
+        if (regexp("^pushl\\s+(\\d*)\\(\\%esp\\)",buf,len,find)) {
+            fprintf(w,"movl %%eax,%%dr0 #NOSTACK\n");
+            fprintf(w,"movl %%dr2,%%eax #NOSTACK\n");
+            fprintf(w,"movzbl %%al,%%eax #NOSTACK\n");
+            fprintf(w,"movb %%al,%%ah #NOSTACK\n");
+            fprintf(w,"leal %d(%%eax),%%eax #NOSTACK\n",256*atoi(find[1]));
+            fprintf(w,"movl %%eax,%%dr2 #NOSTACK\n");
+            fprintf(w,"movl %%dr0,%%eax #NOSTACK\n");
+            fprintf(w,"movl $.+10,%%esp\n");
+            fprintf(w,"jmp getxmml\n");
+            fprintf(w,"movl $.+10,%%esp\n");
+            fprintf(w,"jmp dopush\n");
+            fprintf(w,"movl %%dr0,%%eax #NOSTACK\n");
+            continue;
+	}
+
 	/* leal is tricky */
 	if (regexp("^leal\\s+(\\d*)\\(\\%esp\\)\\s*,\\s*(.*)$",buf,len,find))
 	{
@@ -675,27 +716,17 @@ 
         if (regexp("^pushl\\s+(.*)$",buf,len,find))
         {
             fprintf(w,"movl %%eax,%%dr0 #NOSTACK\n");
-            fprintf(w,"movl %%dr2,%%eax #NOSTACK\n");
-            fprintf(w,"movzbl %%al,%%eax #NOSTACK\n");
-            fprintf(w,"leal -4(%%eax),%%eax #NOSTACK\n");
-            fprintf(w,"movb %%al,%%ah #NOSTACK\n");
-            fprintf(w,"movl %%eax,%%dr2 #NOSTACK\n");
             fprintf(w,"movl %s,%%eax\n",find[1]);
             fprintf(w,"movl $.+10,%%esp\n");
-            fprintf(w,"jmp setxmml\n");
+            fprintf(w,"jmp dopush\n");
             fprintf(w,"movl %%dr0,%%eax #NOSTACK\n");
             continue;
         }
         if (regexp("^popl\\s+(.*)$",buf,len,find))
         {
-            fprintf(w,"movl %%eax,%%dr0 #NOSTACK\n");
-            fprintf(w,"movl %%dr2,%%eax #NOSTACK\n");
-            fprintf(w,"movzbl %%al,%%eax #NOSTACK\n");
-            fprintf(w,"movb %%al,%%ah #NOSTACK\n");
-            fprintf(w,"leal 4(%%eax),%%eax #NOSTACK\n");
-            fprintf(w,"movl %%eax,%%dr2 #NOSTACK\n");
+
             fprintf(w,"movl $.+10,%%esp\n");
-            fprintf(w,"jmp getxmml\n");
+            fprintf(w,"jmp dopop\n");
             fprintf(w,"movl %%eax,%s\n",find[1]);
             if (strcmp(find[1],"%eax"))
                 fprintf(w,"movl %%dr0,%%eax #NOSTACK\n");
@@ -703,15 +734,8 @@ 
         }
         if (regexp("^call\\s+(.*)$",buf,len,find))
         {
-            fprintf(w,"movl %%eax,%%dr0 #NOSTACK\n");
-            fprintf(w,"movl %%dr2,%%eax #NOSTACK\n");
-            fprintf(w,"movzbl %%al,%%eax #NOSTACK\n");
-            fprintf(w,"leal -4(%%eax),%%eax #NOSTACK\n");
-            fprintf(w,"movb %%al,%%ah #NOSTACK\n");
-            fprintf(w,"movl %%eax,%%dr2 #NOSTACK\n");
-            fprintf(w,"movl $.+23,%%eax #NOSTACK\n");
             fprintf(w,"movl $.+10,%%esp\n");
-            fprintf(w,"jmp setxmml\n");
+            fprintf(w,"jmp docall\n");
             fprintf(w,"movl %%dr0,%%eax #NOSTACK\n");
             fprintf(w,".byte 0xe9\n.long %s-4-. #NOSTACK\n",find[1]);//jmp, not works with .global??? gcc???            
             continue;