Exploiting Dual Cores with Py_NewInterpreter's separated GIL?

Fri Nov 10 02:40:15 EST 2006

robert wrote:

> --------------
> 0040101F   mov         eax,3B9ACA00h
> 13:     for (i = 0; i < count; ++i) {
> 14:         __asm lock inc x;
> 00401024   lock inc    dword ptr [_x (00408a00)]
> 15:         sum += x;
> 0040102B   mov         edx,dword ptr [_x (00408a00)]
> 00401031   add         esi,edx
> 00401033   dec         eax
> 00401034   jne         main+24h (00401024)
> 16:     }
> ---------------
> 
> results on a UP PIII :
> 
> INC version:
> clocks: 7520
> secs:   7
> 
> LOCK INC version:
> clocks: 36632
> secs:   36
> 
> 
> It's probably not much...

The Intel CPUs I checked all show about this factor of ~5 in that simple example.

AMD CPUs were typically faster: a factor of less than ~3.

-robert

PS: 
the asm for x86 linux/gcc:
-------------------

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Counter bumped by the benchmark loop; volatile so the compiler performs
   every read of it instead of caching the value. */
volatile int x = 0;

/* Processor-time readings taken with clock() before and after the loop. */
clock_t c0;
clock_t c1;

/* Calendar-time readings taken with time() before and after the loop. */
time_t t0;
time_t t1;

/*
 * Micro-benchmark: increment the global counter `x` `count` times with an
 * x86 INC instruction on a memory operand, and report how long that took.
 *
 * Prints the elapsed processor time in clock() ticks ("clocks:") and the
 * elapsed calendar time in seconds ("secs:").  To time the LOCK INC variant
 * mentioned in the results above, change the instruction to "lock; incl %0".
 *
 * argc/argv are unused.  The accumulated sum is returned as the exit status,
 * presumably so the compiler cannot discard the loop's reads of `x`.
 *
 * x86 / GCC-compatible compilers only (inline assembly).
 */
int main (int argc, char **argv) {
  /* Unsigned: the total far exceeds INT_MAX, and signed overflow is
     undefined behaviour whereas unsigned arithmetic simply wraps. */
  unsigned int sum = 0;
  int i, count = 100000000;

  (void)argc;
  (void)argv;

  t0 = time(NULL);
  c0 = clock();
  for (i = 0; i < count; ++i) {
    /* "+m": INC both reads and writes its operand, so x must be declared
       as an in/out operand.  volatile keeps the compiler from moving or
       merging the instruction; __asm__ is accepted under strict -std=c99/c11
       where the plain `asm` keyword is not. */
    __asm__ __volatile__ ("incl %0" : "+m"(x));
    sum += (unsigned int)x;
  }
  c1 = clock();
  t1 = time(NULL);

  /* clock_t and time_t are not int; convert explicitly so the arguments
     match the conversion specifiers. */
  printf("clocks: %ld\n", (long)(c1 - c0));
  printf("secs:   %ld\n", (long)(t1 - t0));
  return (int)sum;
}