Unoptimized GCC generated:
benstoker wrote:
Bo Persson wrote:
Ben, I will give you another version:
With VS2010 64-bit edition, this generates exactly the same instructions as the Crafty hand-optimized assembler version.
Code: Select all
/*
 * Count the number of 1 bits in Bits.
 *
 * Bits  value whose set bits are counted; negative values are counted by
 *       their bit pattern after conversion to unsigned long long.
 *
 * Returns the number of set bits (0 when Bits is 0).
 *
 * Declared `static inline` so that a call which the compiler chooses not to
 * inline still has a definition to link against.
 */
static inline int MemberCount(long long Bits)
{
    /* Work on an unsigned copy: `Bits - 1` on the signed type overflows
     * when Bits is the most negative value. */
    unsigned long long remaining = (unsigned long long)Bits;
    int Count = 0;

    while (remaining != 0) {
        ++Count;
        remaining &= remaining - 1;   /* clears the lowest set bit */
    }
    return Count;
} // COPYRIGHT 2011 Bo Persson
Now, without copying any code, try to figure out how this works and write your own version.
Copyright problem solved!
Code: Select all
/*
 * Count the set bits across the first n words of buf.
 *
 * buf  words to scan; only read, never written, and not dereferenced
 *      when n <= 0.
 * n    number of words to scan; zero or a negative value scans nothing.
 *
 * Returns the total number of 1 bits found in those words.
 */
static inline int popcount_original(unsigned *buf, int n)
{
    int cnt = 0;

    /* Test `n > 0` instead of `n--`: with a negative count the latter keeps
     * reading past the buffer until n itself overflows. */
    for (; n > 0; n--, buf++) {
        unsigned v = *buf;

        /* v & (v - 1) clears the lowest set bit, so this runs once per 1 bit. */
        while (v) {
            cnt++;
            v &= v - 1;
        }
    }
    return cnt;
}
Code: Select all
# _MemberCount: counts the set bits of a 64-bit stack argument
# (low half at 8(%ebp), high half at 12(%ebp)) and leaves the count in %eax.
# Locals: -24(%ebp)/-20(%ebp) hold the working copy of the value (low/high),
#         -4(%ebp) holds the running count.
_MemberCount:
pushl %ebp                # save the caller's frame pointer
movl %esp, %ebp           # set up this function's frame
subl $24, %esp            # reserve 24 bytes for locals
movl 8(%ebp), %eax
movl %eax, -24(%ebp)      # working copy, low 32 bits
movl 12(%ebp), %eax
movl %eax, -20(%ebp)      # working copy, high 32 bits
movl $0, -4(%ebp)         # count = 0
jmp L8                    # enter the loop at its test
L9:
addl $1, -4(%ebp)         # one more set bit counted
movl -24(%ebp), %eax
movl -20(%ebp), %edx
addl $-1, %eax            # 64-bit "value - 1": add -1 to the low half...
adcl $-1, %edx            # ...and carry into the high half
andl %eax, -24(%ebp)      # value &= value - 1 (low half)
andl %edx, -20(%ebp)      # value &= value - 1 (high half); lowest set bit is now cleared
L8:
movl -24(%ebp), %eax
movl -20(%ebp), %edx
orl %edx, %eax            # result is zero only if both halves are zero
testl %eax, %eax
jne L9                    # keep looping while any bit is still set
movl -4(%ebp), %eax       # load the count as the return value
leave
ret
Code: Select all
# _popcount_original: sums the set bits of consecutive 32-bit words.
# Stack arguments: 8(%ebp) is the pointer to the current word,
#                  12(%ebp) is the number of words left to scan.
# Locals: -4(%ebp) holds the word being reduced, -8(%ebp) the running total.
# The total is left in %eax on return.
_popcount_original:
pushl %ebp                # save the caller's frame pointer
movl %esp, %ebp           # set up this function's frame
subl $16, %esp            # reserve 16 bytes for locals
movl $0, -8(%ebp)         # total = 0
jmp L12                   # enter the outer loop at its test
L15:
movl 8(%ebp), %eax
movl (%eax), %eax         # load the word the pointer refers to
movl %eax, -4(%ebp)       # keep it as the working word
jmp L13                   # enter the inner loop at its test
L14:
addl $1, -8(%ebp)         # one more set bit counted
movl -4(%ebp), %eax
subl $1, %eax             # word - 1
andl %eax, -4(%ebp)       # word &= word - 1: clears the lowest set bit
L13:
cmpl $0, -4(%ebp)
jne L14                   # repeat while the working word is non-zero
addl $4, 8(%ebp)          # advance the pointer by 4 bytes (one word)
L12:
cmpl $0, 12(%ebp)         # is the remaining count non-zero?
setne %al                 # remember the answer before the count changes
subl $1, 12(%ebp)         # post-decrement the remaining count
testb %al, %al
jne L15                   # process another word if the count was non-zero
movl -8(%ebp), %eax       # load the total as the return value
leave
ret