Author: | Wojciech Muła |
---|---|
Added on: | 2008-08-03 |
An unusual application of the PHMINPOSUW instruction (SSE4.1) as the key part of an insertion sort for 8-element tables of 16-bit words. I guess it won't find any practical use.
typedef uint16_t table[8];

/*
 * max[j] holds 0xffff in lane j and zero in every other lane.  OR-ing row j
 * into the working vector forces lane j to the largest 16-bit value, so that
 * lane no longer competes for the minimum.
 *
 * The rows are used directly as the memory operand of POR, which requires a
 * 16-byte-aligned address, hence the explicit alignment.
 */
table max[8] __attribute__((aligned(16))) = {
    {0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
    {0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
    {0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
    {0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000},
    {0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000},
    {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000},
    {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000},
    {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff}
};

/*
 * Sort the eight 16-bit words of T in place, in ascending (unsigned) order.
 *
 * On x86 the whole table is loaded into xmm0 once.  Each of the eight
 * iterations uses PHMINPOSUW to obtain the current minimum and its lane
 * index j, stores the minimum at T[i], and then ORs max[j] into xmm0 so the
 * lane that was just consumed becomes 0xffff.
 *
 * The x86 path executes an SSE4.1 instruction unconditionally; the caller is
 * responsible for running it only on CPUs that support SSE4.1.  T may be
 * unaligned (it is read with MOVDQU).
 *
 * On other architectures a plain insertion sort produces the same result.
 */
void sse4_sort(table T)
{
#if defined(__i386__) || defined(__x86_64__)
    /* Pointer-sized so both can appear in address expressions on i386 and
     * x86-64 alike. */
    uintptr_t i = 0;    /* output position */
    uintptr_t j;        /* scratch: min|index, then byte offset into max */

    __asm__ volatile (
        "   movdqu  (%[t]), %%xmm0              \n"  // working copy of T
        "1:                                     \n"
        "   phminposuw %%xmm0, %%xmm1           \n"  // bits 15:0 = min, 18:16 = index j
        "   movd    %%xmm1, %k[j]               \n"
        "   movw    %w[j], (%[t], %[i], 2)      \n"  // save min at i-th position
        "                                       \n"
        "   shrl    $16, %k[j]                  \n"  // j
        "   shll    $4, %k[j]                   \n"  // j * sizeof(table)
        "                                       \n"
        "   por     (%[max], %[j]), %%xmm0      \n"  // set max at position j
        "                                       \n"
        "   add     $1, %[i]                    \n"  // i += 1
        "   cmp     $8, %[i]                    \n"
        "   jb      1b                          \n"
        : [i] "+&r" (i), [j] "=&r" (j)
        : [t] "r" (T), [max] "r" (max)
        : "xmm0", "xmm1", "memory", "cc"
    );
#else
    /* Portable fallback: insertion sort over the eight elements. */
    for (int i = 1; i < 8; i++) {
        uint16_t key = T[i];
        int k = i;

        while (k > 0 && T[k - 1] > key) {
            T[k] = T[k - 1];
            k--;
        }
        T[k] = key;
    }
#endif
}