| Author: | Wojciech Muła |
|---|---|
| Added on: | 2008-08-03 |
Unusual application of PHMINPOSUW instruction as key part of insertion sort for 8 element tables. I guess it won't find any practical usage.
typedef uint16_t table[8];
table max[8] = {
{0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
{0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
{0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
{0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000},
{0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000},
{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000},
{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000},
{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff}
};
void sse4_sort(table T) {
uint32_t dummy;
__asm__ volatile (
" movdqu (%%eax), %%xmm0 \n"
" xor %%ecx, %%ecx \n" // i = 0
"1: \n"
" phminposuw %%xmm0, %%xmm1 \n" // find min, and its index j
" movd %%xmm1, %%edx \n"
" movw %%dx, (%%eax, %%ecx, 2) \n" // save min at i-th position
" \n"
" shrl $16, %%edx \n"
" shll $4, %%edx \n"
" \n"
" por max(%%edx), %%xmm0 \n" // set max at pisition j
" \n"
" addl $1, %%ecx \n" // i += 1
" cmp $8, %%ecx \n"
" jl 1b \n"
:
: "a" (T)
: "ecx", "edx"
);
}