SSE4.1: PHMINPOSUW — insertion sort

Author:Wojciech Muła
Added on:3.08.2008

Unusual application of PHMINPOSUW instruction as key part of insertion sort for 8 element tables. I guess it won't find any practical usage.

Implementation:


typedef uint16_t table[8];

table max[8] = {
        {0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
        {0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
        {0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
        {0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000},
        {0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000},
        {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000},
        {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000},
        {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff}
};

void sse4_sort(table T) {
        uint32_t dummy;

        __asm__ volatile (
        "       movdqu (%%eax), %%xmm0          \n"
        "       xor %%ecx, %%ecx                \n"     // i = 0
        "1:                                     \n"
        "       phminposuw %%xmm0, %%xmm1       \n"     // find min, and its index j
        "       movd %%xmm1, %%edx              \n"
        "       movw   %%dx, (%%eax, %%ecx, 2)  \n"     // save min at i-th position
        "                                       \n"
        "       shrl   $16, %%edx               \n"
        "       shll    $4, %%edx               \n"
        "                                       \n"
        "       por  max(%%edx), %%xmm0         \n"     // set max at pisition j
        "                                       \n"
        "       addl    $1, %%ecx               \n"     // i += 1
        "       cmp     $8, %%ecx               \n"
        "       jl      1b                      \n"
        
        :
        : "a" (T)
        : "ecx", "edx"
        );
}