| #include "llvm_blake3_prefix.h" |
| |
| .intel_syntax noprefix |
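/*
 * BLAKE3 SSE4.1 kernels for x86-64 Windows, GNU assembler, Intel syntax.
 * Exports the many-input hasher plus the single-block compress and
 * compress-XOF entry points listed below, each both with and without a
 * leading underscore.
 */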
| .global blake3_hash_many_sse41 |
| .global _blake3_hash_many_sse41 |
| .global blake3_compress_in_place_sse41 |
| .global _blake3_compress_in_place_sse41 |
| .global blake3_compress_xof_sse41 |
| .global _blake3_compress_xof_sse41 |
| .section .text |
| .p2align 6 |
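/*
 * blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
 *                        increment_counter, flags, flags_start, flags_end, out)
 * Microsoft x64 calling convention: the first four arguments arrive in
 * rcx, rdx, r8 and r9; the remaining ones are read off the caller's stack
 * through rbp after the prologue below.
 */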
| _blake3_hash_many_sse41: |
| blake3_hash_many_sse41: |
| push r15 |
| push r14 |
| push r13 |
| push r12 |
| push rsi |
| push rdi |
| push rbx |
| push rbp |
| mov rbp, rsp |
| sub rsp, 528 |
| and rsp, 0xFFFFFFFFFFFFFFC0 |
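/* xmm6-xmm15 are callee-saved in the Microsoft x64 ABI; spill them into the
   64-byte-aligned scratch area just reserved on the stack. */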
| movdqa xmmword ptr [rsp+0x170], xmm6 |
| movdqa xmmword ptr [rsp+0x180], xmm7 |
| movdqa xmmword ptr [rsp+0x190], xmm8 |
| movdqa xmmword ptr [rsp+0x1A0], xmm9 |
| movdqa xmmword ptr [rsp+0x1B0], xmm10 |
| movdqa xmmword ptr [rsp+0x1C0], xmm11 |
| movdqa xmmword ptr [rsp+0x1D0], xmm12 |
| movdqa xmmword ptr [rsp+0x1E0], xmm13 |
| movdqa xmmword ptr [rsp+0x1F0], xmm14 |
| movdqa xmmword ptr [rsp+0x200], xmm15 |
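/* Remap the Win64 argument registers onto the layout used by the body:
   rdi = inputs, rsi = num_inputs, rdx = blocks (per input), rcx = key,
   with counter and increment_counter fetched from the stack via rbp. */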
| mov rdi, rcx |
| mov rsi, rdx |
| mov rdx, r8 |
| mov rcx, r9 |
| mov r8, qword ptr [rbp+0x68] |
| movzx r9, byte ptr [rbp+0x70] |
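/* Build the per-lane 64-bit counters: negate the increment_counter byte into
   an all-ones or all-zero mask, apply it to the lane offsets {0,1,2,3} (ADD0)
   and the stride {4,4,4,4} (ADD1), keep the low counter half plus offsets in
   [rsp+0x110], and derive the carry into the high half ([rsp+0x120]) with a
   signed compare after flipping the sign bits (CMP_MSB_MASK). */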
| neg r9d |
| movd xmm0, r9d |
| pshufd xmm0, xmm0, 0x00 |
| movdqa xmmword ptr [rsp+0x130], xmm0 |
| movdqa xmm1, xmm0 |
| pand xmm1, xmmword ptr [ADD0+rip] |
| pand xmm0, xmmword ptr [ADD1+rip] |
| movdqa xmmword ptr [rsp+0x150], xmm0 |
| movd xmm0, r8d |
| pshufd xmm0, xmm0, 0x00 |
| paddd xmm0, xmm1 |
| movdqa xmmword ptr [rsp+0x110], xmm0 |
| pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] |
| pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] |
| pcmpgtd xmm1, xmm0 |
| shr r8, 32 |
| movd xmm2, r8d |
| pshufd xmm2, xmm2, 0x00 |
| psubd xmm2, xmm1 |
| movdqa xmmword ptr [rsp+0x120], xmm2 |
| mov rbx, qword ptr [rbp+0x90] |
| mov r15, rdx |
| shl r15, 6 |
| movzx r13d, byte ptr [rbp+0x78] |
| movzx r12d, byte ptr [rbp+0x88] |
| cmp rsi, 4 |
| jc 3f |
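/* Main path: hash four inputs per iteration while at least four remain. */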
| 2: |
| movdqu xmm3, xmmword ptr [rcx] |
| pshufd xmm0, xmm3, 0x00 |
| pshufd xmm1, xmm3, 0x55 |
| pshufd xmm2, xmm3, 0xAA |
| pshufd xmm3, xmm3, 0xFF |
| movdqu xmm7, xmmword ptr [rcx+0x10] |
| pshufd xmm4, xmm7, 0x00 |
| pshufd xmm5, xmm7, 0x55 |
| pshufd xmm6, xmm7, 0xAA |
| pshufd xmm7, xmm7, 0xFF |
| mov r8, qword ptr [rdi] |
| mov r9, qword ptr [rdi+0x8] |
| mov r10, qword ptr [rdi+0x10] |
| mov r11, qword ptr [rdi+0x18] |
| movzx eax, byte ptr [rbp+0x80] |
| or eax, r13d |
| xor edx, edx |
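/* Block loop over all four inputs in parallel. flags_start is ORed in for the
   first block only (eax is reset to the base flags at the bottom of the loop)
   and flags_end (r12d) only when rdx reaches the final block. */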
| 9: |
| mov r14d, eax |
| or eax, r12d |
| add rdx, 64 |
| cmp rdx, r15 |
| cmovne eax, r14d |
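/* Load one 64-byte block from each input and transpose the sixteen 32-bit
   message words into [rsp+0x00..0xF0], one vector per word with one lane per
   input. */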
| movdqu xmm8, xmmword ptr [r8+rdx-0x40] |
| movdqu xmm9, xmmword ptr [r9+rdx-0x40] |
| movdqu xmm10, xmmword ptr [r10+rdx-0x40] |
| movdqu xmm11, xmmword ptr [r11+rdx-0x40] |
| movdqa xmm12, xmm8 |
| punpckldq xmm8, xmm9 |
| punpckhdq xmm12, xmm9 |
| movdqa xmm14, xmm10 |
| punpckldq xmm10, xmm11 |
| punpckhdq xmm14, xmm11 |
| movdqa xmm9, xmm8 |
| punpcklqdq xmm8, xmm10 |
| punpckhqdq xmm9, xmm10 |
| movdqa xmm13, xmm12 |
| punpcklqdq xmm12, xmm14 |
| punpckhqdq xmm13, xmm14 |
| movdqa xmmword ptr [rsp], xmm8 |
| movdqa xmmword ptr [rsp+0x10], xmm9 |
| movdqa xmmword ptr [rsp+0x20], xmm12 |
| movdqa xmmword ptr [rsp+0x30], xmm13 |
| movdqu xmm8, xmmword ptr [r8+rdx-0x30] |
| movdqu xmm9, xmmword ptr [r9+rdx-0x30] |
| movdqu xmm10, xmmword ptr [r10+rdx-0x30] |
| movdqu xmm11, xmmword ptr [r11+rdx-0x30] |
| movdqa xmm12, xmm8 |
| punpckldq xmm8, xmm9 |
| punpckhdq xmm12, xmm9 |
| movdqa xmm14, xmm10 |
| punpckldq xmm10, xmm11 |
| punpckhdq xmm14, xmm11 |
| movdqa xmm9, xmm8 |
| punpcklqdq xmm8, xmm10 |
| punpckhqdq xmm9, xmm10 |
| movdqa xmm13, xmm12 |
| punpcklqdq xmm12, xmm14 |
| punpckhqdq xmm13, xmm14 |
| movdqa xmmword ptr [rsp+0x40], xmm8 |
| movdqa xmmword ptr [rsp+0x50], xmm9 |
| movdqa xmmword ptr [rsp+0x60], xmm12 |
| movdqa xmmword ptr [rsp+0x70], xmm13 |
| movdqu xmm8, xmmword ptr [r8+rdx-0x20] |
| movdqu xmm9, xmmword ptr [r9+rdx-0x20] |
| movdqu xmm10, xmmword ptr [r10+rdx-0x20] |
| movdqu xmm11, xmmword ptr [r11+rdx-0x20] |
| movdqa xmm12, xmm8 |
| punpckldq xmm8, xmm9 |
| punpckhdq xmm12, xmm9 |
| movdqa xmm14, xmm10 |
| punpckldq xmm10, xmm11 |
| punpckhdq xmm14, xmm11 |
| movdqa xmm9, xmm8 |
| punpcklqdq xmm8, xmm10 |
| punpckhqdq xmm9, xmm10 |
| movdqa xmm13, xmm12 |
| punpcklqdq xmm12, xmm14 |
| punpckhqdq xmm13, xmm14 |
| movdqa xmmword ptr [rsp+0x80], xmm8 |
| movdqa xmmword ptr [rsp+0x90], xmm9 |
| movdqa xmmword ptr [rsp+0xA0], xmm12 |
| movdqa xmmword ptr [rsp+0xB0], xmm13 |
| movdqu xmm8, xmmword ptr [r8+rdx-0x10] |
| movdqu xmm9, xmmword ptr [r9+rdx-0x10] |
| movdqu xmm10, xmmword ptr [r10+rdx-0x10] |
| movdqu xmm11, xmmword ptr [r11+rdx-0x10] |
| movdqa xmm12, xmm8 |
| punpckldq xmm8, xmm9 |
| punpckhdq xmm12, xmm9 |
| movdqa xmm14, xmm10 |
| punpckldq xmm10, xmm11 |
| punpckhdq xmm14, xmm11 |
| movdqa xmm9, xmm8 |
| punpcklqdq xmm8, xmm10 |
| punpckhqdq xmm9, xmm10 |
| movdqa xmm13, xmm12 |
| punpcklqdq xmm12, xmm14 |
| punpckhqdq xmm13, xmm14 |
| movdqa xmmword ptr [rsp+0xC0], xmm8 |
| movdqa xmmword ptr [rsp+0xD0], xmm9 |
| movdqa xmmword ptr [rsp+0xE0], xmm12 |
| movdqa xmmword ptr [rsp+0xF0], xmm13 |
| movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] |
| movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] |
| movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] |
| movdqa xmm12, xmmword ptr [rsp+0x110] |
| movdqa xmm13, xmmword ptr [rsp+0x120] |
| movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] |
| movd xmm15, eax |
| pshufd xmm15, xmm15, 0x00 |
| prefetcht0 [r8+rdx+0x80] |
| prefetcht0 [r9+rdx+0x80] |
| prefetcht0 [r10+rdx+0x80] |
| prefetcht0 [r11+rdx+0x80] |
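/* Seven rounds of the BLAKE3 round function on four lanes at once. Rotations
   by 16 and 8 use the pshufb tables ROT16/ROT8; rotations by 12 and 7 use a
   shift-and-or pair. One state row is spilled to [rsp+0x100] because all
   sixteen XMM registers are in use. */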
| paddd xmm0, xmmword ptr [rsp] |
| paddd xmm1, xmmword ptr [rsp+0x20] |
| paddd xmm2, xmmword ptr [rsp+0x40] |
| paddd xmm3, xmmword ptr [rsp+0x60] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x10] |
| paddd xmm1, xmmword ptr [rsp+0x30] |
| paddd xmm2, xmmword ptr [rsp+0x50] |
| paddd xmm3, xmmword ptr [rsp+0x70] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x80] |
| paddd xmm1, xmmword ptr [rsp+0xA0] |
| paddd xmm2, xmmword ptr [rsp+0xC0] |
| paddd xmm3, xmmword ptr [rsp+0xE0] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x90] |
| paddd xmm1, xmmword ptr [rsp+0xB0] |
| paddd xmm2, xmmword ptr [rsp+0xD0] |
| paddd xmm3, xmmword ptr [rsp+0xF0] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x20] |
| paddd xmm1, xmmword ptr [rsp+0x30] |
| paddd xmm2, xmmword ptr [rsp+0x70] |
| paddd xmm3, xmmword ptr [rsp+0x40] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x60] |
| paddd xmm1, xmmword ptr [rsp+0xA0] |
| paddd xmm2, xmmword ptr [rsp] |
| paddd xmm3, xmmword ptr [rsp+0xD0] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x10] |
| paddd xmm1, xmmword ptr [rsp+0xC0] |
| paddd xmm2, xmmword ptr [rsp+0x90] |
| paddd xmm3, xmmword ptr [rsp+0xF0] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xB0] |
| paddd xmm1, xmmword ptr [rsp+0x50] |
| paddd xmm2, xmmword ptr [rsp+0xE0] |
| paddd xmm3, xmmword ptr [rsp+0x80] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x30] |
| paddd xmm1, xmmword ptr [rsp+0xA0] |
| paddd xmm2, xmmword ptr [rsp+0xD0] |
| paddd xmm3, xmmword ptr [rsp+0x70] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x40] |
| paddd xmm1, xmmword ptr [rsp+0xC0] |
| paddd xmm2, xmmword ptr [rsp+0x20] |
| paddd xmm3, xmmword ptr [rsp+0xE0] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x60] |
| paddd xmm1, xmmword ptr [rsp+0x90] |
| paddd xmm2, xmmword ptr [rsp+0xB0] |
| paddd xmm3, xmmword ptr [rsp+0x80] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x50] |
| paddd xmm1, xmmword ptr [rsp] |
| paddd xmm2, xmmword ptr [rsp+0xF0] |
| paddd xmm3, xmmword ptr [rsp+0x10] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xA0] |
| paddd xmm1, xmmword ptr [rsp+0xC0] |
| paddd xmm2, xmmword ptr [rsp+0xE0] |
| paddd xmm3, xmmword ptr [rsp+0xD0] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x70] |
| paddd xmm1, xmmword ptr [rsp+0x90] |
| paddd xmm2, xmmword ptr [rsp+0x30] |
| paddd xmm3, xmmword ptr [rsp+0xF0] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x40] |
| paddd xmm1, xmmword ptr [rsp+0xB0] |
| paddd xmm2, xmmword ptr [rsp+0x50] |
| paddd xmm3, xmmword ptr [rsp+0x10] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp] |
| paddd xmm1, xmmword ptr [rsp+0x20] |
| paddd xmm2, xmmword ptr [rsp+0x80] |
| paddd xmm3, xmmword ptr [rsp+0x60] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xC0] |
| paddd xmm1, xmmword ptr [rsp+0x90] |
| paddd xmm2, xmmword ptr [rsp+0xF0] |
| paddd xmm3, xmmword ptr [rsp+0xE0] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xD0] |
| paddd xmm1, xmmword ptr [rsp+0xB0] |
| paddd xmm2, xmmword ptr [rsp+0xA0] |
| paddd xmm3, xmmword ptr [rsp+0x80] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x70] |
| paddd xmm1, xmmword ptr [rsp+0x50] |
| paddd xmm2, xmmword ptr [rsp] |
| paddd xmm3, xmmword ptr [rsp+0x60] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x20] |
| paddd xmm1, xmmword ptr [rsp+0x30] |
| paddd xmm2, xmmword ptr [rsp+0x10] |
| paddd xmm3, xmmword ptr [rsp+0x40] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x90] |
| paddd xmm1, xmmword ptr [rsp+0xB0] |
| paddd xmm2, xmmword ptr [rsp+0x80] |
| paddd xmm3, xmmword ptr [rsp+0xF0] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xE0] |
| paddd xmm1, xmmword ptr [rsp+0x50] |
| paddd xmm2, xmmword ptr [rsp+0xC0] |
| paddd xmm3, xmmword ptr [rsp+0x10] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xD0] |
| paddd xmm1, xmmword ptr [rsp] |
| paddd xmm2, xmmword ptr [rsp+0x20] |
| paddd xmm3, xmmword ptr [rsp+0x40] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0x30] |
| paddd xmm1, xmmword ptr [rsp+0xA0] |
| paddd xmm2, xmmword ptr [rsp+0x60] |
| paddd xmm3, xmmword ptr [rsp+0x70] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xB0] |
| paddd xmm1, xmmword ptr [rsp+0x50] |
| paddd xmm2, xmmword ptr [rsp+0x10] |
| paddd xmm3, xmmword ptr [rsp+0x80] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xF0] |
| paddd xmm1, xmmword ptr [rsp] |
| paddd xmm2, xmmword ptr [rsp+0x90] |
| paddd xmm3, xmmword ptr [rsp+0x60] |
| paddd xmm0, xmm4 |
| paddd xmm1, xmm5 |
| paddd xmm2, xmm6 |
| paddd xmm3, xmm7 |
| pxor xmm12, xmm0 |
| pxor xmm13, xmm1 |
| pxor xmm14, xmm2 |
| pxor xmm15, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| pshufb xmm15, xmm8 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm12 |
| paddd xmm9, xmm13 |
| paddd xmm10, xmm14 |
| paddd xmm11, xmm15 |
| pxor xmm4, xmm8 |
| pxor xmm5, xmm9 |
| pxor xmm6, xmm10 |
| pxor xmm7, xmm11 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xE0] |
| paddd xmm1, xmmword ptr [rsp+0x20] |
| paddd xmm2, xmmword ptr [rsp+0x30] |
| paddd xmm3, xmmword ptr [rsp+0x70] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT16+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| movdqa xmmword ptr [rsp+0x100], xmm8 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 12 |
| pslld xmm5, 20 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 12 |
| pslld xmm6, 20 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 12 |
| pslld xmm7, 20 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 12 |
| pslld xmm4, 20 |
| por xmm4, xmm8 |
| paddd xmm0, xmmword ptr [rsp+0xA0] |
| paddd xmm1, xmmword ptr [rsp+0xC0] |
| paddd xmm2, xmmword ptr [rsp+0x40] |
| paddd xmm3, xmmword ptr [rsp+0xD0] |
| paddd xmm0, xmm5 |
| paddd xmm1, xmm6 |
| paddd xmm2, xmm7 |
| paddd xmm3, xmm4 |
| pxor xmm15, xmm0 |
| pxor xmm12, xmm1 |
| pxor xmm13, xmm2 |
| pxor xmm14, xmm3 |
| movdqa xmm8, xmmword ptr [ROT8+rip] |
| pshufb xmm15, xmm8 |
| pshufb xmm12, xmm8 |
| pshufb xmm13, xmm8 |
| pshufb xmm14, xmm8 |
| paddd xmm10, xmm15 |
| paddd xmm11, xmm12 |
| movdqa xmm8, xmmword ptr [rsp+0x100] |
| paddd xmm8, xmm13 |
| paddd xmm9, xmm14 |
| pxor xmm5, xmm10 |
| pxor xmm6, xmm11 |
| pxor xmm7, xmm8 |
| pxor xmm4, xmm9 |
| pxor xmm0, xmm8 |
| pxor xmm1, xmm9 |
| pxor xmm2, xmm10 |
| pxor xmm3, xmm11 |
| movdqa xmm8, xmm5 |
| psrld xmm8, 7 |
| pslld xmm5, 25 |
| por xmm5, xmm8 |
| movdqa xmm8, xmm6 |
| psrld xmm8, 7 |
| pslld xmm6, 25 |
| por xmm6, xmm8 |
| movdqa xmm8, xmm7 |
| psrld xmm8, 7 |
| pslld xmm7, 25 |
| por xmm7, xmm8 |
| movdqa xmm8, xmm4 |
| psrld xmm8, 7 |
| pslld xmm4, 25 |
| por xmm4, xmm8 |
| pxor xmm4, xmm12 |
| pxor xmm5, xmm13 |
| pxor xmm6, xmm14 |
| pxor xmm7, xmm15 |
| mov eax, r13d |
| jne 9b |
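/* Transpose the first eight state vectors back to row order and store the
   32-byte output chaining value of each input at rbx+0x00, +0x20, +0x40 and
   +0x60 (low half first, high half at +0x10 within each slot). */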
| movdqa xmm9, xmm0 |
| punpckldq xmm0, xmm1 |
| punpckhdq xmm9, xmm1 |
| movdqa xmm11, xmm2 |
| punpckldq xmm2, xmm3 |
| punpckhdq xmm11, xmm3 |
| movdqa xmm1, xmm0 |
| punpcklqdq xmm0, xmm2 |
| punpckhqdq xmm1, xmm2 |
| movdqa xmm3, xmm9 |
| punpcklqdq xmm9, xmm11 |
| punpckhqdq xmm3, xmm11 |
| movdqu xmmword ptr [rbx], xmm0 |
| movdqu xmmword ptr [rbx+0x20], xmm1 |
| movdqu xmmword ptr [rbx+0x40], xmm9 |
| movdqu xmmword ptr [rbx+0x60], xmm3 |
| movdqa xmm9, xmm4 |
| punpckldq xmm4, xmm5 |
| punpckhdq xmm9, xmm5 |
| movdqa xmm11, xmm6 |
| punpckldq xmm6, xmm7 |
| punpckhdq xmm11, xmm7 |
| movdqa xmm5, xmm4 |
| punpcklqdq xmm4, xmm6 |
| punpckhqdq xmm5, xmm6 |
| movdqa xmm7, xmm9 |
| punpcklqdq xmm9, xmm11 |
| punpckhqdq xmm7, xmm11 |
| movdqu xmmword ptr [rbx+0x10], xmm4 |
| movdqu xmmword ptr [rbx+0x30], xmm5 |
| movdqu xmmword ptr [rbx+0x50], xmm9 |
| movdqu xmmword ptr [rbx+0x70], xmm7 |
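/* Advance the counter vector by the (masked) stride of four, propagating the
   carry into the high words, then step to the next four inputs and output
   slots and loop while at least four inputs remain. */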
| movdqa xmm1, xmmword ptr [rsp+0x110] |
| movdqa xmm0, xmm1 |
| paddd xmm1, xmmword ptr [rsp+0x150] |
| movdqa xmmword ptr [rsp+0x110], xmm1 |
| pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] |
| pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] |
| pcmpgtd xmm0, xmm1 |
| movdqa xmm1, xmmword ptr [rsp+0x120] |
| psubd xmm1, xmm0 |
| movdqa xmmword ptr [rsp+0x120], xmm1 |
| add rbx, 128 |
| add rdi, 32 |
| sub rsi, 4 |
| cmp rsi, 4 |
| jnc 2b |
| test rsi, rsi |
| jne 3f |
| 4: |
| movdqa xmm6, xmmword ptr [rsp+0x170] |
| movdqa xmm7, xmmword ptr [rsp+0x180] |
| movdqa xmm8, xmmword ptr [rsp+0x190] |
| movdqa xmm9, xmmword ptr [rsp+0x1A0] |
| movdqa xmm10, xmmword ptr [rsp+0x1B0] |
| movdqa xmm11, xmmword ptr [rsp+0x1C0] |
| movdqa xmm12, xmmword ptr [rsp+0x1D0] |
| movdqa xmm13, xmmword ptr [rsp+0x1E0] |
| movdqa xmm14, xmmword ptr [rsp+0x1F0] |
| movdqa xmm15, xmmword ptr [rsp+0x200] |
| mov rsp, rbp |
| pop rbp |
| pop rbx |
| pop rdi |
| pop rsi |
| pop r12 |
| pop r13 |
| pop r14 |
| pop r15 |
| ret |
| .p2align 5 |
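/* Tail: handle the remaining one to three inputs, two at a time and then one
   at a time, keeping a full 4x32-bit state per input instead of the
   transposed layout used above. */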
| 3: |
| test esi, 0x2 |
| je 3f |
| movups xmm0, xmmword ptr [rcx] |
| movups xmm1, xmmword ptr [rcx+0x10] |
| movaps xmm8, xmm0 |
| movaps xmm9, xmm1 |
| movd xmm13, dword ptr [rsp+0x110] |
| pinsrd xmm13, dword ptr [rsp+0x120], 1 |
| pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 |
| movaps xmmword ptr [rsp], xmm13 |
| movd xmm14, dword ptr [rsp+0x114] |
| pinsrd xmm14, dword ptr [rsp+0x124], 1 |
| pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 |
| movaps xmmword ptr [rsp+0x10], xmm14 |
| mov r8, qword ptr [rdi] |
| mov r9, qword ptr [rdi+0x8] |
| movzx eax, byte ptr [rbp+0x80] |
| or eax, r13d |
| xor edx, edx |
| 2: |
| mov r14d, eax |
| or eax, r12d |
| add rdx, 64 |
| cmp rdx, r15 |
| cmovne eax, r14d |
| movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
| movaps xmm10, xmm2 |
| movups xmm4, xmmword ptr [r8+rdx-0x40] |
| movups xmm5, xmmword ptr [r8+rdx-0x30] |
| movaps xmm3, xmm4 |
| shufps xmm4, xmm5, 136 |
| shufps xmm3, xmm5, 221 |
| movaps xmm5, xmm3 |
| movups xmm6, xmmword ptr [r8+rdx-0x20] |
| movups xmm7, xmmword ptr [r8+rdx-0x10] |
| movaps xmm3, xmm6 |
| shufps xmm6, xmm7, 136 |
| pshufd xmm6, xmm6, 0x93 |
| shufps xmm3, xmm7, 221 |
| pshufd xmm7, xmm3, 0x93 |
| movups xmm12, xmmword ptr [r9+rdx-0x40] |
| movups xmm13, xmmword ptr [r9+rdx-0x30] |
| movaps xmm11, xmm12 |
| shufps xmm12, xmm13, 136 |
| shufps xmm11, xmm13, 221 |
| movaps xmm13, xmm11 |
| movups xmm14, xmmword ptr [r9+rdx-0x20] |
| movups xmm15, xmmword ptr [r9+rdx-0x10] |
| movaps xmm11, xmm14 |
| shufps xmm14, xmm15, 136 |
| pshufd xmm14, xmm14, 0x93 |
| shufps xmm11, xmm15, 221 |
| pshufd xmm15, xmm11, 0x93 |
| movaps xmm3, xmmword ptr [rsp] |
| movaps xmm11, xmmword ptr [rsp+0x10] |
| pinsrd xmm3, eax, 3 |
| pinsrd xmm11, eax, 3 |
| mov al, 7 |
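/* Seven rounds over two independent states (xmm0-xmm3 and xmm8-xmm11);
   al counts rounds and the message permutation between rounds is done in
   registers and the stack slots at [rsp+0x20..0x50]. */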
| 9: |
| paddd xmm0, xmm4 |
| paddd xmm8, xmm12 |
| movaps xmmword ptr [rsp+0x20], xmm4 |
| movaps xmmword ptr [rsp+0x30], xmm12 |
| paddd xmm0, xmm1 |
| paddd xmm8, xmm9 |
| pxor xmm3, xmm0 |
| pxor xmm11, xmm8 |
| movaps xmm12, xmmword ptr [ROT16+rip] |
| pshufb xmm3, xmm12 |
| pshufb xmm11, xmm12 |
| paddd xmm2, xmm3 |
| paddd xmm10, xmm11 |
| pxor xmm1, xmm2 |
| pxor xmm9, xmm10 |
| movdqa xmm4, xmm1 |
| pslld xmm1, 20 |
| psrld xmm4, 12 |
| por xmm1, xmm4 |
| movdqa xmm4, xmm9 |
| pslld xmm9, 20 |
| psrld xmm4, 12 |
| por xmm9, xmm4 |
| paddd xmm0, xmm5 |
| paddd xmm8, xmm13 |
| movaps xmmword ptr [rsp+0x40], xmm5 |
| movaps xmmword ptr [rsp+0x50], xmm13 |
| paddd xmm0, xmm1 |
| paddd xmm8, xmm9 |
| pxor xmm3, xmm0 |
| pxor xmm11, xmm8 |
| movaps xmm13, xmmword ptr [ROT8+rip] |
| pshufb xmm3, xmm13 |
| pshufb xmm11, xmm13 |
| paddd xmm2, xmm3 |
| paddd xmm10, xmm11 |
| pxor xmm1, xmm2 |
| pxor xmm9, xmm10 |
| movdqa xmm4, xmm1 |
| pslld xmm1, 25 |
| psrld xmm4, 7 |
| por xmm1, xmm4 |
| movdqa xmm4, xmm9 |
| pslld xmm9, 25 |
| psrld xmm4, 7 |
| por xmm9, xmm4 |
| pshufd xmm0, xmm0, 0x93 |
| pshufd xmm8, xmm8, 0x93 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm11, xmm11, 0x4E |
| pshufd xmm2, xmm2, 0x39 |
| pshufd xmm10, xmm10, 0x39 |
| paddd xmm0, xmm6 |
| paddd xmm8, xmm14 |
| paddd xmm0, xmm1 |
| paddd xmm8, xmm9 |
| pxor xmm3, xmm0 |
| pxor xmm11, xmm8 |
| pshufb xmm3, xmm12 |
| pshufb xmm11, xmm12 |
| paddd xmm2, xmm3 |
| paddd xmm10, xmm11 |
| pxor xmm1, xmm2 |
| pxor xmm9, xmm10 |
| movdqa xmm4, xmm1 |
| pslld xmm1, 20 |
| psrld xmm4, 12 |
| por xmm1, xmm4 |
| movdqa xmm4, xmm9 |
| pslld xmm9, 20 |
| psrld xmm4, 12 |
| por xmm9, xmm4 |
| paddd xmm0, xmm7 |
| paddd xmm8, xmm15 |
| paddd xmm0, xmm1 |
| paddd xmm8, xmm9 |
| pxor xmm3, xmm0 |
| pxor xmm11, xmm8 |
| pshufb xmm3, xmm13 |
| pshufb xmm11, xmm13 |
| paddd xmm2, xmm3 |
| paddd xmm10, xmm11 |
| pxor xmm1, xmm2 |
| pxor xmm9, xmm10 |
| movdqa xmm4, xmm1 |
| pslld xmm1, 25 |
| psrld xmm4, 7 |
| por xmm1, xmm4 |
| movdqa xmm4, xmm9 |
| pslld xmm9, 25 |
| psrld xmm4, 7 |
| por xmm9, xmm4 |
| pshufd xmm0, xmm0, 0x39 |
| pshufd xmm8, xmm8, 0x39 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm11, xmm11, 0x4E |
| pshufd xmm2, xmm2, 0x93 |
| pshufd xmm10, xmm10, 0x93 |
| dec al |
| je 9f |
| movdqa xmm12, xmmword ptr [rsp+0x20] |
| movdqa xmm5, xmmword ptr [rsp+0x40] |
| pshufd xmm13, xmm12, 0x0F |
| shufps xmm12, xmm5, 214 |
| pshufd xmm4, xmm12, 0x39 |
| movdqa xmm12, xmm6 |
| shufps xmm12, xmm7, 250 |
| pblendw xmm13, xmm12, 0xCC |
| movdqa xmm12, xmm7 |
| punpcklqdq xmm12, xmm5 |
| pblendw xmm12, xmm6, 0xC0 |
| pshufd xmm12, xmm12, 0x78 |
| punpckhdq xmm5, xmm7 |
| punpckldq xmm6, xmm5 |
| pshufd xmm7, xmm6, 0x1E |
| movdqa xmmword ptr [rsp+0x20], xmm13 |
| movdqa xmmword ptr [rsp+0x40], xmm12 |
| movdqa xmm5, xmmword ptr [rsp+0x30] |
| movdqa xmm13, xmmword ptr [rsp+0x50] |
| pshufd xmm6, xmm5, 0x0F |
| shufps xmm5, xmm13, 214 |
| pshufd xmm12, xmm5, 0x39 |
| movdqa xmm5, xmm14 |
| shufps xmm5, xmm15, 250 |
| pblendw xmm6, xmm5, 0xCC |
| movdqa xmm5, xmm15 |
| punpcklqdq xmm5, xmm13 |
| pblendw xmm5, xmm14, 0xC0 |
| pshufd xmm5, xmm5, 0x78 |
| punpckhdq xmm13, xmm15 |
| punpckldq xmm14, xmm13 |
| pshufd xmm15, xmm14, 0x1E |
| movdqa xmm13, xmm6 |
| movdqa xmm14, xmm5 |
| movdqa xmm5, xmmword ptr [rsp+0x20] |
| movdqa xmm6, xmmword ptr [rsp+0x40] |
| jmp 9b |
| 9: |
| pxor xmm0, xmm2 |
| pxor xmm1, xmm3 |
| pxor xmm8, xmm10 |
| pxor xmm9, xmm11 |
| mov eax, r13d |
| cmp rdx, r15 |
| jne 2b |
| movups xmmword ptr [rbx], xmm0 |
| movups xmmword ptr [rbx+0x10], xmm1 |
| movups xmmword ptr [rbx+0x20], xmm8 |
| movups xmmword ptr [rbx+0x30], xmm9 |
| movdqa xmm0, xmmword ptr [rsp+0x130] |
| movdqa xmm1, xmmword ptr [rsp+0x110] |
| movdqa xmm2, xmmword ptr [rsp+0x120] |
| movdqu xmm3, xmmword ptr [rsp+0x118] |
| movdqu xmm4, xmmword ptr [rsp+0x128] |
| blendvps xmm1, xmm3, xmm0 |
| blendvps xmm2, xmm4, xmm0 |
| movdqa xmmword ptr [rsp+0x110], xmm1 |
| movdqa xmmword ptr [rsp+0x120], xmm2 |
| add rdi, 16 |
| add rbx, 64 |
| sub rsi, 2 |
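/* Handle a final odd input, if any, with a single-state compression loop. */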
| 3: |
| test esi, 0x1 |
| je 4b |
| movups xmm0, xmmword ptr [rcx] |
| movups xmm1, xmmword ptr [rcx+0x10] |
| movd xmm13, dword ptr [rsp+0x110] |
| pinsrd xmm13, dword ptr [rsp+0x120], 1 |
| pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 |
| movaps xmm14, xmmword ptr [ROT8+rip] |
| movaps xmm15, xmmword ptr [ROT16+rip] |
| mov r8, qword ptr [rdi] |
| movzx eax, byte ptr [rbp+0x80] |
| or eax, r13d |
| xor edx, edx |
| 2: |
| mov r14d, eax |
| or eax, r12d |
| add rdx, 64 |
| cmp rdx, r15 |
| cmovne eax, r14d |
| movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
| movaps xmm3, xmm13 |
| pinsrd xmm3, eax, 3 |
| movups xmm4, xmmword ptr [r8+rdx-0x40] |
| movups xmm5, xmmword ptr [r8+rdx-0x30] |
| movaps xmm8, xmm4 |
| shufps xmm4, xmm5, 136 |
| shufps xmm8, xmm5, 221 |
| movaps xmm5, xmm8 |
| movups xmm6, xmmword ptr [r8+rdx-0x20] |
| movups xmm7, xmmword ptr [r8+rdx-0x10] |
| movaps xmm8, xmm6 |
| shufps xmm6, xmm7, 136 |
| pshufd xmm6, xmm6, 0x93 |
| shufps xmm8, xmm7, 221 |
| pshufd xmm7, xmm8, 0x93 |
| mov al, 7 |
| 9: |
| paddd xmm0, xmm4 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm15 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 20 |
| psrld xmm11, 12 |
| por xmm1, xmm11 |
| paddd xmm0, xmm5 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm14 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 25 |
| psrld xmm11, 7 |
| por xmm1, xmm11 |
| pshufd xmm0, xmm0, 0x93 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm2, xmm2, 0x39 |
| paddd xmm0, xmm6 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm15 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 20 |
| psrld xmm11, 12 |
| por xmm1, xmm11 |
| paddd xmm0, xmm7 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm14 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 25 |
| psrld xmm11, 7 |
| por xmm1, xmm11 |
| pshufd xmm0, xmm0, 0x39 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm2, xmm2, 0x93 |
| dec al |
| jz 9f |
| movdqa xmm8, xmm4 |
| shufps xmm8, xmm5, 214 |
| pshufd xmm9, xmm4, 0x0F |
| pshufd xmm4, xmm8, 0x39 |
| movdqa xmm8, xmm6 |
| shufps xmm8, xmm7, 250 |
| pblendw xmm9, xmm8, 0xCC |
| movdqa xmm8, xmm7 |
| punpcklqdq xmm8, xmm5 |
| pblendw xmm8, xmm6, 0xC0 |
| pshufd xmm8, xmm8, 0x78 |
| punpckhdq xmm5, xmm7 |
| punpckldq xmm6, xmm5 |
| pshufd xmm7, xmm6, 0x1E |
| movdqa xmm5, xmm9 |
| movdqa xmm6, xmm8 |
| jmp 9b |
| 9: |
| pxor xmm0, xmm2 |
| pxor xmm1, xmm3 |
| mov eax, r13d |
| cmp rdx, r15 |
| jne 2b |
| movups xmmword ptr [rbx], xmm0 |
| movups xmmword ptr [rbx+0x10], xmm1 |
| jmp 4b |
| |
| .p2align 6 |
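/*
 * blake3_compress_in_place_sse41(cv, block, block_len, counter, flags)
 * Compresses one 64-byte block and writes the resulting 32-byte chaining
 * value back into cv. Win64: rcx = cv, rdx = block, r8b = block_len,
 * r9 = counter, flags passed on the stack.
 */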
| blake3_compress_in_place_sse41: |
| _blake3_compress_in_place_sse41: |
| sub rsp, 120 |
| movdqa xmmword ptr [rsp], xmm6 |
| movdqa xmmword ptr [rsp+0x10], xmm7 |
| movdqa xmmword ptr [rsp+0x20], xmm8 |
| movdqa xmmword ptr [rsp+0x30], xmm9 |
| movdqa xmmword ptr [rsp+0x40], xmm11 |
| movdqa xmmword ptr [rsp+0x50], xmm14 |
| movdqa xmmword ptr [rsp+0x60], xmm15 |
| movups xmm0, xmmword ptr [rcx] |
| movups xmm1, xmmword ptr [rcx+0x10] |
| movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
| movzx eax, byte ptr [rsp+0xA0] |
| movzx r8d, r8b |
| shl rax, 32 |
| add r8, rax |
| movq xmm3, r9 |
| movq xmm4, r8 |
| punpcklqdq xmm3, xmm4 |
| movups xmm4, xmmword ptr [rdx] |
| movups xmm5, xmmword ptr [rdx+0x10] |
| movaps xmm8, xmm4 |
| shufps xmm4, xmm5, 136 |
| shufps xmm8, xmm5, 221 |
| movaps xmm5, xmm8 |
| movups xmm6, xmmword ptr [rdx+0x20] |
| movups xmm7, xmmword ptr [rdx+0x30] |
| movaps xmm8, xmm6 |
| shufps xmm6, xmm7, 136 |
| pshufd xmm6, xmm6, 0x93 |
| shufps xmm8, xmm7, 221 |
| pshufd xmm7, xmm8, 0x93 |
| movaps xmm14, xmmword ptr [ROT8+rip] |
| movaps xmm15, xmmword ptr [ROT16+rip] |
| mov al, 7 |
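/* al counts the seven rounds; rotations by 16 and 8 use pshufb with
   xmm15 (ROT16) and xmm14 (ROT8), rotations by 12 and 7 use shift-and-or. */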
| 9: |
| paddd xmm0, xmm4 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm15 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 20 |
| psrld xmm11, 12 |
| por xmm1, xmm11 |
| paddd xmm0, xmm5 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm14 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 25 |
| psrld xmm11, 7 |
| por xmm1, xmm11 |
| pshufd xmm0, xmm0, 0x93 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm2, xmm2, 0x39 |
| paddd xmm0, xmm6 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm15 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 20 |
| psrld xmm11, 12 |
| por xmm1, xmm11 |
| paddd xmm0, xmm7 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm14 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 25 |
| psrld xmm11, 7 |
| por xmm1, xmm11 |
| pshufd xmm0, xmm0, 0x39 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm2, xmm2, 0x93 |
| dec al |
| jz 9f |
| movdqa xmm8, xmm4 |
| shufps xmm8, xmm5, 214 |
| pshufd xmm9, xmm4, 0x0F |
| pshufd xmm4, xmm8, 0x39 |
| movdqa xmm8, xmm6 |
| shufps xmm8, xmm7, 250 |
| pblendw xmm9, xmm8, 0xCC |
| movdqa xmm8, xmm7 |
| punpcklqdq xmm8, xmm5 |
| pblendw xmm8, xmm6, 0xC0 |
| pshufd xmm8, xmm8, 0x78 |
| punpckhdq xmm5, xmm7 |
| punpckldq xmm6, xmm5 |
| pshufd xmm7, xmm6, 0x1E |
| movdqa xmm5, xmm9 |
| movdqa xmm6, xmm8 |
| jmp 9b |
| 9: |
| pxor xmm0, xmm2 |
| pxor xmm1, xmm3 |
| movups xmmword ptr [rcx], xmm0 |
| movups xmmword ptr [rcx+0x10], xmm1 |
| movdqa xmm6, xmmword ptr [rsp] |
| movdqa xmm7, xmmword ptr [rsp+0x10] |
| movdqa xmm8, xmmword ptr [rsp+0x20] |
| movdqa xmm9, xmmword ptr [rsp+0x30] |
| movdqa xmm11, xmmword ptr [rsp+0x40] |
| movdqa xmm14, xmmword ptr [rsp+0x50] |
| movdqa xmm15, xmmword ptr [rsp+0x60] |
| add rsp, 120 |
| ret |
| |
| |
| .p2align 6 |
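/*
 * blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out)
 * Same as compress_in_place, but produces the full 64-byte extended output:
 * the second half is the new state XORed with the input chaining value, and
 * all 64 bytes are written to the out pointer (sixth argument, loaded from
 * the stack into r10).
 */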
| _blake3_compress_xof_sse41: |
| blake3_compress_xof_sse41: |
| sub rsp, 120 |
| movdqa xmmword ptr [rsp], xmm6 |
| movdqa xmmword ptr [rsp+0x10], xmm7 |
| movdqa xmmword ptr [rsp+0x20], xmm8 |
| movdqa xmmword ptr [rsp+0x30], xmm9 |
| movdqa xmmword ptr [rsp+0x40], xmm11 |
| movdqa xmmword ptr [rsp+0x50], xmm14 |
| movdqa xmmword ptr [rsp+0x60], xmm15 |
| movups xmm0, xmmword ptr [rcx] |
| movups xmm1, xmmword ptr [rcx+0x10] |
| movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
| movzx eax, byte ptr [rsp+0xA0] |
| movzx r8d, r8b |
| mov r10, qword ptr [rsp+0xA8] |
| shl rax, 32 |
| add r8, rax |
| movq xmm3, r9 |
| movq xmm4, r8 |
| punpcklqdq xmm3, xmm4 |
| movups xmm4, xmmword ptr [rdx] |
| movups xmm5, xmmword ptr [rdx+0x10] |
| movaps xmm8, xmm4 |
| shufps xmm4, xmm5, 136 |
| shufps xmm8, xmm5, 221 |
| movaps xmm5, xmm8 |
| movups xmm6, xmmword ptr [rdx+0x20] |
| movups xmm7, xmmword ptr [rdx+0x30] |
| movaps xmm8, xmm6 |
| shufps xmm6, xmm7, 136 |
| pshufd xmm6, xmm6, 0x93 |
| shufps xmm8, xmm7, 221 |
| pshufd xmm7, xmm8, 0x93 |
| movaps xmm14, xmmword ptr [ROT8+rip] |
| movaps xmm15, xmmword ptr [ROT16+rip] |
| mov al, 7 |
| 9: |
| paddd xmm0, xmm4 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm15 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 20 |
| psrld xmm11, 12 |
| por xmm1, xmm11 |
| paddd xmm0, xmm5 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm14 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 25 |
| psrld xmm11, 7 |
| por xmm1, xmm11 |
| pshufd xmm0, xmm0, 0x93 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm2, xmm2, 0x39 |
| paddd xmm0, xmm6 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm15 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 20 |
| psrld xmm11, 12 |
| por xmm1, xmm11 |
| paddd xmm0, xmm7 |
| paddd xmm0, xmm1 |
| pxor xmm3, xmm0 |
| pshufb xmm3, xmm14 |
| paddd xmm2, xmm3 |
| pxor xmm1, xmm2 |
| movdqa xmm11, xmm1 |
| pslld xmm1, 25 |
| psrld xmm11, 7 |
| por xmm1, xmm11 |
| pshufd xmm0, xmm0, 0x39 |
| pshufd xmm3, xmm3, 0x4E |
| pshufd xmm2, xmm2, 0x93 |
| dec al |
| jz 9f |
| movdqa xmm8, xmm4 |
| shufps xmm8, xmm5, 214 |
| pshufd xmm9, xmm4, 0x0F |
| pshufd xmm4, xmm8, 0x39 |
| movdqa xmm8, xmm6 |
| shufps xmm8, xmm7, 250 |
| pblendw xmm9, xmm8, 0xCC |
| movdqa xmm8, xmm7 |
| punpcklqdq xmm8, xmm5 |
| pblendw xmm8, xmm6, 0xC0 |
| pshufd xmm8, xmm8, 0x78 |
| punpckhdq xmm5, xmm7 |
| punpckldq xmm6, xmm5 |
| pshufd xmm7, xmm6, 0x1E |
| movdqa xmm5, xmm9 |
| movdqa xmm6, xmm8 |
| jmp 9b |
| 9: |
| movdqu xmm4, xmmword ptr [rcx] |
| movdqu xmm5, xmmword ptr [rcx+0x10] |
| pxor xmm0, xmm2 |
| pxor xmm1, xmm3 |
| pxor xmm2, xmm4 |
| pxor xmm3, xmm5 |
| movups xmmword ptr [r10], xmm0 |
| movups xmmword ptr [r10+0x10], xmm1 |
| movups xmmword ptr [r10+0x20], xmm2 |
| movups xmmword ptr [r10+0x30], xmm3 |
| movdqa xmm6, xmmword ptr [rsp] |
| movdqa xmm7, xmmword ptr [rsp+0x10] |
| movdqa xmm8, xmmword ptr [rsp+0x20] |
| movdqa xmm9, xmmword ptr [rsp+0x30] |
| movdqa xmm11, xmmword ptr [rsp+0x40] |
| movdqa xmm14, xmmword ptr [rsp+0x50] |
| movdqa xmm15, xmmword ptr [rsp+0x60] |
| add rsp, 120 |
| ret |
| |
| |
| .section .rodata |
| .p2align 6 |
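/* Constants: the first four BLAKE3 IV words (and per-word broadcasts), pshufb
   tables implementing 32-bit rotate-right by 16 and 8, counter lane offsets
   and stride, the broadcast block length, and the sign-bit mask used to turn
   signed SIMD compares into unsigned ones. */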
| BLAKE3_IV: |
| .long 0x6A09E667, 0xBB67AE85 |
| .long 0x3C6EF372, 0xA54FF53A |
| ROT16: |
| .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 |
| ROT8: |
| .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 |
| ADD0: |
| .long 0, 1, 2, 3 |
| ADD1: |
| .long 4, 4, 4, 4 |
| BLAKE3_IV_0: |
| .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 |
| BLAKE3_IV_1: |
| .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 |
| BLAKE3_IV_2: |
| .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 |
| BLAKE3_IV_3: |
| .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A |
| BLAKE3_BLOCK_LEN: |
| .long 64, 64, 64, 64 |
| CMP_MSB_MASK: |
| .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 |