Хабр Курсы для всех
РЕКЛАМА
Практикум, Хекслет, SkyPro, авторские курсы — собрали всех и попросили скидки. Осталось выбрать!
p.s. И вот что интересно, а можно ли сделать
#include <inttypes.h> #include <stdio.h> #define TAP_MASK_LOW64 0x0 #define TAP_MASK_HIGH64 0x28000005 do_lfsr(uint64_t *p) { uint64_t low = p[0] & TAP_MASK_LOW64; uint64_t high = p[1] & TAP_MASK_HIGH64; uint64_t s1 = low ^ high; uint32_t s2 = (uint32_t)s1 ^ (uint32_t)(s1 >> 32); uint32_t s3 = (s2 & 0xffff) ^ (s2 >> 16); uint32_t s4 = (s3 & 0xff) ^ (s3 >> 8); uint32_t s5 = (s4 & 0xf) ^ (s4 >> 4); uint32_t s6 = (s5 & 0x3) ^ (s5 >> 2); uint32_t s7 = (s6 & 0x1) ^ (s6 >> 1); p[1] = (p[1] << 1) | (p[0] >> 63); p[0] = (p[0] << 1) | s7; } int main() { uint64_t lfsr_data[2] = { 0xffffffffffffffffull, 0xffffffffffffffffull, }; volatile unsigned i; for(i = 0; i < 0xffff * 32; ++i) do_lfsr(lfsr_data); printf("%016"PRIx64":%016"PRIx64"\n", lfsr_data[1], lfsr_data[0]); }
$ gcc lfsr.c -o lfsr $ file lfsr lfsr: ELF 32-bit LSB executable, Intel 80386 ... $ time ./lfsr 976a2d15490d6693:844dc6ff26ad7a9a real 0m0.086s user 0m0.084s sys 0m0.000s
....... 1b976a2d15490d66:93844dc6ff26ad7a 372ed45a2a921acd:27089b8dfe4d5af5 6e5da8b45524359a:4e11371bfc9ab5ea dcbb5168aa486b34:9c226e37f9356bd4 b976a2d15490d669:3844dc6ff26ad7a9 72ed45a2a921acd2:7089b8dfe4d5af53 e5da8b45524359a4:e11371bfc9ab5ea6 cbb5168aa486b349:c226e37f9356bd4d 976a2d15490d6693:844dc6ff26ad7a9a 976a2d15490d6693:844dc6ff26ad7a9a real 0m33.816s user 0m1.560s sys 0m6.288s
int main(int argc, char ** argv)
{
uint64_t lfsr_data[2] = {
0xffffffffffffffffull,
0xffffffffffffffffull,
};
volatile unsigned i;
if(argc < 2) {
printf("Run as \"%s 0\"\n", argv[0]);
return 1;
}
uint64_t arg;
sscanf(argv[1], "%llu", &arg);
lfsr_data[0] += arg;
lfsr_data[1] += arg;
for(i = 0; i < 0xffff * 32; ++i)
do_lfsr(lfsr_data);
printf("%016"PRIx64":%016"PRIx64"\n", lfsr_data[1], lfsr_data[0]);
}
#include <inttypes.h> #include <stdio.h> #define TAP_MASK_LOW64 0x0 #define TAP_MASK_HIGH64 0x28000005 void do_lfsr(uint64_t *p) { uint64_t low = p[0] & TAP_MASK_LOW64; uint64_t high = p[1] & TAP_MASK_HIGH64; uint64_t s1 = low ^ high; uint32_t s2 = (uint32_t)s1 ^ (uint32_t)(s1 >> 32); uint32_t s3 = (s2 & 0xffff) ^ (s2 >> 16); uint32_t s4 = (s3 & 0xff) ^ (s3 >> 8); uint32_t s5 = (s4 & 0xf) ^ (s4 >> 4); uint32_t s6 = (s5 & 0x3) ^ (s5 >> 2); uint32_t s7 = (s6 & 0x1) ^ (s6 >> 1); p[1] = (p[1] << 1) | (p[0] >> 63); p[0] = (p[0] << 1) | s7; putchar(s7 ? '1' : '0'); } int main() { uint64_t lfsr_data[2] = { 0xffffffffffffffffull, 0xffffffffffffffffull, }; volatile unsigned i; for(i = 0; i < 0xffff * 32; ++i) do_lfsr(lfsr_data); printf("\n%016"PRIx64":%016"PRIx64"\n", lfsr_data[1], lfsr_data[0]); return 0; }
...... 111011011111100001110100101111001110011100001110001111011000001100110100001111101100010101010100100001110000000001000000010011000001000001000011100101000011111101000000011101110110100000110101111001100010110010111101011010010000110100000010010010100001010101001001011110001110101010010000010111010101100001001111011111001110100110111110111000010001101110010111011010100010110100010101010010010000110101100110100100111000010001001101110001101111111100100110101011010111101010011010 976a2d15490d6693:844dc6ff26ad7a9a real 0m0.577s user 0m0.104s sys 0m0.028s
unsigned int AR[4];
void step3(){
unsigned int q=(AR[0]&Mask0)^(AR[1]&Mask1)^(AR[2]&Mask2)^(AR[3]&Mask3);
unsigned int c=u[(q&0xffff)^(q>>16)];
AR[3]=(AR[3]<<1)|(AR[2]>>31);
AR[2]=(AR[2]<<1)|(AR[1]>>31);
AR[1]=(AR[1]<<1)|(AR[0]>>31);
AR[0]=(AR[0]<<1)|c;
*r1++=c;
}
void step3(int m){
unsigned int a=AR[0],b=AR[1],c=AR[2],d=AR[3];
do{
unsigned int q=(a&Mask0)^(b&Mask1)^(c&Mask2)^(d&Mask3);
q=u[(q&0xffff)^(q>>16)];
d=(d<<1)|(c>>31);
c=(d<<1)|(b>>31);
b=(d<<1)|(a>>31);
a=(d<<1)|(q);
*r1++=q;
}while(--m);
AR[0]=a; AR[1]=b; AR[2]=c; AR[3]=d;
}
mov DWORD PTR _m$[esp+20], 65536
npad 5
$LL3@step3:
push ebx
mov ebx,1
npad 10
$LL4@step3:
mov eax, edi
and eax, MASK3
mov ebp, ecx
and ebp, MASK2
xor eax, ebp
mov ebp, edx
and ebp, MASK1
xor eax, ebp
mov ebp, esi
and ebp, MASK0
xor eax, ebp
mov ebp, eax
shr ebp, 16
xor ebp, eax
mov eax, ebp
shr ebp, 8
xor ebp, eax
and ebp, 255
movzx eax, BYTE PTR _u[ebp]
shld edi,ecx,1
shld ecx,edx,1
shld edx,esi,1
add esi,esi
or esi,eax
ror eax,1
rcl ebx,1
jnc short $LL4@step3
mov eax,ebx
pop ebx
mov DWORD PTR [ebx], eax
add ebx,4
sub DWORD PTR _m$[esp+20], 1
jne $LL3@step3
mov ecx, RR_
l1:
; Apply LFSR mask
movq mm(inRTmpH),mm(inRSH)
pand mm(inRTmpH),mm(inRMH)
movq mm(inRTmpL),mm(inRSL)
pand mm(inRTmpL),mm(inRML)
; Calculate new bit
pxor mm(inRTmpH),mm(inRTmpL)
movd ebx, mm(inRTmpH)
psrlq mm(inRTmpH),020h
movd eax, mm(inRTmpH)
xor ebx,eax
mov ax,bx
sar ebx,010h
xor ax,bx
xor al,ah
lahf
not eax
sar eax,0Ah
and eax,01h
; Append new bit
psrlq mm(inRSL),01h
movq mm(inRTmp),mm(inRSH)
psllq mm(inRTmp),03Fh
por mm(inRSL),mm(inRTmp)
psrlq mm(inRSH),01h
movd mm(inRTmp), eax
psllq mm(inRTmp),03Fh
por mm(inRSH),mm(inRTmp)
loop l1
movq mm(inRTmpH),mm(inRSH)
movq mm(inRTmpL),mm(inRSH)
psllq mm(inRTmpH),03Fh
psllq mm(inRTmpL),03Fh
psrlq mm(inRSH),01h
psrlq mm(inRSL),01h
por mm(inRSH),mm(inRTmpL)
por mm(inRSL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRMH)
movq mm(inRTmpL),mm(inRML)
psllq mm(inRTmpH),03Fh
psllq mm(inRTmpL),03Fh
psrlq mm(inRMH),01h
psrlq mm(inRML),01h
por mm(inRMH),mm(inRTmpL)
por mm(inRML),mm(inRTmpH)
add esi,esi
adc edx,edx
adc ecx,ecx
adc edi,edi
or esi,eax
«The packed arithmetic instructions operate on a set of bytes, words, or double words within a 64-bit block. For example, the PADDW instruction computes four 16-bit sums of two operand simultaneously. None of these instructions affect the CPU's FLAGs register. Therefore, there is no indication of overflow, underflow, zero result, negative result, etc.»
25 78563412
002811B5 |. 8BDA ||MOV EBX,EDX
002811B7 |. 81E3 563412F0 ||AND EBX,F0123456
002811BD |. 8BC8 ||MOV ECX,EAX
002811BF |. 81E1 F0DEBC9A ||AND ECX,9ABCDEF0
002811C5 |. 33D9 ||XOR EBX,ECX
003E103A mov ebx,dword ptr [esp+24h]
003E103E and ebx,12345679h
003E1044 mov edi,ebp
003E1046 mov ecx,esi
003E1048 and ecx,0F0123456h
003E104E mov eax,edx
003E1050 and eax,789ABCDEh
003E1055 xor eax,ebx
003E1057 and edi,9ABCDEF0h
003E105D xor ecx,edi
013E1400 > 8BC7 MOV EAX,EDI
013E1402 . 2305 44303E01 AND EAX,DWORD PTR DS:[13E3044]
013E1408 . 8BE9 MOV EBP,ECX
013E140A . 232D 40303E01 AND EBP,DWORD PTR DS:[13E3040]
013E1410 . 33C5 XOR EAX,EBP
013E1412 . 8BEA MOV EBP,EDX
013E1414 . 232D 3C303E01 AND EBP,DWORD PTR DS:[13E303C]
013E141A . 33C5 XOR EAX,EBP
013E141C . 8BEE MOV EBP,ESI
013E141E . 232D 38303E01 AND EBP,DWORD PTR DS:[13E3038]
.data
SH_ dq 0FFFFFFFFFFFFFFFFh ; Sequence[127-64]
SL_ dq 0FFFFFFFFFFFFFFFFh ; [63-0]
MH_ dq 00000000000000000h ; Mask[127-64] taps: 128,126,101,99
ML_ dq 00000000028000005h ; [63-0]
RR_ dd 0001FFFE0h ; Amount of rounds
_55 dq 05555555555555555h
_33 dq 03333333333333333h
_0F dq 00F0F0F0F0F0F0F0Fh
_FF dq 000FF00FF00FF00FFh
.code
; Initialize MMX registers
movq mm(inRSH),SH_
movq mm(inRSL),SL_
movq mm(inRMH),MH_
movq mm(inRML),ML_
mov ecx, RR_
l1:
; Apply LFSR mask
movq mm(inRTmpH),mm(inRSH)
pand mm(inRTmpH),mm(inRMH)
movq mm(inRTmpL),mm(inRSL)
pand mm(inRTmpL),mm(inRML)
; Calculate new bit
movq mm(inRTmp),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),01h
pand mm(inRTmpH),_55
pand mm(inRTmpL),_55
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),02h
pand mm(inRTmpH),_33
pand mm(inRTmpL),_33
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),04h
pand mm(inRTmpH),_0F
pand mm(inRTmpL),_0F
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),08h
pand mm(inRTmpH),_FF
pand mm(inRTmpL),_FF
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psllq mm(inRTmpL),020h
paddd mm(inRTmpH),mm(inRTmpL)
movq mm(inRTmpL),mm(inRTmpH)
psllq mm(inRTmpL),010h
paddw mm(inRTmpH),mm(inRTmpL)
psllq mm(inRTmpH),0Fh
movq mm(inRTmpL),mm(inRTmp)
movq mm(inRTmp),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),01h
pand mm(inRTmpH),_55
pand mm(inRTmpL),_55
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),02h
pand mm(inRTmpH),_33
pand mm(inRTmpL),_33
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),04h
pand mm(inRTmpH),_0F
pand mm(inRTmpL),_0F
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psrlw mm(inRTmpH),08h
pand mm(inRTmpH),_FF
pand mm(inRTmpL),_FF
paddw mm(inRTmpL),mm(inRTmpH)
movq mm(inRTmpH),mm(inRTmpL)
psllq mm(inRTmpL),020h
paddd mm(inRTmpH),mm(inRTmpL)
movq mm(inRTmpL),mm(inRTmpH)
psllq mm(inRTmpL),010h
paddw mm(inRTmpH),mm(inRTmpL)
psllq mm(inRTmpH),0Fh
paddw mm(inRTmpH),mm(inRTmp)
psrlq mm(inRTmpH),03Fh
psllq mm(inRTmpH),03Fh
; Append new bit
psrlq mm(inRSL),01h
movq mm(inRTmp),mm(inRSH)
psllq mm(inRTmp),03Fh
por mm(inRSL),mm(inRTmp)
psrlq mm(inRSH),01h
por mm(inRSH),mm(inRTmpH)
Шустрый 128-битный LFSR (MMX required)