Elementary cellular automaton/Random number generator: Difference between revisions

m
→‎{{header|Pascal}}: inline all together to get next byte doubles speed. ~4/5 cycles per Bit.Needs long wake up to get stable results
(Realize in F#)
m (→‎{{header|Pascal}}: inline all together to get next byte doubles speed. ~4/5 cycles per Bit.Needs long wake up to get stable results)
Line 318:
=={{header|Pascal}}==
{{Works with|Free Pascal}}
Using ROR and ROL is as fast as assembler and more portable.<BR>[https://tio.run/##7VZdb@pGEH33r5iHSEAvYJsQ0kBTifBxawmwC6a9bVVFjr3AKmZtrZdwaZS/Xjq7iwPckOThvvQhSHx45szMmbPD7qZBFgZxZZaG263HkzkPljBexeTcahmmuRAibZomYdU1vacpiWhQTfjclE/miHwVtxMRCHIrI26PQpaBWKwTHkfVdRLPMGs1TJamzlxdiGVsPJ45/W6vD32v82QAPJ4Nk4hAl8Tpgj49nrUnw6Hb7YEz8nsDDXA93xk6f7Z9xx2BOyq3B@gwTenqILQ9cD6PIOVJeG0/YfreYNLTgW3P8//wetBxRxN30FPOUdfpPxmrjGQImWyyqaBx1jLChGUCLcvg6zhZsSiDa6j9YFuWpT5a6OLajq9rsNFiPAQczbo3LQg0YUqZaNTRO1uxUNCEwWciOt701qdL0oSdV2xSgrF@J11hNk7ChEcGHLx@oegqH5kGiUQ3oYv6Rq29izB80lwQIBAh07aMOzKnDI1BtpQ0u/6kI6OG7m86BXiCw18I9asq9d/lXvvLKwBFBwFdCVAFAdZULHTFKFFsOMlWMda/1l0WMcibliBbxHBeg0@6gZahwg25XiRacQIOo@JQxBZlMWVk38ChE5PbL1OcGMk8iRaGr1gZR4Q8lBlC96uUl0A/SOJHPNDuYYiyu@NfpSRFmcguSZdMpF2Db11HIyExRQV2x7JOCb7gD8kl7@N5UmQbOvZmg62OAkEfiMOE/H816pUbx4cwYIDDi3PKNij4nGaC8OydLnGsNy5T0@loy807fe@X0tqHgxYfwDSnjCcxCjyHgEWgtJYPL9cB7jawQMxRzqL@@Ul/laQwUj1oj7q61HsCv7EseUevSf5B5IPIB5H/LZHj0/S9nXN/AkxSQiJBMpHv@L5d9i3c8ZzRbgukTTjYUPMNcM2pIDErFp4TwCzhkMn6Gf2HQDKDQrlRLxfgjoqsUNL73@8BX8IqlTguQCSwDu6JNOCJh4A@pqBqu9zdH9RHxcaTcs0QbeVH5qm7lCRvyeCDi4Os@uKc3BXSlZ4vLq9U2Z8rLaXOiQK5Fsfw0qGrEG7CmGSQEg7SiQIXykXfrgirZD5TaFrN2mHYy@Xyg@w@XymqbkVkTviJZfEXBASCQSv/tga2XIndNW3Xukr0TUfN@ilyeWk1CL6aJNjxzNMU4KceXkvHP0s2nATRLrqK5zNec1MakwjkQU2F8cY8Nepqlox63XgpJ16Try4MI@/bgFrNAvvqEuw6vi/rYNuXAOr5Cp9tOWJgX9hGzs04JHNe@y4ydu3H6kXju9hst/@GsziYZ9uKe76tTB7@Aw Try it online!] counting CPU-Cycles 32 vs 31 on Ryzen Zen1 per Byte -> 100Mb/s
Using ROR and ROL is nearly as fast as assembler and more portable.<BR>[https://tio.run/##7VVdb@JGFH33r7gPkYCuwXYSrVa4VGLB7FoC2wtOu21VRY49wGiNx5oZh02j/PXSOx7MR8L@gtYPYM8599yPuXOnTESa5N1lme52EWcrnmxgXuXkxnYNy1pLWfYtixS9Lf1GS5LRpMf4ylJfVkC@y/uFTCS5Vxb3ZyabRK63jOdZb8vyJar2UraxtHJvLTe58XzlT8beBCbR6MUAeL6asYzAmOTlmr48Xw0Xs1k49sAPYm@qCWEU@zP/j2HshwGEgTmcImBZChohdTj1PwVQcpYOPpjTMIwGzgt68aYLT9sPoyj@PfJgFAaLcOrVYDD2Jy9GJYhAyuJJ3EmaC9dIWSEkrmyS73NWFZmAAVz/5Ni2Xf@4CHG9js8AHFwxHhOOijqKm2vlUqerawR9uKOFfH@LzGVVpJKyAj4ROYru7mO6IX3Yo/KpJGgbj8oKlTlJGc8MOHk@U4TMs6UpU@w@jLHkmXuESIFfKi4AJEhkKFnXeCArWuBiIjYqzHG8GCmrWfirloBIcvgTqXGvlv7L9IZff0Cow0HCWBFqhwBbKtfaY8bqaDgRVY7@BzrLNhpFdx0Q6xxLBe90Aq5RmxtqC0lWcQJ@QeVpEV1a5LQgxwROQRR33kpc6NJGRBeGV4WJXUMezQKpx11qXCAOKvCzOHA9QpN6PZx/USVpKyGnoyAlpKHpa@isJRSnXZPDufLTga/4omJp8jh0ikpD2358wlSDRNJH4hfSNT42ceoSK03dn4eSt/XrO/3XUc7aZ3EMgzFgeJcO9P86/02d18doURKSSSJkc2xix4xtHCh@UJ8WANqHC1255VSSvGi3DgKwZByE8ibo3wTYElrm@1uzBQ9UilZ9SCzrt4RvoCoVj0uIK/7AFBOHBsITFKAqQcc@juSug7NmW0gGdjN0LuWFwJuhorKxld7JOHb3frSjwzXwAy/Hk1mLORfEmkKc0zunUCt9SnMioCQcFIjVbZnt2OlKu2MdQujb/etTs7d7FSfiW7NNtL5XyIrwC3sSrwlIJIMu@6XCHGvgAGa9v/T2qddCrzLq314KrnFdd0FctxHs42xkWvCzh3f9/BcVDSdJtrfu7Xb/pMs8WYldN7zZdReP/wI Try it online!] counting CPU-Cycles 64 vs 76 one Ryzen Zen1
<lang pascal>Program Rule30;
//http://en.wikipedia.org/wiki/Next_State_Rule_30;
//http://mathworld.wolfram.com/Rule30.html
{$IFDEF FPC}
{$Mode Delphi}{$ASMMODE INTEL}
{$OPTIMIZATION ON,ALL}
// {$CODEALIGN proc=8,LOOP=1}
{$ELSE}
{$APPTYPE CONSOLE}
Line 334:
maxRounds = 2*1000*1000;
rounds = 10;
CpuF = 3.2e9; // Ryzen 5 1600 no Turbo 3.7 Ghz on my Linux64
 
var
{$ALIGN 32}
Rule30_State : Uint64;
 
// Returns the current value of the x86 time-stamp counter as one 64-bit
// integer. The value is obtained with the RDTSC instruction, so this routine
// only works on x86/x86-64 targets with Intel-syntax inline assembler enabled.
function GetCPU_Time: int64;
type
// Scratch record holding the two 32-bit halves delivered by RDTSC.
// The halves are recombined arithmetically below, so the order of the
// fields inside the record does not influence the result.
TCpu = record
HiCpu,
LoCpu : Dword;
end;
var
Cput : TCpu;
begin
// RDTSC leaves the counter in EDX:EAX (EDX = upper 32 bits, EAX = lower
// 32 bits); both halves are stored into the local record.
// NOTE(review): the asm block carries no modified-register list - confirm
// that the compiler treats EAX/EDX as clobbered here.
asm
RDTSC;
MOV Dword Ptr [CpuT.LoCpu],EAX
MOV Dword Ptr [CpuT.HiCpu],EDX
end;
// shl has multiplicative precedence in Pascal, so this evaluates as
// (int64(HiCpu) shl 32) + LoCpu; the int64 cast keeps the shift from
// being done in 32 bits.
with Cput do
result := int64(HiCPU) shl 32 + LoCpu;
end;
 
procedure InitRule30_State;inline;
Line 356 ⟶ 372:
 
function NextRule30Byte:NativeInt;
// Produces the next byte of the generator and advances the global
// Rule30_State by eight generations.
//
// For each of the eight steps the lowest bit of the current state is shifted
// into the result (so the first bit taken ends up as the most significant bit
// of the returned byte), then the state is advanced with
//   new := (ROL(state,1) OR state) XOR ROR(state,1)
// The work is done on a local copy (run) and written back to Rule30_State
// only once, in the last step.
//
// Returns: a value in 0..255.
//
//64-BIT can use many registers
//32-Bit still fast
var
  run, prev,next: Uint64;
  // NOTE(review): the constant 1 is kept in a UInt64 variable as in the
  // original; presumably this helps code generation on 32-bit - confirm.
  myOne : UInt64;
Begin
  run := Rule30_State;
  result := 0;
  myOne := 1;
  //Unrolling and inlining Next_State_Rule_30 by hand

  // bit 1
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  run := (next OR run) XOR prev;

  // bit 2
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  run := (next OR run) XOR prev;

  // bit 3
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  run := (next OR run) XOR prev;

  // bit 4
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  run := (next OR run) XOR prev;

  // bit 5
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  run := (next OR run) XOR prev;

  // bit 6
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  run := (next OR run) XOR prev;

  // bit 7
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  run := (next OR run) XOR prev;

  // bit 8 - the advanced state goes straight back into the global
  result := (result+result) OR (run AND myOne);
  next := ROLQword(run,1);
  Prev := RORQword(run,1);
  Rule30_State := (next OR run) XOR prev;
end;
 
procedure Speedtest;
// Measures how many CPU time-stamp-counter ticks one call of NextRule30Byte
// costs on average and prints the result as "cycles per Byte".
//
// Sequence: a long warm-up loop, then maxRounds calls of NextRule30Byte
// bracketed by two GetCPU_Time readings. The call to InitRule30_State lies
// inside the timed region.
var
  T1,T0 : Int64;   // time-stamp counter readings before / after the loop
  i: NativeInt;
Begin
  writeln('Speedtest for statesize of ',64,' bits');
  //Warm up: waking up the CPU takes some time
  For i := 100*1000*1000-1 downto 0 do
    Next_State_Rule_30;

  T0 := GetCPU_Time;
  InitRule30_State;
  For i := maxRounds-1 downto 0 do
    NextRule30Byte;   // result deliberately discarded, only the time matters
  T1 := GetCPU_Time;
  // Print one more generated byte after the timed loop.
  writeln(NextRule30Byte);
  writeln('cycles per Byte : ',(T1-T0)/maxRounds:0:2);
  writeln;
end;
Line 395 ⟶ 449:
writeln('The task ');
InitRule30_State;
For i := 1 to rounds do
write(NextRule30Byte:4);
writeln;
end;
Line 406 ⟶ 460:
end.</lang>
{{out}}
<pre>//running compiled for 64-BIT
Speedtest for statesize of 64 bits
44
cycles per Byte : 30.95
 
The task
Line 417 ⟶ 469:
<ENTER>
 
//running compiled for 32-BIT
Speedtest for statesize of 64 bits
44
cycles per Byte : 128.56
 
The task
220 197 147 174 117 97 149 171 100 151
<ENTER>
</pre>
 
=={{header|Perl}}==
Anonymous user