Agner, I was hoping you could enlighten me on one more issue with Mother Of All. I'm trying to use your AsmLib code to build an inline Delphi function. You mentioned Delphi in some of your PDFs so I'm hoping this isn't an issue. I'm not very proficient in assembly and I'm not sure if I'm missing something obvious, or Delphi just doesn't want to properly align my data on the correct boundaries. I get an access violation due to alignment issues and changing movdqa to movdqu only moves the violation further down the function (to movq). Assuming this can be made to work, I'm assuming I'll have to initialize the Seed variable in reverse to have it match the Pascal version below. Is an inline function like the one below possible within Delphi's limitations? {$ALIGN 16} { This directive should make the compiler properly align }
{ the Seed variable, but it doesn't appear to. }
type
Seed6_32 = Array [0..5] of UInt32; { Seed type for Mother Of All. I've tried using -7 for the lower }
{ array bounds in an attempt to align the relevant part of the variable. No luck. } TMotherOfAll = class(TRandFuncs)
Strict Private
Seed: Seed6_32;
Public
Constructor Create(Aseed: Seed6_32);
Function NextRandom32: UInt32;
end; {$IFDEF CPUX64}
function TMotherOfAll.NextRandom32: UInt32; asm // I extracted this from your asmlib
.NoFrame
.ALIGN 16 // Align the code
movdqa xmm1, oword [Seed[1]] // Load M3, M2, M1, M0
mov eax, dword [Seed[4]] // Retrieve previous random number
movdqa xmm2, xmm1 // copy
movdqa xmm3, oword ptr [MF[0]] // load constants
psrlq xmm2, 32 // move M2, M0 down
movq qword [Seed[0]], xmm1 // M4=M3, M3=M2
movhps qword [Seed[2]], xmm1 // M2=M1, M1=M0
pmuludq xmm1, xmm3 // ; M3*MF3, M1*MF1
psrlq xmm3, 32 // ; move MF2,MF0 down
pmuludq xmm2, xmm3 // ; M2*MF2, M0*MF0
paddq xmm1, xmm2 // ; P2+P3, P0+P1
movhlps xmm2, xmm1 // ; Get high qword
paddq xmm1, xmm2 // ; P0+P1+P2+P3
paddq xmm1, oword [Seed[5]] //; +carry
movq qword [Seed[4]], xmm1 //; Store new M0 and carry
movq rax, xmm1 // Function result returned in RAX
end;
{$ELSE} function TMotherOfAll.NextRandom32: UInt32; Var
sum: UInt64; begin
// Output random bits
sum := UInt64(2111111111) * UInt64(Seed[3]) +
UInt64(1492) * UInt64(Seed[2]) +
UInt64(1776) * UInt64(Seed[1]) +
UInt64(5115) * UInt64(Seed[0]) +
UInt64(Seed[4]);
Seed[3] := Seed[2];
Seed[2] := Seed[1];
Seed[1] := Seed[0];
Seed[4] := UInt32(sum Shr 32); // Carry
Seed[0] := Uint32(sum); // Low 32 bits of sum
Result := Seed[0];
end;
{$ENDIF} Thanks in advance for your input.
Mike |