; File: Int128x64.asm
; (Listing metadata from the source viewer: 416 lines, 11 KiB, tagged "NASM".
;  The code below is MASM syntax for ml64, not NASM.)
; Build the object file with:
;   ml64 /nologo /c /Zf /Fo$(IntDir)Int128x64.obj Int128x64.asm
|
|
.CODE
|
|
|
|
;void int128sum(_int128 &dst, const _int128 &x, const _int128 &y);
;-----------------------------------------------------------------------
; dst = x + y, computed modulo 2^128 (the carry out of bit 127 is dropped).
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x, r8 = &y  (each operand: low qword at +0,
;        high qword at +8)
; Out:   [rcx] = low qword of the sum, [rcx+8] = high qword
; Clobb: rax, r9, flags
; Notes: Leaf function: only volatile registers are used and RSP is never
;        modified, so no unwind data is required. Both operands are fully
;        read before dst is written, so dst may alias x or y.
;-----------------------------------------------------------------------
int128sum PROC
        mov     rax, qword ptr [rdx]            ; rax = x.lo
        mov     r9,  qword ptr [rdx+8]          ; r9  = x.hi
        add     rax, qword ptr [r8]             ; rax = x.lo + y.lo, CF = carry
        adc     r9,  qword ptr [r8+8]           ; r9  = x.hi + y.hi + CF
        mov     qword ptr [rcx],   rax          ; dst.lo
        mov     qword ptr [rcx+8], r9           ; dst.hi
        ret
int128sum ENDP
|
|
|
|
;void int128dif( _int128 &dst, const _int128 &x, const _int128 &y);
;-----------------------------------------------------------------------
; dst = x - y, computed modulo 2^128 (the borrow out of bit 127 is dropped).
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x, r8 = &y  (each operand: low qword at +0,
;        high qword at +8)
; Out:   [rcx] = low qword of the difference, [rcx+8] = high qword
; Clobb: rax, r9, flags
; Notes: Leaf function: only volatile registers are used and RSP is never
;        modified, so no unwind data is required. Both operands are fully
;        read before dst is written, so dst may alias x or y.
;-----------------------------------------------------------------------
int128dif PROC
        mov     rax, qword ptr [rdx]            ; rax = x.lo
        mov     r9,  qword ptr [rdx+8]          ; r9  = x.hi
        sub     rax, qword ptr [r8]             ; rax = x.lo - y.lo, CF = borrow
        sbb     r9,  qword ptr [r8+8]           ; r9  = x.hi - y.hi - CF
        mov     qword ptr [rcx],   rax          ; dst.lo
        mov     qword ptr [rcx+8], r9           ; dst.hi
        ret
int128dif ENDP
|
|
|
|
;void int128mul(_int128 &dst, const _int128 &x, const _int128 &y);
;-----------------------------------------------------------------------
; dst = low 128 bits of x * y.
;
;   x * y mod 2^128 = x.lo*y.lo + ((x.hi*y.lo + x.lo*y.hi) mod 2^64) * 2^64
;
; Only the low 64 bits of each cross product can reach the result, so the
; two-operand IMUL (which yields exactly those low 64 bits) is sufficient.
; When both high qwords are zero the cross terms are zero and this reduces
; to a plain 64x64 -> 128 multiply, so no separate fast path is needed.
;
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x, r8 = &y  (low qword at +0, high qword at +8)
; Out:   [rcx] = low qword of the product, [rcx+8] = high qword
; Clobb: rax, rdx, r8, r9, r10, r11, flags
; Notes: Leaf function: only volatile registers are used and RSP is never
;        modified, so no unwind data is required. All four operand qwords
;        are loaded before dst is written, so dst may alias x or y.
;-----------------------------------------------------------------------
int128mul PROC
        mov     r9,  qword ptr [rdx]            ; r9  = x.lo
        mov     r10, qword ptr [rdx+8]          ; r10 = x.hi
        mov     r11, qword ptr [r8]             ; r11 = y.lo
        mov     r8,  qword ptr [r8+8]           ; r8  = y.hi (pointer no longer needed)

        imul    r10, r11                        ; r10 = lo64(x.hi * y.lo)
        imul    r8,  r9                         ; r8  = lo64(x.lo * y.hi)
        add     r10, r8                         ; r10 = cross terms, modulo 2^64

        mov     rax, r9
        mul     r11                             ; rdx:rax = x.lo * y.lo (full 128 bits)
        add     rdx, r10                        ; fold cross terms into the high qword

        mov     qword ptr [rcx],   rax          ; dst.lo
        mov     qword ptr [rcx+8], rdx          ; dst.hi
        ret
int128mul ENDP
|
|
|
|
|
|
;void int128div(_int128 &dst, const _int128 &x, const _int128 &y);
;-----------------------------------------------------------------------
; dst = x / y, signed 128-bit division truncating toward zero.
;
; Method: divide the magnitudes |x| / |y| as unsigned values, then negate
; the quotient when exactly one operand was negative.
;   * |y| < 2^64 : two chained 128/64 DIVs (high qword first, its
;                  remainder feeding the low qword).
;   * |y| >= 2^64: shift divisor and dividend right together until the
;                  divisor fits in 64 bits, DIV to get a quotient
;                  estimate, multiply back and decrement the estimate if
;                  estimate * |y| exceeds |x|.
;
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x, r8 = &y  (low qword at +0, high qword at +8)
; Out:   [rcx] = low qword of the quotient, [rcx+8] = high qword
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, flags
; Stack: uses only the 32-byte home (shadow) space the caller reserves
;        above the return address:
;          [rsp+8]  = &dst
;          [rsp+16] = |x|.lo      [rsp+24] = |x|.hi
;          [rsp+32] = x.hi xor y.hi (bit 63 set => quotient is negative)
;        RSP is never modified and no nonvolatile register is touched, so
;        this is a leaf function and needs no unwind data.
; Notes: x and y are only read; they are never written. All operand
;        qwords are loaded before dst is stored, so dst may alias x or y,
;        and x may alias y.
;        A zero divisor reaches DIV with a zero operand and raises the
;        processor's divide error.
;-----------------------------------------------------------------------
int128div PROC
        mov     qword ptr [rsp+8], rcx          ; keep &dst; rcx is needed as scratch

        mov     rax, qword ptr [rdx]            ; rdx:rax = x
        mov     rdx, qword ptr [rdx+8]
        mov     r9,  qword ptr [r8+8]           ; r9:r8   = y
        mov     r8,  qword ptr [r8]

        mov     rcx, rdx
        xor     rcx, r9                         ; bit 63 = sign(x) xor sign(y)
        mov     qword ptr [rsp+32], rcx

        test    rdx, rdx
        jns     idv_x_abs
        neg     rdx                             ; rdx:rax = -x
        neg     rax                             ; CF = (low qword != 0)
        sbb     rdx, 0
idv_x_abs:
        test    r9, r9
        jns     idv_y_abs
        neg     r9                              ; r9:r8 = -y
        neg     r8
        sbb     r9, 0
idv_y_abs:
        test    r9, r9
        jnz     idv_wide                        ; |y| needs more than 64 bits

        ; ---- |y| fits in 64 bits ------------------------------------
        mov     r10, rax                        ; r10 = |x|.lo
        mov     rax, rdx                        ; rax = |x|.hi
        xor     edx, edx
        div     r8                              ; rax = q.hi, rdx = |x|.hi mod |y|
        mov     r11, rax
        mov     rax, r10
        div     r8                              ; rax = q.lo
        mov     rdx, r11                        ; rdx:rax = |x| / |y|
        jmp     idv_sign

        ; ---- |y| >= 2^64 --------------------------------------------
idv_wide:
        mov     qword ptr [rsp+16], rax         ; remember |x| for the check below
        mov     qword ptr [rsp+24], rdx
        mov     rcx, r9                         ; rcx:r10 = working copy of |y|
        mov     r10, r8
idv_shift:
        shr     rcx, 1                          ; divisor  >>= 1
        rcr     r10, 1
        shr     rdx, 1                          ; dividend >>= 1
        rcr     rax, 1
        test    rcx, rcx
        jnz     idv_shift                       ; until the divisor fits in r10

        div     r10                             ; rax = quotient estimate
        mov     r11, rax                        ; r11 = estimate
        mul     r9                              ; rax = lo64(estimate * |y|.hi)
        mov     rcx, rax
        mov     rax, r8
        mul     r11                             ; rdx:rax = estimate * |y|.lo
        add     rdx, rcx                        ; rdx:rax = estimate * |y| (mod 2^128)
        jc      idv_too_big                     ; carry: product overflowed 128 bits
        cmp     rdx, qword ptr [rsp+24]         ; compare product with |x|, high qword first
        ja      idv_too_big
        jb      idv_q_ok
        cmp     rax, qword ptr [rsp+16]         ; high qwords equal: compare low qwords
        jbe     idv_q_ok
idv_too_big:
        dec     r11                             ; estimate was one too large
idv_q_ok:
        xor     edx, edx                        ; quotient < 2^64 on this path
        mov     rax, r11

idv_sign:
        cmp     qword ptr [rsp+32], 0
        jge     idv_store                       ; operand signs equal: keep quotient
        neg     rdx                             ; rdx:rax = -(rdx:rax)
        neg     rax
        sbb     rdx, 0
idv_store:
        mov     rcx, qword ptr [rsp+8]          ; rcx = &dst
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
int128div ENDP
|
|
|
|
;void int128rem( _int128 &dst, const _int128 &x, const _int128 &y);
;-----------------------------------------------------------------------
; dst = x % y, signed 128-bit remainder. The remainder takes the sign of
; the dividend x; the sign of y does not affect the result.
;
; Method: compute |x| mod |y| as unsigned values, then negate the result
; when x was negative.
;   * |y| < 2^64 : two chained 128/64 DIVs; the second DIV's remainder is
;                  the answer.
;   * |y| >= 2^64: shift divisor and dividend right together until the
;                  divisor fits in 64 bits, DIV to get a quotient
;                  estimate q, form q*|y|, subtract |y| once if that
;                  product exceeds |x|, and return |x| - product.
;
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x, r8 = &y  (low qword at +0, high qword at +8)
; Out:   [rcx] = low qword of the remainder, [rcx+8] = high qword
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, flags
; Stack: uses only the 32-byte home (shadow) space the caller reserves
;        above the return address:
;          [rsp+8]  = &dst
;          [rsp+16] = |x|.lo      [rsp+24] = |x|.hi
;          [rsp+32] = original x.hi (bit 63 set => remainder is negative)
;        RSP is never modified and no nonvolatile register is touched, so
;        this is a leaf function and needs no unwind data.
; Notes: x and y are only read; they are never written. All operand
;        qwords are loaded before dst is stored, so dst may alias x or y.
;        A zero divisor reaches DIV with a zero operand and raises the
;        processor's divide error.
;-----------------------------------------------------------------------
int128rem PROC
        mov     qword ptr [rsp+8], rcx          ; keep &dst; rcx is needed as scratch

        mov     rax, qword ptr [rdx]            ; rdx:rax = x
        mov     rdx, qword ptr [rdx+8]
        mov     r9,  qword ptr [r8+8]           ; r9:r8   = y
        mov     r8,  qword ptr [r8]

        mov     qword ptr [rsp+32], rdx         ; remember the dividend's sign

        test    rdx, rdx
        jns     irm_x_abs
        neg     rdx                             ; rdx:rax = -x
        neg     rax                             ; CF = (low qword != 0)
        sbb     rdx, 0
irm_x_abs:
        test    r9, r9
        jns     irm_y_abs
        neg     r9                              ; r9:r8 = -y
        neg     r8
        sbb     r9, 0
irm_y_abs:
        test    r9, r9
        jnz     irm_wide                        ; |y| needs more than 64 bits

        ; ---- |y| fits in 64 bits ------------------------------------
        mov     r10, rax                        ; r10 = |x|.lo
        mov     rax, rdx                        ; rax = |x|.hi
        xor     edx, edx
        div     r8                              ; rdx = |x|.hi mod |y|
        mov     rax, r10
        div     r8                              ; rdx = |x| mod |y|
        mov     rax, rdx
        xor     edx, edx                        ; remainder < 2^64 on this path
        jmp     irm_sign

        ; ---- |y| >= 2^64 --------------------------------------------
irm_wide:
        mov     qword ptr [rsp+16], rax         ; remember |x| for the check below
        mov     qword ptr [rsp+24], rdx
        mov     rcx, r9                         ; rcx:r10 = working copy of |y|
        mov     r10, r8
irm_shift:
        shr     rcx, 1                          ; divisor  >>= 1
        rcr     r10, 1
        shr     rdx, 1                          ; dividend >>= 1
        rcr     rax, 1
        test    rcx, rcx
        jnz     irm_shift                       ; until the divisor fits in r10

        div     r10                             ; rax = quotient estimate q
        mov     r11, rax
        mul     r9                              ; rax = lo64(q * |y|.hi)
        mov     rcx, rax
        mov     rax, r8
        mul     r11                             ; rdx:rax = q * |y|.lo
        add     rdx, rcx                        ; rdx:rax = q * |y| (mod 2^128)
        jc      irm_too_big                     ; carry: product overflowed 128 bits
        cmp     rdx, qword ptr [rsp+24]         ; compare product with |x|, high qword first
        ja      irm_too_big
        jb      irm_diff
        cmp     rax, qword ptr [rsp+16]         ; high qwords equal: compare low qwords
        jbe     irm_diff
irm_too_big:
        sub     rax, r8                         ; q was one too large:
        sbb     rdx, r9                         ;   product -= |y|
irm_diff:
        mov     r10, qword ptr [rsp+16]         ; r11:r10 = |x|
        mov     r11, qword ptr [rsp+24]
        sub     r10, rax                        ; r11:r10 = |x| - product
        sbb     r11, rdx
        mov     rax, r10
        mov     rdx, r11

irm_sign:
        cmp     qword ptr [rsp+32], 0
        jge     irm_store                       ; x >= 0: remainder stays non-negative
        neg     rdx                             ; rdx:rax = -(rdx:rax)
        neg     rax
        sbb     rdx, 0
irm_store:
        mov     rcx, qword ptr [rsp+8]          ; rcx = &dst
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
int128rem ENDP
|
|
|
|
;void int128neg( _int128 &dst, const _int128 &x);
;-----------------------------------------------------------------------
; dst = -x, computed as the 128-bit subtraction 0 - x (modulo 2^128).
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x  (low qword at +0, high qword at +8)
; Out:   [rcx] = low qword of the result, [rcx+8] = high qword
; Clobb: rax, r8, flags
;-----------------------------------------------------------------------
int128neg PROC
        xor     eax, eax                        ; r8:rax = 0
        xor     r8d, r8d
        sub     rax, qword ptr [rdx]            ; rax = 0 - x.lo, CF = borrow
        sbb     r8,  qword ptr [rdx+8]          ; r8  = 0 - x.hi - CF
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], r8
        ret
int128neg ENDP
|
|
|
|
;int int128cmp(const _int128 &n1, const _int128 &n2);
;-----------------------------------------------------------------------
; Signed three-way comparison of two 128-bit integers.
; ABI:   Microsoft x64
; In:    rcx = &n1, rdx = &n2  (low qword at +0, high qword at +8)
; Out:   rax = -1 if n1 < n2, 0 if n1 == n2, 1 if n1 > n2
; Clobb: rax, r8, r9, r10, r11, flags
; Method: CMP on the low qwords followed by SBB on the high qwords
;         performs a full 128-bit subtraction for flag purposes, so JL
;         afterwards tests "signed less than" on the whole value. The
;         test is run in both directions; if neither holds, the values
;         are equal.
;-----------------------------------------------------------------------
int128cmp PROC
        mov     r8,  qword ptr [rcx]            ; r9:r8   = n1
        mov     r9,  qword ptr [rcx+8]
        mov     r10, qword ptr [rdx]            ; r11:r10 = n2
        mov     r11, qword ptr [rdx+8]

        mov     rax, r9
        cmp     r8,  r10                        ; flags of n1 - n2 (128-bit)
        sbb     rax, r11
        jl      i128cmp_less

        mov     rax, r11
        cmp     r10, r8                         ; flags of n2 - n1 (128-bit)
        sbb     rax, r9
        jl      i128cmp_greater

        xor     eax, eax                        ; neither is smaller: equal
        ret
i128cmp_greater:
        mov     eax, 1
        ret
i128cmp_less:
        mov     rax, -1
        ret
int128cmp ENDP
|
|
|
|
;END
|
|
|
|
; File: UInt128x64.asm
; Build the object file with:
;   ml64 /nologo /c /Zf /Fo$(IntDir)UInt128x64.obj UInt128x64.asm
|
|
|
|
;.CODE
|
|
|
|
;void uint128div(_uint128 &dst, const _uint128 &x, const _uint128 &y);
;-----------------------------------------------------------------------
; dst = x / y, unsigned 128-bit division.
;
;   * y < 2^64 : two chained 128/64 DIVs (high qword first, its remainder
;                feeding the low qword).
;   * y >= 2^64: shift divisor and dividend right together until the
;                divisor fits in 64 bits, DIV to get a quotient estimate,
;                multiply back and decrement the estimate if
;                estimate * y exceeds x.
;
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x, r8 = &y  (low qword at +0, high qword at +8)
; Out:   [rcx] = low qword of the quotient, [rcx+8] = high qword
; Clobb: rax, rdx, r9, r10, r11, flags
; Notes: Leaf function: only volatile registers are used and RSP is never
;        modified, so no unwind data is required. dst is written only
;        after the last read of x and y, so it may alias either operand.
;        A zero divisor reaches DIV with a zero operand and raises the
;        processor's divide error.
;-----------------------------------------------------------------------
uint128div PROC
        mov     r9,  rdx                        ; r9 = &x (rdx is needed by DIV/MUL)
        mov     r10, qword ptr [r8+8]           ; r10 = y.hi
        test    r10, r10
        jnz     udv_wide                        ; y needs more than 64 bits

        ; ---- y fits in 64 bits --------------------------------------
        mov     r10, qword ptr [r8]             ; r10 = y.lo
        mov     rax, qword ptr [r9+8]           ; rax = x.hi
        xor     edx, edx
        div     r10                             ; rax = q.hi, rdx = x.hi mod y
        mov     r11, rax
        mov     rax, qword ptr [r9]             ; rdx:rax = (x.hi mod y):x.lo
        div     r10                             ; rax = q.lo
        mov     rdx, r11                        ; rdx:rax = x / y
        jmp     udv_store

        ; ---- y >= 2^64 ----------------------------------------------
udv_wide:
        mov     r11, qword ptr [r8]             ; r10:r11 = working copy of y
        mov     rdx, qword ptr [r9+8]           ; rdx:rax = working copy of x
        mov     rax, qword ptr [r9]
udv_shift:
        shr     r10, 1                          ; divisor  >>= 1
        rcr     r11, 1
        shr     rdx, 1                          ; dividend >>= 1
        rcr     rax, 1
        test    r10, r10
        jnz     udv_shift                       ; until the divisor fits in r11

        div     r11                             ; rax = quotient estimate
        mov     r11, rax                        ; r11 = estimate
        mul     qword ptr [r8+8]                ; rax = lo64(estimate * y.hi)
        mov     r10, rax
        mov     rax, qword ptr [r8]
        mul     r11                             ; rdx:rax = estimate * y.lo
        add     rdx, r10                        ; rdx:rax = estimate * y (mod 2^128)
        jc      udv_too_big                     ; carry: product overflowed 128 bits
        cmp     rdx, qword ptr [r9+8]           ; compare product with x, high qword first
        ja      udv_too_big
        jb      udv_q_ok
        cmp     rax, qword ptr [r9]             ; high qwords equal: compare low qwords
        jbe     udv_q_ok
udv_too_big:
        dec     r11                             ; estimate was one too large
udv_q_ok:
        xor     edx, edx                        ; quotient < 2^64 on this path
        mov     rax, r11

udv_store:
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
uint128div ENDP
|
|
|
|
;void uint128rem(_uint128 &dst, const _uint128 &x, const _uint128 &y);
;-----------------------------------------------------------------------
; dst = x % y, unsigned 128-bit remainder.
;
;   * y < 2^64 : two chained 128/64 DIVs; the second DIV's remainder is
;                the answer.
;   * y >= 2^64: shift divisor and dividend right together until the
;                divisor fits in 64 bits, DIV to get a quotient estimate
;                q, form q*y, subtract y once if that product exceeds x,
;                and return x - product.
;
; ABI:   Microsoft x64
; In:    rcx = &dst, rdx = &x, r8 = &y  (low qword at +0, high qword at +8)
; Out:   [rcx] = low qword of the remainder, [rcx+8] = high qword
; Clobb: rax, rdx, r9, r10, r11, flags
; Notes: Leaf function: only volatile registers are used and RSP is never
;        modified, so no unwind data is required. dst is written only
;        after the last read of x and y, so it may alias either operand.
;        A zero divisor reaches DIV with a zero operand and raises the
;        processor's divide error.
;-----------------------------------------------------------------------
uint128rem PROC
        mov     r9,  rdx                        ; r9 = &x (rdx is needed by DIV/MUL)
        mov     r10, qword ptr [r8+8]           ; r10 = y.hi
        test    r10, r10
        jnz     urm_wide                        ; y needs more than 64 bits

        ; ---- y fits in 64 bits --------------------------------------
        mov     r10, qword ptr [r8]             ; r10 = y.lo
        mov     rax, qword ptr [r9+8]           ; rax = x.hi
        xor     edx, edx
        div     r10                             ; rdx = x.hi mod y
        mov     rax, qword ptr [r9]             ; rdx:rax = (x.hi mod y):x.lo
        div     r10                             ; rdx = x mod y
        mov     rax, rdx
        xor     edx, edx                        ; remainder < 2^64 on this path
        jmp     urm_store

        ; ---- y >= 2^64 ----------------------------------------------
urm_wide:
        mov     r11, qword ptr [r8]             ; r10:r11 = working copy of y
        mov     rdx, qword ptr [r9+8]           ; rdx:rax = working copy of x
        mov     rax, qword ptr [r9]
urm_shift:
        shr     r10, 1                          ; divisor  >>= 1
        rcr     r11, 1
        shr     rdx, 1                          ; dividend >>= 1
        rcr     rax, 1
        test    r10, r10
        jnz     urm_shift                       ; until the divisor fits in r11

        div     r11                             ; rax = quotient estimate q
        mov     r11, rax                        ; r11 = q
        mul     qword ptr [r8+8]                ; rax = lo64(q * y.hi)
        mov     r10, rax
        mov     rax, r11
        mul     qword ptr [r8]                  ; rdx:rax = q * y.lo
        add     rdx, r10                        ; rdx:rax = q * y (mod 2^128)
        jc      urm_too_big                     ; carry: product overflowed 128 bits
        cmp     rdx, qword ptr [r9+8]           ; compare product with x, high qword first
        ja      urm_too_big
        jb      urm_diff
        cmp     rax, qword ptr [r9]             ; high qwords equal: compare low qwords
        jbe     urm_diff
urm_too_big:
        sub     rax, qword ptr [r8]             ; q was one too large:
        sbb     rdx, qword ptr [r8+8]           ;   product -= y
urm_diff:
        mov     r10, qword ptr [r9]             ; r11:r10 = x
        mov     r11, qword ptr [r9+8]
        sub     r10, rax                        ; r11:r10 = x - product
        sbb     r11, rdx
        mov     rax, r10
        mov     rdx, r11

urm_store:
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
uint128rem ENDP
|
|
|
|
;int uint128cmp(const _uint128 &n1, const _uint128 &n2);
;-----------------------------------------------------------------------
; Unsigned three-way comparison of two 128-bit integers.
; ABI:   Microsoft x64
; In:    rcx = &n1, rdx = &n2  (low qword at +0, high qword at +8)
; Out:   rax = -1 if n1 < n2, 0 if n1 == n2, 1 if n1 > n2
; Clobb: rax, r8, r9, r10, r11, flags
; Method: CMP on the low qwords followed by SBB on the high qwords
;         performs a full 128-bit subtraction for flag purposes, so the
;         final carry (JB) means "unsigned less than" on the whole value.
;         The test is run in both directions; if neither holds, the
;         values are equal.
;-----------------------------------------------------------------------
uint128cmp PROC
        mov     r8,  qword ptr [rcx]            ; r9:r8   = n1
        mov     r9,  qword ptr [rcx+8]
        mov     r10, qword ptr [rdx]            ; r11:r10 = n2
        mov     r11, qword ptr [rdx+8]

        mov     rax, r9
        cmp     r8,  r10                        ; borrow of n1 - n2 (128-bit)
        sbb     rax, r11
        jb      u128cmp_less

        mov     rax, r11
        cmp     r10, r8                         ; borrow of n2 - n1 (128-bit)
        sbb     rax, r9
        jb      u128cmp_greater

        xor     eax, eax                        ; neither is smaller: equal
        ret
u128cmp_greater:
        mov     eax, 1
        ret
u128cmp_less:
        mov     rax, -1
        ret
uint128cmp ENDP
|
|
|
|
END
|
|
|