490 lines
19 KiB
NASM
490 lines
19 KiB
NASM
; These functions implements the basic operations for _int128 type
|
|
; running on 64-bit intel CPU.
|
|
; They are (almost) identical to MS's workhorse-functions that do
|
|
; __int64-math using 32-bit registers. Every memory reference that uses
|
|
; dword ptr(4 byte) are replaced by qword ptr(8 byte), and registernames
|
|
; eax, ebx, ecx... are replaced by rax, rbx, rcx etc.
|
|
; Parameters are passed in rcx(1st argument) and rdx(2nd argument) and
|
|
; return value is in rax, where the runtime-stack is used in x86 mode.
|
|
; So the code is modified to reflect this environment
|
|
;
|
|
|
|
.CODE
|
|
|
|
;void int128add(_int128 &dst, const _int128 &x); do assignop dst += x;
|
|
int128add PROC
|
|
mov rax, qword ptr[rdx]
|
|
add qword ptr[rcx], rax
|
|
mov rax, qword ptr[rdx+8]
|
|
adc qword ptr[rcx+8], rax
|
|
ret
|
|
int128add ENDP
|
|
|
|
;void int128sub(_int128 &dst, const _int128 &x); do assignop dst -= x;
|
|
int128sub PROC
|
|
mov rax, qword ptr[rdx]
|
|
sub qword ptr[rcx], rax
|
|
mov rax, qword ptr[rdx+8]
|
|
sbb qword ptr[rcx+8], rax
|
|
ret
|
|
int128sub ENDP
|
|
|
|
;void int128mul(_int128 &dst, const _int128 &x); do assignop dst *= x;
|
|
int128mul PROC
|
|
push rbx
|
|
mov rax, qword ptr[rdx+8] ; rax = x.hi
|
|
mov rbx, qword ptr[rcx+8] ; rbx = dst.hi
|
|
or rbx, rax ; rbx |= x.hi
|
|
mov rbx, qword ptr[rcx] ; rbx = dst.lo
|
|
jne Hard ; if(x.hi|dst.hi) goto Hard
|
|
; else simple int64 multiplication
|
|
mov rax, qword ptr[rdx] ; rax = x.lo
|
|
mul rbx ; rdx:rax = rax * rbx
|
|
mov qword ptr[rcx] , rax ; dst.lo = rax
|
|
mov qword ptr[rcx+8], rdx ; dst.hi = rdx
|
|
pop rbx
|
|
ret
|
|
Hard: ; assume rax = x.hi, rbx = dst.lo
|
|
push rsi
|
|
mov rsi, rdx ; need rdx for highend of mul, so rsi = &x
|
|
mul rbx ; rdx:rax = x.hi*dst.lo
|
|
mov r9 , rax ; r9 = lo(x.hi*dst.lo)
|
|
mov rax, qword ptr[rsi] ; rax = x.lo
|
|
mul qword ptr[rcx+8] ; rdx:rax = x.lo*dst.hi
|
|
add r9, rax ; r9 += lo(x.lo*dst.hi);
|
|
mov rax, qword ptr[rsi] ; rax = x.lo
|
|
mul rbx ; rdx:rax = x.lo*dst.lo
|
|
add rdx, r9 ; rdx += lo(x.hi*dst.lo) + lo(x.lo*dst.hi)
|
|
mov qword ptr[rcx] , rax
|
|
mov qword ptr[rcx+8], rdx
|
|
pop rsi
|
|
pop rbx
|
|
ret
|
|
int128mul ENDP
|
|
|
|
;void int128div(_int128 &dst, _int128 &x); do assignop dst /= x; if (x < 0) x = -x; !!
|
|
int128div PROC
|
|
push rbx ;
|
|
push rdi ;
|
|
push rsi ;
|
|
mov r8, rcx ; r8 = &dst
|
|
mov r9, rdx ; r9 = &x
|
|
xor rdi, rdi ; rdi = 0
|
|
mov rax, qword ptr[r8+8] ; check sign of dst
|
|
or rax, rax ;
|
|
jge L1 ; if(dst >= 0) goto L1
|
|
inc rdi ; rdi++
|
|
mov rdx, qword ptr[r8] ; dst is negative. Change sign
|
|
neg rax ;
|
|
neg rdx ;
|
|
sbb rax, 0 ;
|
|
mov qword ptr[r8+8], rax ; dst = -original dst
|
|
mov qword ptr[r8], rdx ;
|
|
L1: ; Assume dst >= 0, rdi = (original dst < 0)1:0
|
|
mov rax, qword ptr[r9+8] ; check sign of x
|
|
or rax, rax ;
|
|
jge L2 ;
|
|
inc rdi ; x is negative. change sign, and increment rdi
|
|
mov rdx, qword ptr[r9] ;
|
|
neg rax ;
|
|
neg rdx ;
|
|
sbb rax, 0 ;
|
|
mov qword ptr[r9+8], rax ; x = -original x
|
|
mov qword ptr[r9], rdx ;
|
|
L2: ; Assume dst >= 0, x > 0, rdi = #original negative arguments
|
|
or rax, rax ;
|
|
jne L3 ;
|
|
mov rcx, qword ptr[r9] ;
|
|
mov rax, qword ptr[r8+8] ;
|
|
xor rdx, rdx ;
|
|
div rcx ;
|
|
mov rbx, rax ;
|
|
mov rax, qword ptr[r8] ;
|
|
div rcx ;
|
|
mov rdx, rbx ;
|
|
jmp L4 ;
|
|
L3: ;
|
|
mov rbx, rax ;
|
|
mov rcx, qword ptr[r9] ;
|
|
mov rdx, qword ptr[r8+8] ;
|
|
mov rax, qword ptr[r8] ;
|
|
L5: ;
|
|
shr rbx, 1 ;
|
|
rcr rcx, 1 ;
|
|
shr rdx, 1 ;
|
|
rcr rax, 1 ;
|
|
or rbx, rbx ;
|
|
jne L5 ;
|
|
div rcx ;
|
|
mov rsi, rax ;
|
|
mul qword ptr[r9+8] ;
|
|
mov rcx, rax ;
|
|
mov rax, qword ptr[r9] ;
|
|
mul rsi ;
|
|
add rdx, rcx ;
|
|
jb L6 ;
|
|
cmp rdx, qword ptr[r8+8] ;
|
|
ja L6 ;
|
|
jb L7 ;
|
|
cmp rax, qword ptr[r8] ;
|
|
jbe L7 ;
|
|
L6: ;
|
|
dec rsi ;
|
|
L7: ;
|
|
xor rdx, rdx ;
|
|
mov rax, rsi ;
|
|
L4: ;
|
|
dec rdi ;
|
|
jne L8 ;
|
|
neg rdx ;
|
|
neg rax ;
|
|
sbb rdx, 0 ;
|
|
L8: ;
|
|
pop rsi ;
|
|
pop rdi ;
|
|
pop rbx ;
|
|
mov qword ptr[r8], rax ;
|
|
mov qword ptr[r8+8], rdx ;
|
|
ret ;
|
|
int128div ENDP
|
|
|
|
;void int128rem(_int128 &dst, _int128 &x); do assignop dst %= x; if (x < 0) x = -x; !!
|
|
int128rem PROC
|
|
push rbx ;
|
|
push rdi ;
|
|
mov r8, rcx ; r8 = &dst
|
|
mov r9, rdx ; r9 = &x
|
|
xor rdi, rdi ;
|
|
mov rax, qword ptr[r8+8] ; check sign of dst
|
|
or rax, rax ;
|
|
jge L1 ;
|
|
inc rdi ;
|
|
mov rdx, qword ptr[r8] ; dst is negative. change sign
|
|
neg rax ;
|
|
neg rdx ;
|
|
sbb rax, 0 ;
|
|
mov qword ptr[r8+8], rax ; dst = -original dst
|
|
mov qword ptr[r8], rdx ;
|
|
L1: ; Assume dst>=0, rdi = (original dst < 0)1:0
|
|
mov rax, qword ptr[r9+8] ; check sign of x
|
|
or rax, rax ;
|
|
jge L2 ; if(x >= 0) goto L2
|
|
mov rdx, qword ptr[r9] ; x is negative. Change sign
|
|
neg rax ;
|
|
neg rdx ;
|
|
sbb rax, 0 ;
|
|
mov qword ptr[r9+8], rax ;
|
|
mov qword ptr[r9], rdx ;
|
|
L2: ; Assume dst>=0 and x > 0, (original dst < 0)1:0. dont care about orignal sign of x
|
|
or rax, rax ;
|
|
jne L3 ;
|
|
mov rcx, qword ptr[r9] ;
|
|
mov rax, qword ptr[r8+8] ;
|
|
xor rdx, rdx ;
|
|
div rcx ;
|
|
mov rax, qword ptr[r8] ;
|
|
div rcx ;
|
|
mov rax, rdx ;
|
|
xor rdx, rdx ;
|
|
dec rdi ;
|
|
jns L4 ;
|
|
jmp L8 ;
|
|
L3: ;
|
|
mov rbx, rax ;
|
|
mov rcx, qword ptr[r9] ;
|
|
mov rdx, qword ptr[r8+8] ;
|
|
mov rax, qword ptr[r8] ;
|
|
L5: ;
|
|
shr rbx, 1 ;
|
|
rcr rcx, 1 ;
|
|
shr rdx, 1 ;
|
|
rcr rax, 1 ;
|
|
or rbx, rbx ;
|
|
jne L5 ;
|
|
div rcx ;
|
|
mov rcx, rax ;
|
|
mul qword ptr[r9+8] ;
|
|
xchg rax, rcx ;
|
|
mul qword ptr[r9] ;
|
|
add rdx, rcx ;
|
|
jb L6 ;
|
|
cmp rdx, qword ptr[r8+8] ;
|
|
ja L6 ;
|
|
jb L7 ;
|
|
cmp rax, qword ptr[r8] ;
|
|
jbe L7 ;
|
|
L6: ;
|
|
sub rax, qword ptr[r9] ;
|
|
sbb rdx, qword ptr[r9+8] ;
|
|
L7: ;
|
|
sub rax, qword ptr[r8] ;
|
|
sbb rdx, qword ptr[r8+8] ;
|
|
dec rdi ;
|
|
jns L8 ;
|
|
L4: ;
|
|
neg rdx ;
|
|
neg rax ;
|
|
sbb rdx, 0 ;
|
|
L8: ;
|
|
pop rdi ;
|
|
pop rbx ;
|
|
mov qword ptr[r8], rax ;
|
|
mov qword ptr[r8+8], rdx ;
|
|
ret ;
|
|
int128rem ENDP
|
|
|
|
;void int128neg(_int128 &x); set x = -x;
|
|
int128neg PROC
|
|
mov rax, qword ptr[rcx]
|
|
neg rax
|
|
mov qword ptr[rcx], rax
|
|
mov rax, qword ptr[rcx + 8]
|
|
adc rax, 0
|
|
neg rax
|
|
mov qword ptr[rcx + 8], rax
|
|
ret
|
|
int128neg ENDP
|
|
|
|
;void int128inc(_int128 &x); set x = x + 1;
|
|
int128inc PROC
|
|
add qword ptr[rcx], 1
|
|
adc qword ptr[rcx+8], 0
|
|
ret
|
|
int128inc ENDP
|
|
|
|
;void int128dec(_int128 &x); set x = x - 1;
|
|
int128dec PROC
|
|
sub qword ptr[rcx], 1
|
|
sbb qword ptr[rcx+8], 0
|
|
ret
|
|
int128dec ENDP
|
|
|
|
;void int128shr(int shft, _int128 &x); do assignop x >>= shft; (if(x<0) shift 1-bits in from left, else 0-bits)
|
|
int128shr PROC
|
|
mov rax, qword ptr[rdx+8] ; rax = x.hi
|
|
cmp cl, 40h
|
|
jae More64 ; if(cl >= 64) goto More64;
|
|
sar qword ptr[rdx+8], cl ; shift x.hi
|
|
shrd qword ptr[rdx], rax, cl ; shift x.lo taking new bits from x.hi (rax)
|
|
ret
|
|
More64: ; assume rax = x.hi
|
|
cmp cl, 80h
|
|
jae RetSign ; if(cl >= 128) goto RetSign;
|
|
sar qword ptr[rdx+8], 3Fh ; set all bits in x.hi to sign-bit
|
|
and cl, 3Fh ; cl %= 64
|
|
sar rax, cl ; rax = x.hi >> cl
|
|
mov qword ptr[rdx], rax ; x.lo = rax
|
|
ret
|
|
RetSign: ; assume rax = x.hi
|
|
sar rax,3Fh ; set all bits in rax to sign-bit
|
|
mov qword ptr[rdx], rax
|
|
mov qword ptr[rdx+8], rax
|
|
ret
|
|
int128shr ENDP
|
|
|
|
;void int128shl(int shft, _int128 &x); do assignop x <<= shft;
|
|
int128shl PROC
|
|
cmp cl, 40h
|
|
jae More64 ; if(cl >= 64) goto More64;
|
|
mov rax, qword ptr[rdx] ; rax = x.lo
|
|
shl qword ptr[rdx], cl ; shift x.lo
|
|
shld qword ptr[rdx+8], rax, cl ; shift x.hi taking new bits from x.lo (rax)
|
|
ret
|
|
More64:
|
|
cmp cl, 80h
|
|
jae RetZero ; if(cl >= 128) goto RetZero;
|
|
and cl, 3Fh ; cl %= 64
|
|
mov rax, qword ptr[rdx] ; rax = x.lo
|
|
shl rax, cl ; shift rax (x.lo)
|
|
mov qword ptr[rdx+8], rax ; x.hi = rax
|
|
xor rax, rax
|
|
mov qword ptr[rdx], rax ; x.lo = 0
|
|
ret
|
|
RetZero:
|
|
xor rax, rax ; return 0
|
|
mov qword ptr[rdx], rax
|
|
mov qword ptr[rdx+8], rax
|
|
ret
|
|
int128shl ENDP
|
|
|
|
;int int128cmp(const _int128 &x1, const _int128 &x2); return sign(x1 - x2);
|
|
int128cmp PROC
|
|
mov rax, qword ptr[rcx+8] ; x1.hi
|
|
cmp rax, qword ptr[rdx+8] ; x2.hi
|
|
jl lessthan ; signed compare of x1.hi and x2.hi
|
|
jg greaterthan
|
|
mov rax, qword ptr[rcx] ; x1.lo
|
|
cmp rax, qword ptr[rdx] ; x2.lo
|
|
jb lessthan ; unsigned compare of x1.lo and x2.lo
|
|
ja greaterthan
|
|
xor rax, rax ; they are equal
|
|
ret
|
|
greaterthan:
|
|
mov rax, 1
|
|
ret
|
|
lessthan:
|
|
mov rax, -1
|
|
ret
|
|
int128cmp ENDP
|
|
|
|
;void uint128div(_uint128 &dst, const _uint128 &x); do assignop dst /= x;
|
|
uint128div PROC
|
|
push rbx ; same as signed division
|
|
push rsi ; but without sign check on arguments
|
|
mov r8, rcx ; r8 = &dst
|
|
mov r9, rdx ; r9 = &x
|
|
mov rax, qword ptr[r9+8] ;
|
|
or rax, rax ;
|
|
jne L1 ;
|
|
mov rcx, qword ptr[r9]
|
|
mov rax, qword ptr[r8+8]
|
|
xor rdx, rdx
|
|
div rcx
|
|
mov rbx, rax
|
|
mov rax, qword ptr[r8]
|
|
div rcx
|
|
mov rdx, rbx
|
|
jmp L2
|
|
L1:
|
|
mov rcx, rax
|
|
mov rbx, qword ptr[r9]
|
|
mov rdx, qword ptr[r8+8]
|
|
mov rax, qword ptr[r8]
|
|
L3:
|
|
shr rcx, 1
|
|
rcr rbx, 1
|
|
shr rdx, 1
|
|
rcr rax, 1
|
|
or rcx, rcx
|
|
jne L3
|
|
div rbx
|
|
mov rsi, rax
|
|
mul qword ptr[r9+8]
|
|
mov rcx, rax
|
|
mov rax, qword ptr[r9]
|
|
mul rsi
|
|
add rdx, rcx
|
|
jb L4
|
|
cmp rdx, qword ptr[r8+8]
|
|
ja L4
|
|
jb L5
|
|
cmp rax, qword ptr[r8]
|
|
jbe L5
|
|
L4:
|
|
dec rsi
|
|
L5:
|
|
xor rdx, rdx
|
|
mov rax, rsi
|
|
L2:
|
|
pop rsi
|
|
pop rbx
|
|
mov qword ptr[r8], rax
|
|
mov qword ptr[r8+8], rdx
|
|
ret
|
|
uint128div ENDP
|
|
|
|
; calculates unsigned remainder
|
|
;void uint128rem(_uint128 &dst, const _uint128 &x); do assignop dst %= x;
|
|
uint128rem PROC
|
|
push rbx
|
|
mov r8, rcx
|
|
mov r9, rdx
|
|
mov rax, qword ptr[r9+8]
|
|
or rax, rax
|
|
jne L1
|
|
mov rcx, qword ptr[r9]
|
|
mov rax, qword ptr[r8+8]
|
|
xor rdx, rdx
|
|
div rcx
|
|
mov rax, qword ptr[r8]
|
|
div rcx
|
|
mov rax, rdx
|
|
xor rdx, rdx
|
|
jmp L2
|
|
L1:
|
|
mov rcx, rax
|
|
mov rbx, qword ptr[r9]
|
|
mov rdx, qword ptr[r8+8]
|
|
mov rax, qword ptr[r8]
|
|
L3:
|
|
shr rcx, 1
|
|
rcr rbx, 1
|
|
shr rdx, 1
|
|
rcr rax, 1
|
|
or rcx, rcx
|
|
jne L3
|
|
div rbx
|
|
mov rcx, rax
|
|
mul qword ptr[r9+8]
|
|
xchg rax, rcx
|
|
mul qword ptr[r9]
|
|
add rdx, rcx
|
|
jb L4
|
|
cmp rdx, qword ptr[r8+8]
|
|
ja L4
|
|
jb L5
|
|
cmp rax, qword ptr[r8]
|
|
jbe L5
|
|
L4:
|
|
sub rax, qword ptr[r9]
|
|
sbb rdx, qword ptr[r9+8]
|
|
L5:
|
|
sub rax, qword ptr[r8]
|
|
sbb rdx, qword ptr[r8+8]
|
|
neg rdx
|
|
neg rax
|
|
sbb rdx, 0
|
|
L2:
|
|
pop rbx
|
|
mov qword ptr[r8], rax
|
|
mov qword ptr[r8+8], rdx
|
|
ret
|
|
uint128rem ENDP
|
|
|
|
;void uint128shr(int shft, void *x); do assignop x >>= shft. always shift 0-bits in from left
|
|
uint128shr PROC
|
|
cmp cl, 40h
|
|
jae More64 ; if(cl >= 64) goto More64;
|
|
mov rax, qword ptr[rdx+8] ; rax = x.hi
|
|
shr qword ptr[rdx+8], cl ; shift x.hi
|
|
shrd qword ptr[rdx], rax, cl ; shift x.lo taking new bits from x.hi (rax)
|
|
ret
|
|
More64:
|
|
cmp cl, 80h
|
|
jae RetZero ; if(cl >= 128) goto RetZero;
|
|
and cl, 3Fh ; cl %= 64
|
|
mov rax, qword ptr[rdx+8] ; rax = x.hi
|
|
shr rax, cl ; rax >>= cl
|
|
mov qword ptr[rdx], rax ; x.lo = rax
|
|
xor rax, rax
|
|
mov qword ptr[rdx+8], rax ; x.hi = 0
|
|
ret
|
|
RetZero:
|
|
xor rax, rax ; return 0
|
|
mov qword ptr[rdx], rax
|
|
mov qword ptr[rdx+8], rax
|
|
ret
|
|
uint128shr ENDP
|
|
|
|
;int uint128cmp(const _uint128 &x1, const _uint128 &x2); return sign(x1 - x2);
|
|
uint128cmp PROC
|
|
mov rax, qword ptr[rcx+8] ; x1.hi
|
|
cmp rax, qword ptr[rdx+8] ; x2.hi
|
|
jb lessthan ; unsigned compare of x1.hi and x2.hi
|
|
ja greaterthan
|
|
mov rax, qword ptr[rcx] ; x1.lo
|
|
cmp rax, qword ptr[rdx] ; x2.lo
|
|
jb lessthan ; unsigned compare of x1.lo and x2.lo
|
|
ja greaterthan
|
|
xor rax, rax ; they are equal
|
|
ret
|
|
greaterthan:
|
|
mov rax, 1
|
|
ret
|
|
lessthan:
|
|
mov rax, -1
|
|
ret
|
|
uint128cmp ENDP
|
|
|
|
END
|