Patrick Kelley 8fd444092b initial
2025-05-07 15:35:15 -04:00

490 lines
19 KiB
NASM

; These functions implement the basic operations for the _int128 type
; running on a 64-bit Intel CPU.
; They are (almost) identical to MS's workhorse-functions that do
; __int64-math using 32-bit registers. Every memory reference that uses
; dword ptr(4 byte) are replaced by qword ptr(8 byte), and registernames
; eax, ebx, ecx... are replaced by rax, rbx, rcx etc.
; Parameters are passed in rcx(1st argument) and rdx(2nd argument) and
; return value is in rax, where the runtime-stack is used in x86 mode.
; So the code is modified to reflect this environment
;
.CODE
;void int128add(_int128 &dst, const _int128 &x); do assignop dst += x;
; In:    rcx = &dst, rdx = &x   (each operand is lo qword at +0, hi qword at +8)
; Out:   dst = dst + x (mod 2^128)
; Clobb: rax, r8, flags
int128add PROC
        mov     rax, qword ptr [rcx]        ; rax = dst.lo
        mov     r8,  qword ptr [rcx+8]      ; r8  = dst.hi
        add     rax, qword ptr [rdx]        ; low halves; CF = carry out of bit 63
        adc     r8,  qword ptr [rdx+8]      ; high halves plus that carry
        mov     qword ptr [rcx],   rax      ; mov leaves flags alone, order is free
        mov     qword ptr [rcx+8], r8
        ret
int128add ENDP
;void int128sub(_int128 &dst, const _int128 &x); do assignop dst -= x;
; In:    rcx = &dst, rdx = &x   (each operand is lo qword at +0, hi qword at +8)
; Out:   dst = dst - x (mod 2^128)
; Clobb: rax, r8, flags
int128sub PROC
        mov     rax, qword ptr [rcx]        ; rax = dst.lo
        mov     r8,  qword ptr [rcx+8]      ; r8  = dst.hi
        sub     rax, qword ptr [rdx]        ; low halves; CF = borrow out of bit 63
        sbb     r8,  qword ptr [rdx+8]      ; high halves minus that borrow
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], r8
        ret
int128sub ENDP
;void int128mul(_int128 &dst, const _int128 &x); do assignop dst *= x;
; In:    rcx = &dst, rdx = &x   (each operand is lo qword at +0, hi qword at +8)
; Out:   dst = low 128 bits of dst * x (same bits for signed and unsigned)
; Clobb: rax, rdx, r8, r9, r10, r11, flags
;
; Leaf function: only volatile registers are used and rsp is never changed,
; so no prolog and no unwind data are required.
;
;   dst * x mod 2^128 =  dst.lo * x.lo                        (full 128 bits)
;                      + ((dst.hi * x.lo + dst.lo * x.hi) << 64)
; Only the low 64 bits of the two cross products survive the shift, which is
; what the two-operand imul delivers. When both high halves are zero the cross
; terms are zero, so no separate "simple" path is needed.
int128mul PROC
        mov     r8,  qword ptr [rcx]        ; r8  = dst.lo
        mov     r9,  qword ptr [rcx+8]      ; r9  = dst.hi
        mov     r10, qword ptr [rdx]        ; r10 = x.lo   (read before rdx is reused)
        mov     r11, qword ptr [rdx+8]      ; r11 = x.hi

        imul    r9,  r10                    ; r9  = lo64(dst.hi * x.lo)
        imul    r11, r8                     ; r11 = lo64(x.hi * dst.lo)
        add     r9,  r11                    ; r9  = sum of cross terms (mod 2^64)

        mov     rax, r8
        mul     r10                         ; rdx:rax = dst.lo * x.lo
        add     rdx, r9                     ; fold cross terms into the high half

        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
int128mul ENDP
;void int128div(_int128 &dst, _int128 &x); do assignop dst /= x; if (x < 0) x = -x; !!
; In:    rcx = &dst, rdx = &x   (each operand is lo qword at +0, hi qword at +8)
; Out:   dst = dst / x, truncated toward zero
; Side:  x is overwritten with |x| when it was negative (see signature comment)
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, flags
;
; Leaf function: only volatile registers are used and rsp is never changed,
; so no prolog and no unwind data are required. That matters here because
; div raises the CPU divide-error exception when x == 0 (no check is made).
;
; NOTE: dst and x must be distinct objects. The in-place sign normalization
; of dst would otherwise flip x as well and the sign count would be wrong.
;
; Register roles:
;   r8  = &dst            r9  = &x
;   r11 = number of originally negative operands (0, 1 or 2)
;   r10 = scratch: high quotient word / divisor.hi / quotient estimate
int128div PROC
        mov     r8, rcx                     ; r8 = &dst
        mov     r9, rdx                     ; r9 = &x
        xor     r11d, r11d                  ; no negative operand seen yet

        mov     rax, qword ptr [r8+8]       ; rax = dst.hi
        test    rax, rax
        jns     DstReady                    ; dst >= 0
        inc     r11
        mov     rdx, qword ptr [r8]         ; rax:rdx = dst, negate it
        neg     rax
        neg     rdx                         ; CF = (dst.lo != 0)
        sbb     rax, 0
        mov     qword ptr [r8+8], rax       ; dst = |dst| (re-read further down)
        mov     qword ptr [r8],   rdx
DstReady:
        mov     rax, qword ptr [r9+8]       ; rax = x.hi
        test    rax, rax
        jns     DivisorReady                ; x >= 0
        inc     r11
        mov     rdx, qword ptr [r9]         ; rax:rdx = x, negate it
        neg     rax
        neg     rdx
        sbb     rax, 0
        mov     qword ptr [r9+8], rax       ; x = |x| (visible to the caller)
        mov     qword ptr [r9],   rdx
DivisorReady:                               ; rax = |x|.hi
        test    rax, rax
        jnz     WideDivisor

        ; Divisor fits in 64 bits: two chained 128/64 divisions.
        mov     rcx, qword ptr [r9]         ; rcx = divisor
        mov     rax, qword ptr [r8+8]
        xor     edx, edx
        div     rcx                         ; rax = quotient.hi, rdx = remainder
        mov     r10, rax
        mov     rax, qword ptr [r8]         ; rdx:rax = remainder:dst.lo
        div     rcx                         ; rax = quotient.lo
        mov     rdx, r10                    ; rdx:rax = quotient
        jmp     ApplySign

WideDivisor:
        ; Divisor needs more than 64 bits, so the quotient fits in 64 bits.
        ; Shift divisor and dividend right together until the divisor fits in
        ; one qword, divide to get an estimate that is exact or one too high,
        ; then verify by multiplying back.
        mov     r10, rax                    ; r10:rcx = divisor
        mov     rcx, qword ptr [r9]
        mov     rdx, qword ptr [r8+8]       ; rdx:rax = dividend
        mov     rax, qword ptr [r8]
Normalize:
        shr     r10, 1
        rcr     rcx, 1
        shr     rdx, 1
        rcr     rax, 1
        test    r10, r10
        jnz     Normalize
        div     rcx                         ; rax = quotient estimate
        mov     r10, rax                    ; r10 is free again (it reached 0)
        mul     qword ptr [r9+8]            ; estimate * divisor.hi
        mov     rcx, rax
        mov     rax, qword ptr [r9]
        mul     r10                         ; estimate * divisor.lo
        add     rdx, rcx                    ; rdx:rax = estimate * divisor
        jc      TooBig                      ; overflowed 128 bits: estimate too high
        cmp     rdx, qword ptr [r8+8]       ; compare product with dividend
        ja      TooBig
        jb      EstimateOk
        cmp     rax, qword ptr [r8]
        jbe     EstimateOk
TooBig:
        dec     r10
EstimateOk:
        xor     edx, edx                    ; rdx:rax = quotient
        mov     rax, r10

ApplySign:
        dec     r11                         ; exactly one negative operand?
        jnz     Store
        neg     rdx                         ; yes: quotient is negative
        neg     rax
        sbb     rdx, 0
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        ret
int128div ENDP
;void int128rem(_int128 &dst, _int128 &x); do assignop dst %= x; if (x < 0) x = -x; !!
; In:    rcx = &dst, rdx = &x   (each operand is lo qword at +0, hi qword at +8)
; Out:   dst = dst % x; the result takes the sign of the original dst
; Side:  x is overwritten with |x| when it was negative (see signature comment)
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, flags
;
; Leaf function: only volatile registers are used and rsp is never changed,
; so no prolog and no unwind data are required. That matters here because
; div raises the CPU divide-error exception when x == 0 (no check is made).
;
; Register roles:
;   r8  = &dst            r9 = &x
;   r11 = 1 if the original dst was negative, else 0
;   r10 = divisor.hi while normalizing
int128rem PROC
        mov     r8, rcx                     ; r8 = &dst
        mov     r9, rdx                     ; r9 = &x
        xor     r11d, r11d

        mov     rax, qword ptr [r8+8]       ; rax = dst.hi
        test    rax, rax
        jns     DstReady                    ; dst >= 0
        inc     r11                         ; remember: result must be negative
        mov     rdx, qword ptr [r8]         ; rax:rdx = dst, negate it
        neg     rax
        neg     rdx                         ; CF = (dst.lo != 0)
        sbb     rax, 0
        mov     qword ptr [r8+8], rax       ; dst = |dst| (re-read further down)
        mov     qword ptr [r8],   rdx
DstReady:
        mov     rax, qword ptr [r9+8]       ; rax = x.hi
        test    rax, rax
        jns     DivisorReady                ; x >= 0; its sign never affects the result
        mov     rdx, qword ptr [r9]
        neg     rax
        neg     rdx
        sbb     rax, 0
        mov     qword ptr [r9+8], rax       ; x = |x| (visible to the caller)
        mov     qword ptr [r9],   rdx
DivisorReady:                               ; rax = |x|.hi
        test    rax, rax
        jnz     WideDivisor

        ; Divisor fits in 64 bits: two chained divisions, keep the remainder.
        mov     rcx, qword ptr [r9]         ; rcx = divisor
        mov     rax, qword ptr [r8+8]
        xor     edx, edx
        div     rcx                         ; rdx = dst.hi mod divisor
        mov     rax, qword ptr [r8]
        div     rcx                         ; rdx = final remainder
        mov     rax, rdx
        xor     edx, edx                    ; rdx:rax = remainder (non-negative)
        dec     r11
        jns     Negate                      ; dst was negative: negate remainder
        jmp     Store

WideDivisor:
        ; Divisor needs more than 64 bits. Estimate the quotient as in the
        ; division routine, then form (quotient * divisor) - dividend, which
        ; is the NEGATED remainder.
        mov     r10, rax                    ; r10:rcx = divisor
        mov     rcx, qword ptr [r9]
        mov     rdx, qword ptr [r8+8]       ; rdx:rax = dividend
        mov     rax, qword ptr [r8]
Normalize:
        shr     r10, 1
        rcr     rcx, 1
        shr     rdx, 1
        rcr     rax, 1
        test    r10, r10
        jnz     Normalize
        div     rcx                         ; rax = quotient estimate
        mov     rcx, rax
        mul     qword ptr [r9+8]            ; estimate * divisor.hi
        xchg    rax, rcx                    ; rcx = that product, rax = estimate
        mul     qword ptr [r9]              ; estimate * divisor.lo
        add     rdx, rcx                    ; rdx:rax = estimate * divisor
        jc      TooBig                      ; overflowed 128 bits: estimate too high
        cmp     rdx, qword ptr [r8+8]       ; compare product with dividend
        ja      TooBig
        jb      ProductOk
        cmp     rax, qword ptr [r8]
        jbe     ProductOk
TooBig:
        sub     rax, qword ptr [r9]         ; estimate was one too high:
        sbb     rdx, qword ptr [r9+8]       ;   product -= divisor
ProductOk:
        sub     rax, qword ptr [r8]         ; rdx:rax = product - dividend
        sbb     rdx, qword ptr [r8+8]       ;         = -remainder
        dec     r11
        jns     Store                       ; dst was negative: -remainder is the answer
Negate:
        neg     rdx
        neg     rax
        sbb     rdx, 0
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        ret
int128rem ENDP
;void int128neg(_int128 &x); set x = -x;
; In:    rcx = &x   (lo qword at +0, hi qword at +8)
; Out:   x = 0 - x (mod 2^128)
; Clobb: rax, rdx, flags
int128neg PROC
        xor     eax, eax                    ; rdx:rax = 0
        xor     edx, edx
        sub     rax, qword ptr [rcx]        ; 0 - x.lo; CF = (x.lo != 0)
        sbb     rdx, qword ptr [rcx+8]      ; 0 - x.hi - borrow
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
int128neg ENDP
;void int128inc(_int128 &x); set x = x + 1;
; In:    rcx = &x   (lo qword at +0, hi qword at +8)
; Clobb: rax, rdx, flags
int128inc PROC
        mov     rax, qword ptr [rcx]        ; rdx:rax = x
        mov     rdx, qword ptr [rcx+8]
        add     rax, 1                      ; add (not inc) so CF carries into the high half
        adc     rdx, 0
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
int128inc ENDP
;void int128dec(_int128 &x); set x = x - 1;
; In:    rcx = &x   (lo qword at +0, hi qword at +8)
; Clobb: rax, rdx, flags
int128dec PROC
        mov     rax, qword ptr [rcx]        ; rdx:rax = x
        mov     rdx, qword ptr [rcx+8]
        sub     rax, 1                      ; sub (not dec) so CF borrows from the high half
        sbb     rdx, 0
        mov     qword ptr [rcx],   rax
        mov     qword ptr [rcx+8], rdx
        ret
int128dec ENDP
;void int128shr(int shft, _int128 &x); do assignop x >>= shft; (if(x<0) shift 1-bits in from left, else 0-bits)
; In:    ecx = shft, rdx = &x   (lo qword at +0, hi qword at +8)
; Out:   x = x >> shft, arithmetic; any count >= 128 yields all sign bits
; Clobb: rax, rcx, flags
;
; The range checks compare the whole 32-bit count (ecx), not just cl, so a
; count such as 256 saturates instead of wrapping to a shift by 0. The compare
; is unsigned, so a negative count is also treated as >= 128.
int128shr PROC
        mov     rax, qword ptr [rdx+8]      ; rax = x.hi (original value)
        cmp     ecx, 64
        jae     AtLeast64
        sar     qword ptr [rdx+8], cl       ; x.hi >>= shft, sign-filling
        shrd    qword ptr [rdx], rax, cl    ; x.lo >>= shft, refilled from old x.hi
        ret
AtLeast64:                                  ; rax = x.hi
        cmp     ecx, 128
        jae     AllSign
        sar     qword ptr [rdx+8], 63       ; x.hi = sign bit replicated
        and     cl, 3Fh                     ; cl = shft - 64 (shft is 64..127 here)
        sar     rax, cl
        mov     qword ptr [rdx], rax        ; x.lo = old x.hi >> (shft - 64)
        ret
AllSign:                                    ; rax = x.hi
        sar     rax, 63                     ; rax = 0 or -1 according to the sign
        mov     qword ptr [rdx],   rax
        mov     qword ptr [rdx+8], rax
        ret
int128shr ENDP
;void int128shl(int shft, _int128 &x); do assignop x <<= shft;
; In:    ecx = shft, rdx = &x   (lo qword at +0, hi qword at +8)
; Out:   x = x << shft; any count >= 128 yields 0
; Clobb: rax, rcx, flags
;
; The range checks compare the whole 32-bit count (ecx), not just cl, so a
; count such as 256 saturates to 0 instead of wrapping to a shift by 0. The
; compare is unsigned, so a negative count is also treated as >= 128.
int128shl PROC
        cmp     ecx, 64
        jae     AtLeast64
        mov     rax, qword ptr [rdx]        ; rax = x.lo (original value)
        shl     qword ptr [rdx], cl         ; x.lo <<= shft
        shld    qword ptr [rdx+8], rax, cl  ; x.hi <<= shft, refilled from old x.lo
        ret
AtLeast64:
        cmp     ecx, 128
        jae     AllZero
        and     cl, 3Fh                     ; cl = shft - 64 (shft is 64..127 here)
        mov     rax, qword ptr [rdx]
        shl     rax, cl
        mov     qword ptr [rdx+8], rax      ; x.hi = old x.lo << (shft - 64)
        xor     eax, eax
        mov     qword ptr [rdx], rax        ; x.lo = 0
        ret
AllZero:
        xor     eax, eax
        mov     qword ptr [rdx],   rax
        mov     qword ptr [rdx+8], rax
        ret
int128shl ENDP
;int int128cmp(const _int128 &x1, const _int128 &x2); return sign(x1 - x2);
; In:    rcx = &x1, rdx = &x2   (each operand is lo qword at +0, hi qword at +8)
; Out:   rax = -1 if x1 < x2, 0 if equal, +1 if x1 > x2 (signed 128-bit compare)
; Clobb: r8, r9, r10, flags
int128cmp PROC
        mov     r8, 1                       ; candidate results for cmov
        mov     r9, -1
        xor     eax, eax                    ; default: equal
        mov     r10, qword ptr [rcx+8]      ; x1.hi
        cmp     r10, qword ptr [rdx+8]      ; high halves decide when they differ
        cmovg   rax, r8                     ; signed compare of the high halves
        cmovl   rax, r9
        jne     Done                        ; cmov leaves the cmp flags intact
        mov     r10, qword ptr [rcx]        ; x1.lo
        cmp     r10, qword ptr [rdx]
        cmova   rax, r8                     ; low halves compare as unsigned
        cmovb   rax, r9
Done:
        ret
int128cmp ENDP
;void uint128div(_uint128 &dst, const _uint128 &x); do assignop dst /= x;
; In:    rcx = &dst, rdx = &x   (each operand is lo qword at +0, hi qword at +8)
; Out:   dst = dst / x (unsigned)
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, flags
;
; Same algorithm as the signed division, without the sign handling.
; Leaf function: only volatile registers are used and rsp is never changed,
; so no prolog and no unwind data are required. That matters here because
; div raises the CPU divide-error exception when x == 0 (no check is made).
;
; Register roles:
;   r8  = &dst            r9  = &x
;   r10 = high quotient word (narrow path) / divisor.lo (wide path)
;   r11 = quotient estimate (wide path)
uint128div PROC
        mov     r8, rcx                     ; r8 = &dst
        mov     r9, rdx                     ; r9 = &x
        mov     rax, qword ptr [r9+8]       ; rax = x.hi
        test    rax, rax
        jnz     WideDivisor

        ; Divisor fits in 64 bits: two chained 128/64 divisions.
        mov     rcx, qword ptr [r9]         ; rcx = divisor
        mov     rax, qword ptr [r8+8]
        xor     edx, edx
        div     rcx                         ; rax = quotient.hi, rdx = remainder
        mov     r10, rax
        mov     rax, qword ptr [r8]         ; rdx:rax = remainder:dst.lo
        div     rcx                         ; rax = quotient.lo
        mov     rdx, r10                    ; rdx:rax = quotient
        jmp     Store

WideDivisor:
        ; Divisor needs more than 64 bits, so the quotient fits in 64 bits.
        ; Shift divisor and dividend right together until the divisor fits in
        ; one qword, divide to get an estimate that is exact or one too high,
        ; then verify by multiplying back.
        mov     rcx, rax                    ; rcx:r10 = divisor
        mov     r10, qword ptr [r9]
        mov     rdx, qword ptr [r8+8]       ; rdx:rax = dividend
        mov     rax, qword ptr [r8]
Normalize:
        shr     rcx, 1
        rcr     r10, 1
        shr     rdx, 1
        rcr     rax, 1
        test    rcx, rcx
        jnz     Normalize
        div     r10                         ; rax = quotient estimate
        mov     r11, rax
        mul     qword ptr [r9+8]            ; estimate * divisor.hi
        mov     rcx, rax
        mov     rax, qword ptr [r9]
        mul     r11                         ; estimate * divisor.lo
        add     rdx, rcx                    ; rdx:rax = estimate * divisor
        jc      TooBig                      ; overflowed 128 bits: estimate too high
        cmp     rdx, qword ptr [r8+8]       ; compare product with dividend
        ja      TooBig
        jb      EstimateOk
        cmp     rax, qword ptr [r8]
        jbe     EstimateOk
TooBig:
        dec     r11
EstimateOk:
        xor     edx, edx                    ; rdx:rax = quotient
        mov     rax, r11
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        ret
uint128div ENDP
; calculates unsigned remainder
;void uint128rem(_uint128 &dst, const _uint128 &x); do assignop dst %= x;
; In:    rcx = &dst, rdx = &x   (each operand is lo qword at +0, hi qword at +8)
; Out:   dst = dst % x (unsigned)
; Clobb: rax, rcx, rdx, r8, r9, r10, flags
;
; Leaf function: only volatile registers are used and rsp is never changed,
; so no prolog and no unwind data are required. That matters here because
; div raises the CPU divide-error exception when x == 0 (no check is made).
;
; Register roles:
;   r8  = &dst            r9 = &x
;   r10 = divisor.lo while normalizing (wide path)
uint128rem PROC
        mov     r8, rcx                     ; r8 = &dst
        mov     r9, rdx                     ; r9 = &x
        mov     rax, qword ptr [r9+8]       ; rax = x.hi
        test    rax, rax
        jnz     WideDivisor

        ; Divisor fits in 64 bits: two chained divisions, keep the remainder.
        mov     rcx, qword ptr [r9]         ; rcx = divisor
        mov     rax, qword ptr [r8+8]
        xor     edx, edx
        div     rcx                         ; rdx = dst.hi mod divisor
        mov     rax, qword ptr [r8]
        div     rcx                         ; rdx = final remainder
        mov     rax, rdx
        xor     edx, edx                    ; rdx:rax = remainder
        jmp     Store

WideDivisor:
        ; Divisor needs more than 64 bits. Estimate the quotient, then form
        ; (quotient * divisor) - dividend = -remainder and negate it.
        mov     rcx, rax                    ; rcx:r10 = divisor
        mov     r10, qword ptr [r9]
        mov     rdx, qword ptr [r8+8]       ; rdx:rax = dividend
        mov     rax, qword ptr [r8]
Normalize:
        shr     rcx, 1
        rcr     r10, 1
        shr     rdx, 1
        rcr     rax, 1
        test    rcx, rcx
        jnz     Normalize
        div     r10                         ; rax = quotient estimate
        mov     rcx, rax
        mul     qword ptr [r9+8]            ; estimate * divisor.hi
        xchg    rax, rcx                    ; rcx = that product, rax = estimate
        mul     qword ptr [r9]              ; estimate * divisor.lo
        add     rdx, rcx                    ; rdx:rax = estimate * divisor
        jc      TooBig                      ; overflowed 128 bits: estimate too high
        cmp     rdx, qword ptr [r8+8]       ; compare product with dividend
        ja      TooBig
        jb      ProductOk
        cmp     rax, qword ptr [r8]
        jbe     ProductOk
TooBig:
        sub     rax, qword ptr [r9]         ; estimate was one too high:
        sbb     rdx, qword ptr [r9+8]       ;   product -= divisor
ProductOk:
        sub     rax, qword ptr [r8]         ; rdx:rax = product - dividend
        sbb     rdx, qword ptr [r8+8]       ;         = -remainder
        neg     rdx                         ; negate to get the remainder
        neg     rax
        sbb     rdx, 0
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        ret
uint128rem ENDP
;void uint128shr(int shft, void *x); do assignop x >>= shft. always shift 0-bits in from left
; In:    ecx = shft, rdx = x   (lo qword at +0, hi qword at +8)
; Out:   *x = *x >> shft, logical; any count >= 128 yields 0
; Clobb: rax, rcx, flags
;
; The range checks compare the whole 32-bit count (ecx), not just cl, so a
; count such as 256 saturates to 0 instead of wrapping to a shift by 0. The
; compare is unsigned, so a negative count is also treated as >= 128.
uint128shr PROC
        cmp     ecx, 64
        jae     AtLeast64
        mov     rax, qword ptr [rdx+8]      ; rax = x.hi (original value)
        shr     qword ptr [rdx+8], cl       ; x.hi >>= shft, zero-filling
        shrd    qword ptr [rdx], rax, cl    ; x.lo >>= shft, refilled from old x.hi
        ret
AtLeast64:
        cmp     ecx, 128
        jae     AllZero
        and     cl, 3Fh                     ; cl = shft - 64 (shft is 64..127 here)
        mov     rax, qword ptr [rdx+8]
        shr     rax, cl
        mov     qword ptr [rdx], rax        ; x.lo = old x.hi >> (shft - 64)
        xor     eax, eax
        mov     qword ptr [rdx+8], rax      ; x.hi = 0
        ret
AllZero:
        xor     eax, eax
        mov     qword ptr [rdx],   rax
        mov     qword ptr [rdx+8], rax
        ret
uint128shr ENDP
;int uint128cmp(const _uint128 &x1, const _uint128 &x2); return sign(x1 - x2);
; In:    rcx = &x1, rdx = &x2   (each operand is lo qword at +0, hi qword at +8)
; Out:   rax = -1 if x1 < x2, 0 if equal, +1 if x1 > x2 (unsigned 128-bit compare)
; Clobb: r8, r9, r10, flags
uint128cmp PROC
        mov     r8, 1                       ; candidate results for cmov
        mov     r9, -1
        xor     eax, eax                    ; default: equal
        mov     r10, qword ptr [rcx+8]      ; x1.hi
        cmp     r10, qword ptr [rdx+8]      ; high halves decide when they differ
        cmova   rax, r8                     ; unsigned compare of the high halves
        cmovb   rax, r9
        jne     Done                        ; cmov leaves the cmp flags intact
        mov     r10, qword ptr [rcx]        ; x1.lo
        cmp     r10, qword ptr [rdx]
        cmova   rax, r8                     ; unsigned compare of the low halves
        cmovb   rax, r9
Done:
        ret
uint128cmp ENDP
END