; These functions implement the basic operations for the _int128 type
; running on a 64-bit Intel CPU.
; They are (almost) identical to MS's workhorse functions that do
; __int64 math using 32-bit registers. Every memory reference that used
; dword ptr (4 bytes) is replaced by qword ptr (8 bytes), and the register
; names eax, ebx, ecx... are replaced by rax, rbx, rcx etc.
; Parameters are passed in rcx (1st argument) and rdx (2nd argument) and
; the return value is in rax, where the runtime stack is used in x86 mode.
; So the code is modified to reflect this environment.
;
; Syntax : MASM (PROC/ENDP, "qword ptr").
; Layout : an _int128 is two qwords; the low half is at [ptr], the high
;          half at [ptr+8].
;
.CODE

;-----------------------------------------------------------------------
;void int128add(_int128 &dst, const _int128 &x); do assignop dst += x;
; In:    rcx = &dst, rdx = &x
; Out:   *dst = *dst + *x   (wraps modulo 2^128)
; Uses:  rax, flags
;-----------------------------------------------------------------------
int128add PROC
        mov     rax, qword ptr [rdx]            ; rax = x.lo
        add     qword ptr [rcx], rax            ; dst.lo += x.lo, CF = carry out
        mov     rax, qword ptr [rdx+8]          ; rax = x.hi (mov keeps CF intact)
        adc     qword ptr [rcx+8], rax          ; dst.hi += x.hi + CF
        ret
int128add ENDP

;-----------------------------------------------------------------------
;void int128sub(_int128 &dst, const _int128 &x); do assignop dst -= x;
; In:    rcx = &dst, rdx = &x
; Out:   *dst = *dst - *x   (wraps modulo 2^128)
; Uses:  rax, flags
;-----------------------------------------------------------------------
int128sub PROC
        mov     rax, qword ptr [rdx]            ; rax = x.lo
        sub     qword ptr [rcx], rax            ; dst.lo -= x.lo, CF = borrow out
        mov     rax, qword ptr [rdx+8]          ; rax = x.hi (mov keeps CF intact)
        sbb     qword ptr [rcx+8], rax          ; dst.hi -= x.hi + CF
        ret
int128sub ENDP

;-----------------------------------------------------------------------
;void int128mul(_int128 &dst, const _int128 &x); do assignop dst *= x;
; In:    rcx = &dst, rdx = &x
; Out:   *dst = low 128 bits of (*dst * *x)
; Uses:  rax, rdx, r8, r9, r10, r11, flags -- volatile registers only,
;        rsp is never changed, so this is a leaf function that needs no
;        unwind data.
;
; With dst = dh:dl and x = xh:xl the product modulo 2^128 is
;       dl*xl + ((dh*xl + xh*dl) << 64)
; Only the low 64 bits of the two cross products can reach the result,
; and dh*xh is shifted out completely. When both high halves are zero the
; cross products vanish and this is a plain 64x64 -> 128 multiplication.
;-----------------------------------------------------------------------
int128mul PROC
        mov     r8,  qword ptr [rcx]            ; r8  = dst.lo
        mov     r9,  qword ptr [rcx+8]          ; r9  = dst.hi
        mov     r10, qword ptr [rdx]            ; r10 = x.lo
        mov     r11, qword ptr [rdx+8]          ; r11 = x.hi (last use of rdx as &x)

        mov     rax, r8
        mul     r10                             ; rdx:rax = dst.lo * x.lo
        imul    r9,  r10                        ; r9  = lo64(dst.hi * x.lo)
        imul    r11, r8                         ; r11 = lo64(x.hi  * dst.lo)
        add     rdx, r9
        add     rdx, r11                        ; rdx = high half of the result

        mov     qword ptr [rcx],   rax          ; dst.lo
        mov     qword ptr [rcx+8], rdx          ; dst.hi
        ret
int128mul ENDP

;void int128div(_int128 &dst, _int128 &x); do
; assignop dst /= x;
;
;-----------------------------------------------------------------------
; int128div: signed division, quotient truncated toward zero.
; In:    rcx = &dst, rdx = &x
; Out:   *dst = *dst / *x
;        *x is only read: the divisor is never modified, and dst and x
;        may refer to the same object.
; Uses:  rax, rcx, rdx, r8-r11, flags; rbx, rsi, rdi are saved in the
;        prologue and described by unwind data (PROC FRAME).
; Note:  a zero divisor makes the DIV instruction raise a divide error.
;
; Method (same scheme as the 32-bit __int64 helpers): divide magnitudes,
; then negate the quotient when exactly one operand was negative.
;  * divisor < 2^64 : two chained 128/64 DIVs (schoolbook long division).
;  * otherwise      : shift divisor and dividend right until the divisor
;                     fits in 64 bits, divide, and correct the estimate,
;                     which can be at most one too large, by multiplying
;                     back and comparing with the dividend.
; Labels are local to each PROC, so the same names recur in the routines
; below.
;-----------------------------------------------------------------------
int128div PROC FRAME
        push    rbx
        .pushreg rbx
        push    rsi
        .pushreg rsi
        push    rdi
        .pushreg rdi
        .endprolog

        mov     r8,  rcx                        ; r8      = &dst
        mov     r9,  qword ptr [r8]             ; rsi:r9  = dividend (dst)
        mov     rsi, qword ptr [r8+8]
        mov     r10, qword ptr [rdx]            ; r11:r10 = divisor (x)
        mov     r11, qword ptr [rdx+8]
        xor     edi, edi                        ; rdi = number of negative operands

        test    rsi, rsi
        jns     DividendReady
        inc     rdi
        neg     rsi                             ; rsi:r9 = -dividend
        neg     r9                              ; CF = (lo != 0)
        sbb     rsi, 0
DividendReady:
        test    r11, r11
        jns     DivisorReady
        inc     rdi
        neg     r11                             ; r11:r10 = -divisor
        neg     r10
        sbb     r11, 0
DivisorReady:                                   ; rsi:r9 = |dst|, r11:r10 = |x|
        test    r11, r11
        jnz     WideDivisor

        ; divisor fits in 64 bits
        mov     rax, rsi
        xor     edx, edx
        div     r10                             ; rax = quotient.hi, rdx = partial remainder
        mov     rbx, rax
        mov     rax, r9
        div     r10                             ; rax = quotient.lo (rdx < r10, cannot overflow)
        mov     rdx, rbx                        ; rdx:rax = quotient
        jmp     ApplySign

WideDivisor:
        mov     rbx, r11                        ; rbx:rcx = scaled divisor
        mov     rcx, r10
        mov     rdx, rsi                        ; rdx:rax = scaled dividend
        mov     rax, r9
Scale:
        shr     rbx, 1
        rcr     rcx, 1
        shr     rdx, 1
        rcr     rax, 1
        test    rbx, rbx
        jnz     Scale                           ; until the divisor fits in rcx
        div     rcx                             ; rax = quotient estimate q
        mov     rbx, rax                        ; rbx = q (rbx was 0 here)
        mul     r11                             ; rax = lo64(q * |x|.hi)
        mov     rcx, rax
        mov     rax, r10
        mul     rbx                             ; rdx:rax = q * |x|.lo
        add     rdx, rcx                        ; rdx:rax = q * |x|
        jc      Overshoot                       ; product >= 2^128: q too large
        cmp     rdx, rsi                        ; compare product with |dst|
        ja      Overshoot
        jb      QuotientOk
        cmp     rax, r9
        jbe     QuotientOk
Overshoot:
        dec     rbx                             ; estimate was one too large
QuotientOk:
        mov     rax, rbx
        xor     edx, edx                        ; quotient < 2^64 on this path

ApplySign:                                      ; rdx:rax = |dst| / |x|
        cmp     rdi, 1
        jne     Store                           ; 0 or 2 negatives: result is positive
        neg     rdx
        neg     rax
        sbb     rdx, 0
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        pop     rdi                             ; epilogue: pops directly followed by ret
        pop     rsi
        pop     rbx
        ret
int128div ENDP

;-----------------------------------------------------------------------
;void int128rem(_int128 &dst, _int128 &x); do assignop dst %= x;
; In:    rcx = &dst, rdx = &x
; Out:   *dst = *dst % *x; the remainder takes the sign of the dividend,
;        the sign of x does not matter.
;        *x is only read: the divisor is never modified, and dst and x
;        may refer to the same object.
; Uses:  rax, rcx, rdx, r8-r11, flags; rbx, rsi, rdi are saved in the
;        prologue and described by unwind data (PROC FRAME).
; Note:  a zero divisor makes the DIV instruction raise a divide error.
;-----------------------------------------------------------------------
int128rem PROC FRAME
        push    rbx
        .pushreg rbx
        push    rsi
        .pushreg rsi
        push    rdi
        .pushreg rdi
        .endprolog

        mov     r8,  rcx                        ; r8      = &dst
        mov     r9,  qword ptr [r8]             ; rsi:r9  = dividend (dst)
        mov     rsi, qword ptr [r8+8]
        mov     r10, qword ptr [rdx]            ; r11:r10 = divisor (x)
        mov     r11, qword ptr [rdx+8]
        xor     edi, edi                        ; rdi = 1 if the dividend is negative

        test    rsi, rsi
        jns     DividendReady
        inc     rdi
        neg     rsi                             ; rsi:r9 = -dividend
        neg     r9
        sbb     rsi, 0
DividendReady:
        test    r11, r11
        jns     DivisorReady
        neg     r11                             ; r11:r10 = -divisor
        neg     r10
        sbb     r11, 0
DivisorReady:                                   ; rsi:r9 = |dst|, r11:r10 = |x|
        test    r11, r11
        jnz     WideDivisor

        ; divisor fits in 64 bits
        mov     rax, rsi
        xor     edx, edx
        div     r10                             ; rdx = |dst|.hi mod |x|
        mov     rax, r9
        div     r10                             ; rdx = |dst| mod |x|
        mov     rax, rdx
        xor     edx, edx                        ; rdx:rax = remainder
        jmp     ApplySign

WideDivisor:
        mov     rbx, r11                        ; rbx:rcx = scaled divisor
        mov     rcx, r10
        mov     rdx, rsi                        ; rdx:rax = scaled dividend
        mov     rax, r9
Scale:
        shr     rbx, 1
        rcr     rcx, 1
        shr     rdx, 1
        rcr     rax, 1
        test    rbx, rbx
        jnz     Scale                           ; until the divisor fits in rcx
        div     rcx                             ; rax = quotient estimate q
        mov     rcx, rax                        ; rcx = q
        mul     r11                             ; rax = lo64(q * |x|.hi)
        xchg    rax, rcx                        ; rax = q, rcx = lo64(q * |x|.hi)
        mul     r10                             ; rdx:rax = q * |x|.lo
        add     rdx, rcx                        ; rdx:rax = q * |x|
        jc      Overshoot                       ; product >= 2^128: q too large
        cmp     rdx, rsi                        ; compare product with |dst|
        ja      Overshoot
        jb      ProductOk
        cmp     rax, r9
        jbe     ProductOk
Overshoot:
        sub     rax, r10                        ; product for q-1 (mod 2^128)
        sbb     rdx, r11
ProductOk:                                      ; rdx:rax = q*|x| <= |dst|
        sub     r9,  rax
        sbb     rsi, rdx                        ; rsi:r9 = |dst| - q*|x|
        mov     rax, r9
        mov     rdx, rsi                        ; rdx:rax = remainder

ApplySign:                                      ; rdx:rax = |dst| mod |x|
        test    rdi, rdi
        jz      Store
        neg     rdx                             ; dividend was negative
        neg     rax
        sbb     rdx, 0
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        pop     rdi                             ; epilogue: pops directly followed by ret
        pop     rsi
        pop     rbx
        ret
int128rem ENDP

;-----------------------------------------------------------------------
;void int128neg(_int128 &x); set x = -x;
; In:    rcx = &x
; Uses:  flags
; -(hi:lo) = (-(hi + c)) : (-lo)   where c = 1 if lo != 0, else 0
;-----------------------------------------------------------------------
int128neg PROC
        neg     qword ptr [rcx]                 ; x.lo = -x.lo, CF = (x.lo != 0)
        adc     qword ptr [rcx+8], 0            ; x.hi += CF
        neg     qword ptr [rcx+8]               ; x.hi = -(x.hi + CF)
        ret
int128neg ENDP

;-----------------------------------------------------------------------
;void int128inc(_int128 &x); set x = x + 1;
; In:    rcx = &x
;-----------------------------------------------------------------------
int128inc PROC
        add     qword ptr [rcx],   1            ; add (not inc) so CF is produced
        adc     qword ptr [rcx+8], 0
        ret
int128inc ENDP

;-----------------------------------------------------------------------
;void int128dec(_int128 &x); set x = x - 1;
; In:    rcx = &x
;-----------------------------------------------------------------------
int128dec PROC
        sub     qword ptr [rcx],   1            ; sub (not dec) so CF is produced
        sbb     qword ptr [rcx+8], 0
        ret
int128dec ENDP

;-----------------------------------------------------------------------
;void int128shr(int shft, _int128 &x); do assignop x >>= shft;
;       (if(x<0) shift 1-bits in from left, else 0-bits)
; In:    ecx = shft, rdx = &x
; Out:   shft 0..63   : arithmetic shift across both halves
;        shft 64..127 : x.lo = x.hi >> (shft-64), x.hi = sign fill
;        otherwise    : every bit of x becomes the sign bit
; The whole 32-bit count is tested as an unsigned value; looking at cl
; only would let counts such as 256 wrap around to 0.
; Uses:  rax, cl, flags
;-----------------------------------------------------------------------
int128shr PROC
        mov     rax, qword ptr [rdx+8]          ; rax = x.hi
        cmp     ecx, 40h
        jae     More64                          ; if(shft >= 64) goto More64
        sar     qword ptr [rdx+8], cl           ; shift x.hi
        shrd    qword ptr [rdx], rax, cl        ; shift x.lo taking new bits from x.hi (rax)
        ret
More64:                                         ; assume rax = x.hi
        cmp     ecx, 80h
        jae     RetSign                         ; if(shft >= 128) goto RetSign
        sar     qword ptr [rdx+8], 3Fh          ; set all bits in x.hi to sign-bit
        and     cl, 3Fh                         ; cl %= 64
        sar     rax, cl                         ; rax = x.hi >> cl
        mov     qword ptr [rdx], rax            ; x.lo = rax
        ret
RetSign:                                        ; assume rax = x.hi
        sar     rax, 3Fh                        ; set all bits in rax to sign-bit
        mov     qword ptr [rdx],   rax
        mov     qword ptr [rdx+8], rax
        ret
int128shr ENDP

;-----------------------------------------------------------------------
;void int128shl(int shft, _int128 &x); do assignop x <<= shft;
; In:    ecx = shft, rdx = &x
; Out:   shft 0..63   : shift across both halves
;        shft 64..127 : x.hi = x.lo << (shft-64), x.lo = 0
;        otherwise    : x = 0
; The whole 32-bit count is tested as an unsigned value; looking at cl
; only would let counts such as 256 wrap around to 0.
; Uses:  rax, cl, flags
;-----------------------------------------------------------------------
int128shl PROC
        cmp     ecx, 40h
        jae     More64                          ; if(shft >= 64) goto More64
        mov     rax, qword ptr [rdx]            ; rax = x.lo
        shl     qword ptr [rdx], cl             ; shift x.lo
        shld    qword ptr [rdx+8], rax, cl      ; shift x.hi taking new bits from x.lo (rax)
        ret
More64:
        cmp     ecx, 80h
        jae     RetZero                         ; if(shft >= 128) goto RetZero
        and     cl, 3Fh                         ; cl %= 64
        mov     rax, qword ptr [rdx]            ; rax = x.lo
        shl     rax, cl                         ; shift rax (x.lo)
        mov     qword ptr [rdx+8], rax          ; x.hi = rax
        xor     eax, eax
        mov     qword ptr [rdx], rax            ; x.lo = 0
        ret
RetZero:
        xor     eax, eax                        ; return 0
        mov     qword ptr [rdx],   rax
        mov     qword ptr [rdx+8], rax
        ret
int128shl ENDP

;-----------------------------------------------------------------------
;int int128cmp(const _int128 &x1, const _int128 &x2); return sign(x1 - x2);
; In:    rcx = &x1, rdx = &x2
; Out:   rax = -1, 0 or 1
; The high halves carry the sign and are compared signed; the low halves
; are compared unsigned.
;-----------------------------------------------------------------------
int128cmp PROC
        mov     rax, qword ptr [rcx+8]          ; x1.hi
        cmp     rax, qword ptr [rdx+8]          ; x2.hi
        jl      lessthan                        ; signed compare of x1.hi and x2.hi
        jg      greaterthan
        mov     rax, qword ptr [rcx]            ; x1.lo
        cmp     rax, qword ptr [rdx]            ; x2.lo
        jb      lessthan                        ; unsigned compare of x1.lo and x2.lo
        ja      greaterthan
        xor     eax, eax                        ; they are equal
        ret
greaterthan:
        mov     rax, 1
        ret
lessthan:
        mov     rax, -1
        ret
int128cmp ENDP

;-----------------------------------------------------------------------
;void uint128div(_uint128 &dst, const _uint128 &x); do assignop dst /= x;
; Same as signed division but without sign check on arguments.
; In:    rcx = &dst, rdx = &x
; Out:   *dst = *dst / *x
; Uses:  rax, rcx, rdx, r8-r11, flags -- volatile registers only, rsp is
;        never changed, so this is a leaf function without unwind data.
; Note:  a zero divisor makes the DIV instruction raise a divide error.
;-----------------------------------------------------------------------
uint128div PROC
        mov     r8, rcx                         ; r8 = &dst
        mov     r9, rdx                         ; r9 = &x
        mov     rax, qword ptr [r9+8]           ; rax = x.hi
        test    rax, rax
        jnz     WideDivisor

        ; divisor fits in 64 bits
        mov     rcx, qword ptr [r9]             ; rcx = x.lo
        mov     rax, qword ptr [r8+8]
        xor     edx, edx
        div     rcx                             ; rax = quotient.hi
        mov     r10, rax
        mov     rax, qword ptr [r8]
        div     rcx                             ; rax = quotient.lo
        mov     rdx, r10                        ; rdx:rax = quotient
        jmp     Store

WideDivisor:
        mov     rcx, rax                        ; rcx:r10 = scaled divisor
        mov     r10, qword ptr [r9]
        mov     rdx, qword ptr [r8+8]           ; rdx:rax = scaled dividend
        mov     rax, qword ptr [r8]
Scale:
        shr     rcx, 1
        rcr     r10, 1
        shr     rdx, 1
        rcr     rax, 1
        test    rcx, rcx
        jnz     Scale                           ; until the divisor fits in r10
        div     r10                             ; rax = quotient estimate q
        mov     r11, rax                        ; r11 = q
        mul     qword ptr [r9+8]                ; rax = lo64(q * x.hi)
        mov     rcx, rax
        mov     rax, qword ptr [r9]
        mul     r11                             ; rdx:rax = q * x.lo
        add     rdx, rcx                        ; rdx:rax = q * x
        jc      Overshoot                       ; product >= 2^128: q too large
        cmp     rdx, qword ptr [r8+8]           ; compare product with dst
        ja      Overshoot
        jb      QuotientOk
        cmp     rax, qword ptr [r8]
        jbe     QuotientOk
Overshoot:
        dec     r11                             ; estimate was one too large
QuotientOk:
        xor     edx, edx                        ; quotient < 2^64 on this path
        mov     rax, r11
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        ret
uint128div ENDP

;-----------------------------------------------------------------------
; calculates unsigned remainder
;void uint128rem(_uint128 &dst, const _uint128 &x); do assignop dst %= x;
; In:    rcx = &dst, rdx = &x
; Out:   *dst = *dst % *x
; Uses:  rax, rcx, rdx, r8-r10, flags -- volatile registers only, rsp is
;        never changed, so this is a leaf function without unwind data.
; Note:  a zero divisor makes the DIV instruction raise a divide error.
;-----------------------------------------------------------------------
uint128rem PROC
        mov     r8, rcx                         ; r8 = &dst
        mov     r9, rdx                         ; r9 = &x
        mov     rax, qword ptr [r9+8]           ; rax = x.hi
        test    rax, rax
        jnz     WideDivisor

        ; divisor fits in 64 bits
        mov     rcx, qword ptr [r9]             ; rcx = x.lo
        mov     rax, qword ptr [r8+8]
        xor     edx, edx
        div     rcx                             ; rdx = dst.hi mod x
        mov     rax, qword ptr [r8]
        div     rcx                             ; rdx = dst mod x
        mov     rax, rdx
        xor     edx, edx                        ; rdx:rax = remainder
        jmp     Store

WideDivisor:
        mov     rcx, rax                        ; rcx:r10 = scaled divisor
        mov     r10, qword ptr [r9]
        mov     rdx, qword ptr [r8+8]           ; rdx:rax = scaled dividend
        mov     rax, qword ptr [r8]
Scale:
        shr     rcx, 1
        rcr     r10, 1
        shr     rdx, 1
        rcr     rax, 1
        test    rcx, rcx
        jnz     Scale                           ; until the divisor fits in r10
        div     r10                             ; rax = quotient estimate q
        mov     rcx, rax                        ; rcx = q
        mul     qword ptr [r9+8]                ; rax = lo64(q * x.hi)
        xchg    rax, rcx                        ; rax = q, rcx = lo64(q * x.hi)
        mul     qword ptr [r9]                  ; rdx:rax = q * x.lo
        add     rdx, rcx                        ; rdx:rax = q * x
        jc      Overshoot                       ; product >= 2^128: q too large
        cmp     rdx, qword ptr [r8+8]           ; compare product with dst
        ja      Overshoot
        jb      ProductOk
        cmp     rax, qword ptr [r8]
        jbe     ProductOk
Overshoot:
        sub     rax, qword ptr [r9]             ; product for q-1 (mod 2^128)
        sbb     rdx, qword ptr [r9+8]
ProductOk:
        sub     rax, qword ptr [r8]             ; rdx:rax = q*x - dst = -remainder
        sbb     rdx, qword ptr [r8+8]
        neg     rdx                             ; negate to get the remainder
        neg     rax
        sbb     rdx, 0
Store:
        mov     qword ptr [r8],   rax
        mov     qword ptr [r8+8], rdx
        ret
uint128rem ENDP

;void uint128shr(int shft, void *x); do assignop x >>= shft.
; always shift 0-bits in from left
;
;-----------------------------------------------------------------------
; uint128shr: logical right shift of a 128-bit value in place.
; In:    ecx = shft, rdx = x (pointer to the value: low qword at [rdx],
;        high qword at [rdx+8])
; Out:   shft 0..63   : shift across both halves, 0-bits enter from the left
;        shft 64..127 : x.lo = x.hi >> (shft-64), x.hi = 0
;        otherwise    : x = 0
; The whole 32-bit count is tested as an unsigned value; looking at cl
; only would let counts such as 256 wrap around to 0.
; Uses:  rax, cl, flags
;-----------------------------------------------------------------------
uint128shr PROC
        cmp     ecx, 40h
        jae     More64                          ; if(shft >= 64) goto More64
        mov     rax, qword ptr [rdx+8]          ; rax = x.hi
        shr     qword ptr [rdx+8], cl           ; shift x.hi
        shrd    qword ptr [rdx], rax, cl        ; shift x.lo taking new bits from x.hi (rax)
        ret
More64:
        cmp     ecx, 80h
        jae     RetZero                         ; if(shft >= 128) goto RetZero
        and     cl, 3Fh                         ; cl %= 64
        mov     rax, qword ptr [rdx+8]          ; rax = x.hi
        shr     rax, cl                         ; rax >>= cl
        mov     qword ptr [rdx], rax            ; x.lo = rax
        xor     eax, eax
        mov     qword ptr [rdx+8], rax          ; x.hi = 0
        ret
RetZero:
        xor     eax, eax                        ; return 0
        mov     qword ptr [rdx],   rax
        mov     qword ptr [rdx+8], rax
        ret
uint128shr ENDP

;-----------------------------------------------------------------------
;int uint128cmp(const _uint128 &x1, const _uint128 &x2); return sign(x1 - x2);
; In:    rcx = &x1, rdx = &x2
; Out:   rax = -1, 0 or 1
; Both halves are compared unsigned, high half first.
;-----------------------------------------------------------------------
uint128cmp PROC
        mov     rax, qword ptr [rcx+8]          ; x1.hi
        cmp     rax, qword ptr [rdx+8]          ; x2.hi
        jb      lessthan                        ; unsigned compare of x1.hi and x2.hi
        ja      greaterthan
        mov     rax, qword ptr [rcx]            ; x1.lo
        cmp     rax, qword ptr [rdx]            ; x2.lo
        jb      lessthan                        ; unsigned compare of x1.lo and x2.lo
        ja      greaterthan
        xor     eax, eax                        ; they are equal
        ret
greaterthan:
        mov     rax, 1
        ret
lessthan:
        mov     rax, -1
        ret
uint128cmp ENDP

END