1839 lines
27 KiB
NASM
1839 lines
27 KiB
NASM
|
|
.code
|
|
|
|
; rcx - n
|
|
; n*(n + 1)/2
|
|
triangle_number PROC
|
|
mov rax, rcx ; rax = n
|
|
add rax, 1 ; rax = n + 1
|
|
mul rcx ; rax = (n + 1)*n
|
|
shr rax, 1 ; rax = (n + 1)*n/2
|
|
ret
|
|
triangle_number ENDP
|
|
|
|
; rcx - n
|
|
; n*(2n + 1)*(n + 1)/6
|
|
sum_of_squares PROC
|
|
; rdx = 2n + 1
|
|
lea rdx, [1 + rcx*2]
|
|
|
|
; r8 = n + 1
|
|
lea r8, [1 + rcx]
|
|
|
|
; rax = n*(2n + 1)*(n + 1)
|
|
mov rax, rcx
|
|
imul rax, rdx
|
|
imul rax, r8
|
|
|
|
; rax = n*(2n + 1)*(n + 1)/6
|
|
xor rdx,rdx
|
|
mov rcx,6
|
|
idiv rcx
|
|
|
|
ret
|
|
sum_of_squares ENDP
|
|
|
|
|
|
; rcx - dst, rdx - src
|
|
fibonacci_stepper_mul PROC
|
|
; rax = n1*k1
|
|
mov rax, [rcx]
|
|
imul rax, [rdx]
|
|
|
|
; r8 = n2*k2
|
|
mov r8, [rcx + 8]
|
|
imul r8, [rdx + 8]
|
|
|
|
; r9 = n1*k2
|
|
mov r9, [rcx]
|
|
imul r9, [rdx + 8]
|
|
|
|
; p1 = n1*k1 + n2*k2 = (rax + r8)
|
|
mov [rcx], rax
|
|
add [rcx], r8
|
|
|
|
; rax = n2*(k1 + k2)
|
|
mov rax, [rdx]
|
|
add rax, [rdx + 8]
|
|
imul rax, [rcx + 8]
|
|
|
|
; p2 = n1*k2 + n2*(k1 + k2) = (r9 + rax)
|
|
mov [rcx + 8], r9
|
|
add [rcx + 8], rax
|
|
|
|
ret
|
|
fibonacci_stepper_mul ENDP
|
|
|
|
; rcx - dst
|
|
fibonacci_stepper_sqr PROC
|
|
mov rax, [rcx] ; rax = n1
|
|
mov r8, [rcx + 8] ; r8 = n2
|
|
mov r9, rax
|
|
imul r9, r8 ; r9 = n1*n2
|
|
imul rax, rax ; rax = n1*n1
|
|
imul r8, r8 ; r8 = n2*n2
|
|
|
|
; p1 = n1*n1 + n2*n2
|
|
add rax, r8 ; rax = n1*n1 + n2*n2
|
|
mov [rcx], rax
|
|
|
|
; p2 = 2*n1*n2 + n2*n2
|
|
lea rax, [r8 + r9*2] ; rax = 2*n1*n2 + n2*n2
|
|
mov [rcx + 8], rax
|
|
ret
|
|
fibonacci_stepper_sqr ENDP
|
|
|
|
; rcx - n
|
|
fibonacci_number PROC FRAME
|
|
mov qword ptr [rsp + 8],rsi
|
|
.savereg rsi, 8
|
|
sub rsp, 64
|
|
.allocstack 64
|
|
.endprolog
|
|
|
|
mov qword ptr [rsp + 32], 1 ; a = [1, 0]
|
|
mov qword ptr [rsp + 40], 0
|
|
mov qword ptr [rsp + 48], 0 ; s = [0, 1]
|
|
mov qword ptr [rsp + 56], 1
|
|
|
|
; relocate n
|
|
mov rsi, rcx
|
|
|
|
; if n == 0 goto done;
|
|
test rsi, rsi
|
|
jz done
|
|
|
|
loop0:
|
|
; if (n&1) == 0 goto skip_accumulator;
|
|
test rsi, 1
|
|
jz skip_accumulator
|
|
|
|
; mul(a,s)
|
|
lea rcx, [rsp + 32]
|
|
lea rdx, [rsp + 48]
|
|
call fibonacci_stepper_mul
|
|
|
|
skip_accumulator:
|
|
; n = n >> 1
|
|
shr rsi, 1
|
|
|
|
; if n == 0 goto done;
|
|
jz done
|
|
|
|
; sqr(s)
|
|
lea rcx, [rsp + 48]
|
|
call fibonacci_stepper_sqr
|
|
|
|
; goto loop0;
|
|
jmp loop0
|
|
|
|
done:
|
|
mov rax, [rsp + 40]
|
|
add rsp, 64
|
|
mov rsi, qword ptr [rsp + 8]
|
|
ret
|
|
fibonacci_number ENDP
|
|
|
|
; rcx - n
|
|
; fib(n + 2) - 1
|
|
fibonacci_sigma PROC FRAME
|
|
sub rsp,32
|
|
.allocstack 32
|
|
.endprolog
|
|
|
|
add rcx, 2
|
|
call fibonacci_number
|
|
dec rax
|
|
|
|
add rsp,32
|
|
ret
|
|
fibonacci_sigma ENDP
|
|
|
|
; rcx - buffer (U64*)
|
|
; rdx - n (U64)
|
|
; r8 - m (U64)
|
|
; assumptions:
|
|
; buffer points to at least n + 1 U64s worth of free memory
|
|
; m <= n
|
|
choose__asm PROC
|
|
; m' = n - m
|
|
mov r9,rdx
|
|
sub r9,r8
|
|
|
|
; r = min(m, m'), c = max(m, m')
|
|
; r8 contains, r9 contains c
|
|
mov rax,r8
|
|
cmp r8,r9
|
|
cmova r8,r9
|
|
cmova r9,rax
|
|
|
|
; if (r == 0) goto return_1;
|
|
test r8,r8
|
|
jz return_1
|
|
|
|
; if (r == 1) goto return_n;
|
|
cmp r8,1
|
|
je return_n
|
|
|
|
; if (r & 1) goto r_odd_case;
|
|
test r8,1
|
|
jnz r_odd_case
|
|
|
|
|
|
;; EVEN CASE ;;
|
|
; i = 2
|
|
mov rax,2
|
|
|
|
; k = 6
|
|
mov r11,6
|
|
|
|
loop0_even:
|
|
; buffer[i] = k
|
|
mov qword ptr [rcx + rax*8],r11
|
|
; i += 1
|
|
inc rax
|
|
|
|
; k += i + 1
|
|
inc r11
|
|
add r11,rax
|
|
|
|
; if (i <= c) goto loop0_even;
|
|
cmp rax,r9
|
|
jbe loop0_even
|
|
|
|
; row = 4
|
|
mov rax,4
|
|
|
|
; goto loop1
|
|
jmp loop1
|
|
|
|
|
|
;; ODD CASE ;;
|
|
r_odd_case:
|
|
; i = 3
|
|
mov rax,3
|
|
|
|
; minor_slot = 10
|
|
mov rdx,10
|
|
|
|
; k = 20
|
|
mov r11,20
|
|
|
|
loop0_odd:
|
|
; buffer[i] = k
|
|
mov qword ptr [rcx + rax*8],r11
|
|
; i += 1
|
|
inc rax
|
|
|
|
; minor_slot += i + 1
|
|
inc rdx
|
|
add rdx,rax
|
|
|
|
; k += minor_slot
|
|
add r11,rdx
|
|
|
|
; if (i <= c) goto loop0_odd;
|
|
cmp rax,r9
|
|
jbe loop0_odd
|
|
|
|
; row = 5
|
|
mov rax,5
|
|
|
|
loop1:
|
|
; if (row > r) goto return_buffer_c
|
|
cmp rax,r8
|
|
ja return_buffer_c
|
|
|
|
; col = row
|
|
mov r10,rax
|
|
|
|
;; initialize the loop
|
|
mov r11,qword ptr [rcx + r10*8 - 8]
|
|
shl r11,1
|
|
add r11,qword ptr [rcx + r10*8]
|
|
mov rdx,r11
|
|
shl rdx,1
|
|
|
|
; buffer[col] = major_slot
|
|
mov qword ptr [rcx + r10*8],rdx
|
|
|
|
loop2:
|
|
; col += 1
|
|
inc r10
|
|
|
|
; if (col > c) goto loop2_done;
|
|
cmp r10,r9
|
|
ja loop2_done
|
|
|
|
; minor_slot += buffer[col]
|
|
add r11,qword ptr [rcx + r10*8]
|
|
; major_slot += minor_slot
|
|
add rdx,r11
|
|
; buffer[col] = major_slot
|
|
mov qword ptr [rcx + r10*8],rdx
|
|
|
|
; goto loop2
|
|
jmp loop2
|
|
|
|
loop2_done:
|
|
|
|
; row += 2
|
|
add rax,2
|
|
|
|
; goto loop1
|
|
jmp loop1
|
|
|
|
return_buffer_c:
|
|
mov rax,qword ptr [rcx + r9*8]
|
|
ret
|
|
|
|
return_1:
|
|
mov rax,1
|
|
ret
|
|
|
|
return_n:
|
|
mov rax,rdx
|
|
ret
|
|
choose__asm ENDP
|
|
|
|
|
|
; rcx - table (U64*)
|
|
; rdx - count (U64)
|
|
fill_factorial_table PROC
|
|
; i = 0
|
|
xor rax,rax
|
|
; n = 1
|
|
mov r8,1
|
|
|
|
;
|
|
loop0:
|
|
; if (i >= count) goto done
|
|
cmp rax,rdx
|
|
jae done
|
|
|
|
; table[i] = n
|
|
mov [rcx + rax*8],r8
|
|
; i += 1
|
|
; n *= i + 1
|
|
add rax,2
|
|
imul r8,rax
|
|
dec rax
|
|
|
|
; goto loop0
|
|
jmp loop0
|
|
|
|
done:
|
|
ret
|
|
fill_factorial_table ENDP
|
|
|
|
|
|
; rcx - table_memory (B8*)
|
|
; rdx - primes_list (U32*)
|
|
; r8d - first (U32)
|
|
; r9d - opl (U32)
|
|
; [rsp + 0x28] - node (ListNode_U32*)
|
|
prime_sieve__asm PROC FRAME
|
|
push r15
|
|
.pushreg r15
|
|
push r14
|
|
.pushreg r14
|
|
push r13
|
|
.pushreg r13
|
|
push r12
|
|
.pushreg r12
|
|
sub rsp,16
|
|
.allocstack 16
|
|
.endprolog
|
|
|
|
; extend first and opl to 64-bits
|
|
mov r8d,r8d
|
|
mov r9d,r9d
|
|
|
|
; save primes list at [rsp]
|
|
mov [rsp],rdx
|
|
|
|
; save counter at [rsp + 1]
|
|
mov qword ptr [rsp+8],0
|
|
|
|
; move node to r15
|
|
mov r15, [rsp + 58h]
|
|
|
|
; if (node != 0) goto skip_emit_2;
|
|
test r15,r15
|
|
jnz skip_emit_2;
|
|
|
|
; emit 2
|
|
mov dword ptr [rdx],2
|
|
mov qword ptr [rsp+8],1
|
|
skip_emit_2:
|
|
|
|
; M' = (first even)?(first + 1):(first);
|
|
or r8d,1
|
|
|
|
; move opl into r12
|
|
mov r12,r9
|
|
|
|
; ibound = (opl - M' + 1)/2
|
|
sub r9,r8
|
|
inc r9
|
|
shr r9,1
|
|
|
|
; table_memory_bound = table_memory + ibound
|
|
mov r13,rcx
|
|
add r13,r9
|
|
|
|
mixing0:
|
|
; if (node == 0) goto done_mixing;
|
|
test r15,r15
|
|
jz done_mixing
|
|
|
|
; ptr = node->array.v
|
|
mov r14, [r15 + 8]
|
|
; ptr_opl = node->array.v + node->array.count
|
|
mov r10, [r15 + 16]
|
|
lea r10, [r14 + r10*4]
|
|
|
|
mixing1:
|
|
; if (ptr >= ptr_opl) goto mixing0_next;
|
|
cmp r14,r10
|
|
jae mixing0_next
|
|
|
|
; p = *ptr
|
|
mov r11d, dword ptr [r14]
|
|
|
|
; if (p == 2) goto mixing1_next;
|
|
cmp r11,2
|
|
je mixing1_next;
|
|
|
|
; if (p*p >= opl) goto done_mixing;
|
|
mov rax,r11
|
|
imul rax,rax
|
|
cmp rax,r12
|
|
jae done_mixing
|
|
|
|
; k = floor((M' + p - 1)/p)
|
|
mov rax,r8
|
|
add rax,r11
|
|
dec rax
|
|
xor rdx,rdx
|
|
idiv r11
|
|
|
|
; if (k < p) k = p;
|
|
cmp rax,r11
|
|
cmovl rax,r11
|
|
|
|
; if (k even) k += 1;
|
|
or rax,1
|
|
|
|
; x = k*p
|
|
imul rax,r11
|
|
|
|
; j = (x - M')/2
|
|
sub rax,r8
|
|
shr rax,1
|
|
|
|
; ptr = table_memory + j
|
|
add rax, rcx
|
|
|
|
mixing2:
|
|
; if (ptr >= table_memory_bound) goto mixing1_next;
|
|
cmp rax,r13
|
|
jae mixing1_next;
|
|
|
|
; mark x in the table
|
|
mov byte ptr [rax],1
|
|
|
|
; j += p, goto mixing2
|
|
add rax,r11
|
|
jmp mixing2
|
|
|
|
mixing1_next:
|
|
; increment r14
|
|
add r14,4
|
|
jmp mixing1
|
|
|
|
mixing0_next:
|
|
; node = node->next
|
|
mov r15,[r15]
|
|
jmp mixing0
|
|
|
|
done_mixing:
|
|
|
|
; retrieve base of primes_list
|
|
mov rdx,[rsp]
|
|
|
|
; retrieve counter
|
|
mov rax,[rsp+8]
|
|
|
|
; ptr = table_memory
|
|
mov r12, rcx
|
|
|
|
scanning0:
|
|
; mark = *ptr
|
|
movzx r15, byte ptr [r12]
|
|
|
|
; if (mark != 0) goto scanning0_next;
|
|
test r15,r15
|
|
jnz scanning0_next;
|
|
|
|
; j = ptr - table_memory
|
|
mov r14, r12
|
|
sub r14, rcx
|
|
|
|
; p = M' + 2*j
|
|
lea r14, [r8 + r14*2]
|
|
|
|
; emit p
|
|
mov dword ptr [rdx + rax*4], r14d
|
|
inc rax
|
|
|
|
; j' = (p*p - M')/2
|
|
mov r10,r14
|
|
imul r10,r10
|
|
sub r10,r8
|
|
shr r10,1
|
|
|
|
; ptr' = table_memory + j'
|
|
add r10, rcx
|
|
|
|
scanning1:
|
|
; if (ptr' >= table_memory_bound) goto scanning0_next;
|
|
cmp r10,r13
|
|
jae scanning0_next;
|
|
|
|
; *(ptr') = 1
|
|
mov byte ptr [r10], 1
|
|
|
|
; ptr' += p
|
|
add r10,r14
|
|
jmp scanning1
|
|
|
|
scanning0_next:
|
|
; ptr += 1
|
|
inc r12
|
|
|
|
; if (ptr < table_memory_bound) goto scanning0;
|
|
cmp r12,r13
|
|
jb scanning0
|
|
|
|
add rsp,16
|
|
pop r12
|
|
pop r13
|
|
pop r14
|
|
pop r15
|
|
ret
|
|
prime_sieve__asm ENDP
|
|
|
|
|
|
; rcx - table_memory (U32*)
|
|
; rdx - count (U32)
|
|
; assumptions:
|
|
; rcx is a pointer to 4*edx bytes, cleared to zero
|
|
factorization_table__asm PROC
|
|
; zero extend count to 64-bit
|
|
mov edx,edx
|
|
|
|
; i = 0
|
|
xor rax,rax
|
|
|
|
loop0:
|
|
; if (i >= count) goto done;
|
|
cmp rax,rdx
|
|
jae done
|
|
|
|
; f = table_memory[i]
|
|
mov r9d, dword ptr [rcx + rax*4]
|
|
|
|
; if (f != 0) goto loop0_next
|
|
test r9,r9
|
|
jnz loop0_next
|
|
|
|
; n = 3 + 2*i
|
|
lea r8, [3 + rax*2]
|
|
|
|
; table_memory[i] = n
|
|
mov dword ptr [rcx + rax*4], r8d
|
|
|
|
; r9 = n*n
|
|
mov r9,r8
|
|
imul r9,r9
|
|
|
|
; j = (n*n - 3)/2
|
|
sub r9,3
|
|
shr r9,1
|
|
|
|
loop1:
|
|
; if (j >= count) goto loop0_next;
|
|
cmp r9,rdx
|
|
jae loop0_next
|
|
|
|
; table_memory[j] = n
|
|
mov dword ptr [rcx + r9*4], r8d
|
|
|
|
; increment
|
|
add r9,r8
|
|
jmp loop1
|
|
|
|
loop0_next:
|
|
; increment
|
|
inc rax
|
|
jmp loop0
|
|
|
|
done:
|
|
ret
|
|
factorization_table__asm ENDP
|
|
|
|
|
|
; rcx - fact_tbl (U32*)
|
|
; edx - count (U32)
|
|
; r8d - n (U32)
|
|
get_prime_factor PROC
|
|
; if (n even) goto return_2;
|
|
test r8d,1
|
|
jz return_2
|
|
|
|
; if (n == 1) goto return_1;
|
|
cmp r8d,1
|
|
je return_1
|
|
|
|
; j = (n - 3)/2
|
|
sub r8d,3
|
|
shr r8d,1
|
|
|
|
; if (j >= count) goto return_0;
|
|
cmp r8d,edx
|
|
jae return_0
|
|
|
|
; f = fact_tbl[j]
|
|
mov eax, dword ptr [rcx + r8*4]
|
|
ret
|
|
|
|
return_2:
|
|
mov rax,2
|
|
ret
|
|
|
|
return_1:
|
|
mov rax,1
|
|
ret
|
|
|
|
return_0:
|
|
xor rax,rax
|
|
ret
|
|
get_prime_factor ENDP
|
|
|
|
|
|
; ecx - n
|
|
; edx - f
|
|
max_divide PROC
|
|
|
|
; move f into r8
|
|
mov r8d,edx
|
|
; r = 0
|
|
xor r9d,r9d
|
|
|
|
loop0:
|
|
; compute n/f and n%f
|
|
xor edx,edx
|
|
mov eax,ecx
|
|
idiv r8d
|
|
|
|
; if ((remainder) != 0) goto done;
|
|
test edx,edx
|
|
jnz done
|
|
|
|
; r += 1
|
|
inc r9d
|
|
|
|
; n = (quotient)
|
|
mov ecx,eax
|
|
|
|
jmp loop0
|
|
|
|
done:
|
|
mov eax,ecx
|
|
shl rax,32
|
|
or rax,r9
|
|
ret
|
|
max_divide ENDP
|
|
|
|
|
|
; rcx - fact_tbl (U32*)
|
|
; edx - count (U32)
|
|
; r8d - n (U32)
|
|
divisor_count PROC FRAME
|
|
push rsi
|
|
.pushreg rsi
|
|
push rdi
|
|
.pushreg rdi
|
|
push r12
|
|
.pushreg r12
|
|
push r13
|
|
.pushreg r13
|
|
sub rsp,32
|
|
.allocstack 32
|
|
.endprolog
|
|
|
|
; move fact_tbl to rsi
|
|
mov rsi, rcx
|
|
|
|
; move count to edi
|
|
mov edi, edx
|
|
|
|
; move n to r12d
|
|
mov r12d, r8d
|
|
|
|
; accum = 1
|
|
mov r13d,1
|
|
|
|
loop0:
|
|
; if (n <= 1) goto done;
|
|
cmp r12d,1
|
|
jbe done
|
|
|
|
; call get_prime_factor(fact_tbl, count, n)
|
|
mov rcx, rsi
|
|
mov edx, edi
|
|
mov r8d, r12d
|
|
call get_prime_factor
|
|
|
|
; f is in rax
|
|
|
|
; call max_divide(n, f)
|
|
mov ecx, r12d
|
|
mov edx, eax
|
|
call max_divide
|
|
|
|
; {r,n'} are in rax
|
|
|
|
; accum *= (r + 1)
|
|
mov ecx,eax
|
|
inc ecx
|
|
imul r13d,ecx
|
|
|
|
; n = n'
|
|
shr rax,32
|
|
mov r12d,eax
|
|
|
|
jmp loop0
|
|
|
|
done:
|
|
|
|
mov eax,r13d
|
|
|
|
add rsp,32
|
|
pop r13
|
|
pop r12
|
|
pop rdi
|
|
pop rsi
|
|
ret
|
|
divisor_count ENDP
|
|
|
|
; rcx - fact_tbl (U32*)
|
|
; edx - count (U32)
|
|
; r8d - n (U32)
|
|
divisor_sum PROC FRAME
|
|
push rsi
|
|
.pushreg rsi
|
|
sub rsp,32
|
|
.allocstack 32
|
|
.endprolog
|
|
|
|
; save params
|
|
mov qword ptr [rsp + 48],rcx
|
|
mov dword ptr [rsp + 56],edx
|
|
mov dword ptr [rsp + 64],r8d
|
|
|
|
; accum = 1
|
|
mov esi,1
|
|
|
|
loop0:
|
|
; recover n from stack
|
|
mov r8d, dword ptr [rsp + 64]
|
|
|
|
; if (n <= 1) goto done0
|
|
cmp r8d,1
|
|
jbe done0
|
|
|
|
; call get_prime_factor(fact_tbl, count, n)
|
|
mov rcx, qword ptr [rsp + 48]
|
|
mov edx, dword ptr [rsp + 56]
|
|
; r8d is already set to n
|
|
call get_prime_factor
|
|
|
|
; move p to rcx
|
|
mov rcx,rax
|
|
|
|
; zero rdx
|
|
xor rdx,rdx
|
|
|
|
; move n to rax
|
|
mov eax, dword ptr [rsp + 64]
|
|
|
|
; n,r = n div p
|
|
div rcx
|
|
|
|
; p_power = p
|
|
mov r8,rcx
|
|
|
|
; p_sum = 1 + p
|
|
lea r9,[1 + rcx]
|
|
|
|
loop1:
|
|
; good_n = n
|
|
mov r10d,eax
|
|
|
|
; n,r = n div p
|
|
div rcx
|
|
|
|
; if (r != 0) goto done1
|
|
test rdx,rdx
|
|
jnz done1
|
|
|
|
; p_power *= p
|
|
imul r8,rcx
|
|
|
|
; p_sum += p_power
|
|
add r9,r8
|
|
|
|
jmp loop1
|
|
|
|
done1:
|
|
; move n back to stack memory
|
|
mov dword ptr [rsp + 64],r10d
|
|
|
|
; accum *= p_sum
|
|
imul rsi,r9
|
|
|
|
jmp loop0
|
|
|
|
done0:
|
|
mov rax,rsi
|
|
|
|
add rsp,32
|
|
pop rsi
|
|
ret
|
|
divisor_sum ENDP
|
|
|
|
|
|
; rcx - return pointer (Pair_U64*)
|
|
; rdx - min (U64)
|
|
; r8 - max (U64)
|
|
; r9 - target_product (U64)
|
|
; assumptions:
|
|
; rdx < r8
|
|
; rdx*rdx <= target_product <= max*max
|
|
find_bounded_factors PROC
|
|
; clear result to zero
|
|
mov qword ptr [rcx], 0
|
|
mov qword ptr [rcx+8], 0
|
|
|
|
; move min to r11
|
|
mov r11, rdx
|
|
|
|
; product = min*max
|
|
mov r10, r11
|
|
imul r10, r8
|
|
|
|
; if (product == target_product) goto return_min_max;
|
|
; if (product < target_product) goto too_small;
|
|
; goto too_big
|
|
cmp r10, r9
|
|
jz return_min_max
|
|
jb too_small
|
|
jmp too_big
|
|
|
|
too_small:
|
|
; increase min so that:
|
|
; min*max >= target_product & (min - 1)*max < target_product
|
|
; min >= target_product/max & min < target_product/max + 1
|
|
; target_product/max <= min < target_product/max + 1
|
|
; min <- ceil(target_product/max)
|
|
|
|
; q,r = target_product div max
|
|
xor rdx,rdx
|
|
mov rax, r9
|
|
idiv r8
|
|
|
|
; if (r == 0) goto too_small0;
|
|
; goto too_small1;
|
|
test rdx,rdx
|
|
jz too_small0
|
|
|
|
too_small1:
|
|
; min = floor((0:target_product)/max)
|
|
mov r11, rax
|
|
inc r11
|
|
|
|
; if (min > max) goto return;
|
|
cmp r11, r8
|
|
ja return
|
|
jmp too_big
|
|
|
|
too_small0:
|
|
; min = floor((0:target_product)/max)
|
|
mov r11, rax
|
|
|
|
; if (min > max) goto return;
|
|
cmp r11, r8
|
|
ja return
|
|
jmp return_min_max
|
|
|
|
too_big:
|
|
; decrease max so that:
|
|
; min*max <= target_product & min*(max + 1) > target_product
|
|
; max <= target_product/min & max > (target_product/min) - 1
|
|
; (target_product/min) - 1 < max <= target_product/min
|
|
; max <- floor(target_product/min)
|
|
|
|
; max = floor((0:target_product)/min)
|
|
xor rdx,rdx
|
|
mov rax, r9
|
|
idiv r11
|
|
mov r8, rax
|
|
|
|
; if (min > max) goto return;
|
|
cmp r11, r8
|
|
ja return
|
|
|
|
; if ((0:target_product) == 0) goto return_min_max;
|
|
; goto too_small;
|
|
test rdx, rdx
|
|
jz return_min_max
|
|
jmp too_small
|
|
|
|
return_min_max:
|
|
mov [rcx], r11
|
|
mov [rcx+8], r8
|
|
|
|
return:
|
|
mov rax, rcx
|
|
ret
|
|
find_bounded_factors ENDP
|
|
|
|
|
|
; rcx - A (U64)
|
|
; rdx - B (U64)
|
|
gcd_euclidean PROC
|
|
; transfer B into r8
|
|
mov r8, rdx
|
|
|
|
; if (A <= B) goto loop0;
|
|
cmp rcx,r8
|
|
jbe loop0
|
|
; swap
|
|
mov r8, rcx
|
|
mov rcx, rdx
|
|
|
|
loop0:
|
|
; if (A == 0) goto return
|
|
test rcx,rcx
|
|
jz return
|
|
|
|
; euclidean_step
|
|
mov rax, r8
|
|
xor rdx, rdx
|
|
idiv rcx
|
|
mov r8, rcx
|
|
mov rcx, rdx
|
|
|
|
jmp loop0
|
|
|
|
return:
|
|
mov rax, r8
|
|
ret
|
|
gcd_euclidean ENDP
|
|
|
|
|
|
; rcx - A (U64)
|
|
; rdx - B (U64)
|
|
lcm_euclidean PROC FRAME
|
|
mov qword ptr [rsp + 8],rsi
|
|
.savereg rsi,8
|
|
sub rsp,32
|
|
.allocstack 32
|
|
.endprolog
|
|
|
|
; rsi = A*B
|
|
mov rsi,rcx
|
|
imul rsi,rdx
|
|
|
|
; call gcd(...)
|
|
call gcd_euclidean
|
|
|
|
; rcx = gcd(...)
|
|
mov rcx,rax
|
|
|
|
; rax = A*B/gcd(...)
|
|
mov rax, rsi
|
|
xor rdx, rdx
|
|
idiv rcx
|
|
|
|
add rsp,32
|
|
mov rsi,qword ptr [rsp + 8]
|
|
ret
|
|
lcm_euclidean ENDP
|
|
|
|
; rcx - d (U64)
|
|
dec_repeating_cycle_score PROC
|
|
; if (d == 0) goto return_0;
|
|
test rcx,rcx
|
|
jz return_0
|
|
|
|
;; divide out all factors of 2
|
|
loop2:
|
|
; if (d & 1) goto loop2_done;
|
|
test rcx,1
|
|
jnz loop2_done
|
|
|
|
; d /= 2
|
|
shr rcx,1
|
|
jmp loop2
|
|
|
|
loop2_done:
|
|
|
|
;; divide out all factors of 5
|
|
xor rdx,rdx
|
|
mov rax,rcx
|
|
loop5:
|
|
mov rcx,rax
|
|
|
|
; d /= 5
|
|
mov r8,5
|
|
div r8
|
|
|
|
; if (remainder != 0) goto loop5_done
|
|
test rdx,rdx
|
|
jnz loop5_done
|
|
jmp loop5
|
|
|
|
loop5_done:
|
|
|
|
; if (d == 1) goto return_0;
|
|
cmp rcx,1
|
|
je return_0
|
|
|
|
; s = 1
|
|
mov r8,1
|
|
; n = 10
|
|
mov r9,10
|
|
|
|
loop9:
|
|
; m = n - 1
|
|
mov rax,r9
|
|
dec rax
|
|
|
|
; m div d
|
|
xor rdx,rdx
|
|
div rcx
|
|
|
|
; if (remainder == 0) goto loop9_done
|
|
test rdx,rdx
|
|
jz loop9_done
|
|
|
|
; s += 1
|
|
inc r8
|
|
|
|
; if (s >= 18) assert
|
|
cmp r8,18
|
|
jb normal
|
|
int 3
|
|
|
|
normal:
|
|
; n *= 10
|
|
imul r9,10
|
|
|
|
jmp loop9
|
|
|
|
loop9_done:
|
|
mov rax,r8
|
|
ret
|
|
|
|
return_0:
|
|
xor rax,rax
|
|
ret
|
|
dec_repeating_cycle_score ENDP
|
|
|
|
|
|
; rcx - return pointer (EulerData*)
|
|
; rdx - text (String8*)
|
|
; r8 - out (U8*)
|
|
euler_data_from_text__asm PROC
|
|
; store og return pointer for later
|
|
mov [rsp + 8],rcx
|
|
|
|
; result.v = out
|
|
mov [rcx],r8
|
|
|
|
; ptr = text.str
|
|
mov rax,[rdx]
|
|
|
|
; opl = ptr + text.size
|
|
mov rdx,[rdx + 8]
|
|
lea rdx,[rax + rdx]
|
|
|
|
; line_count = 0
|
|
xor r10,r10
|
|
|
|
loop0:
|
|
; if (ptr >= opl) goto done0;
|
|
cmp rax,rdx
|
|
jae done0
|
|
|
|
; c = *ptr
|
|
movzx rcx, byte ptr [rax]
|
|
|
|
; if (c == '\n') goto inc_line_counter;
|
|
cmp rcx,0ah
|
|
je inc_line_counter
|
|
|
|
; d = c - '0'
|
|
sub rcx,'0'
|
|
|
|
; if (d <= 9) goto process_digit;
|
|
cmp rcx,9
|
|
jbe process_digit
|
|
|
|
jmp loop0_next
|
|
|
|
inc_line_counter:
|
|
; line_count += 1
|
|
inc r10
|
|
jmp loop0_next
|
|
|
|
process_digit:
|
|
; *out_ptr = d, out_ptr += 1
|
|
mov [r8],rcx
|
|
inc r8
|
|
|
|
loop0_next:
|
|
; increment (ptr += 1, goto loop0)
|
|
inc rax
|
|
jmp loop0
|
|
|
|
done0:
|
|
|
|
mov rax,[rsp + 8]
|
|
|
|
; result.count = out_ptr - out;
|
|
mov rdx,[rax]
|
|
sub r8,rdx
|
|
mov [rax + 8],r8
|
|
|
|
; result.line_count = line_count;
|
|
mov [rax + 16],r10
|
|
|
|
ret
|
|
euler_data_from_text__asm ENDP
|
|
|
|
|
|
; rcx - return pointer (EulerData*)
|
|
; rdx - text (String8*)
|
|
; r8 - out (U8*)
|
|
euler_data_from_text_2dig__asm PROC
|
|
; store og return pointer for later
|
|
mov [rsp + 8],rcx
|
|
|
|
; result.v = out
|
|
mov [rcx],r8
|
|
|
|
; ptr = text.str
|
|
mov rax,[rdx]
|
|
|
|
; opl = ptr + text.size
|
|
mov rdx,[rdx + 8]
|
|
lea rdx,[rax + rdx]
|
|
|
|
; line_count = 0
|
|
xor r10,r10
|
|
|
|
; dig_count = 0
|
|
xor r9,r9
|
|
|
|
; accum = 0
|
|
xor r11,r11
|
|
|
|
loop0:
|
|
; if (ptr >= opl) goto done0;
|
|
cmp rax,rdx
|
|
jae done0
|
|
|
|
; c = *ptr
|
|
movzx rcx, byte ptr [rax]
|
|
|
|
; if (c == '\n') goto inc_line_counter;
|
|
cmp rcx,0ah
|
|
je inc_line_counter
|
|
|
|
; d = c - '0'
|
|
sub rcx,'0'
|
|
|
|
; if (d <= 9) goto process_digit;
|
|
cmp rcx,9
|
|
jbe process_digit
|
|
|
|
jmp loop0_next
|
|
|
|
inc_line_counter:
|
|
; line_count += 1
|
|
inc r10
|
|
jmp loop0_next
|
|
|
|
process_digit:
|
|
; accum *= 10
|
|
imul r11,10
|
|
|
|
; accum += d
|
|
add r11,rcx
|
|
|
|
; dig_count += 1
|
|
inc r9
|
|
|
|
; if (dig_count != 2) goto loop0_next
|
|
cmp r9,2
|
|
jne loop0_next
|
|
|
|
; *out_ptr = accum, out_ptr += 1
|
|
mov [r8],r11
|
|
inc r8
|
|
|
|
; accum = 0, dig_count = 0
|
|
xor r11,r11
|
|
xor r9,r9
|
|
|
|
loop0_next:
|
|
; increment (ptr += 1, goto loop0)
|
|
inc rax
|
|
jmp loop0
|
|
|
|
done0:
|
|
|
|
mov rax,[rsp + 8]
|
|
|
|
; result.count = out_ptr - out;
|
|
mov rdx,[rax]
|
|
sub r8,rdx
|
|
mov [rax + 8],r8
|
|
|
|
; result.line_count = line_count;
|
|
mov [rax + 16],r10
|
|
|
|
ret
|
|
euler_data_from_text_2dig__asm ENDP
|
|
|
|
; rcx - result pointer (U64x3*)
|
|
; rdx - a (U64x3*)
|
|
; r8 - b (U64x3*)
|
|
add_u64x3 PROC
|
|
; c0,r0 = a0 + b0
|
|
xor r9,r9
|
|
mov rax, qword ptr [rdx]
|
|
add rax, qword ptr [r8]
|
|
setc r9b
|
|
|
|
; result[0] = r0
|
|
mov qword ptr [rcx], rax
|
|
|
|
; /c1,/r1 = a1 + b1
|
|
xor r10,r10
|
|
mov rax, qword ptr [rdx + 8]
|
|
add rax, qword ptr [r8 + 8]
|
|
setc r10b
|
|
|
|
; ^c1,r1 = /r1 + c0
|
|
xor r11,r11
|
|
add rax, r9
|
|
setc r11b
|
|
|
|
; result[1] = r1
|
|
mov qword ptr [rcx + 8], rax
|
|
|
|
; c1 = ^c1 | /c1
|
|
or r10,r11
|
|
|
|
; /r2 = a2 + b2
|
|
; r2 = /r2 + c1
|
|
mov rax, qword ptr [rdx + 16]
|
|
add rax, qword ptr [r8 + 16]
|
|
add rax, r10
|
|
|
|
; result[2] = r2
|
|
mov qword ptr [rcx + 16], rax
|
|
|
|
; return the result pointer
|
|
mov rax, rcx
|
|
ret
|
|
add_u64x3 ENDP
|
|
|
|
; rcx - result pointer (U64x3*)
|
|
; rdx - a (U64x3*)
|
|
; r8 - b (U64)
|
|
add_small_u64x3 PROC
|
|
; c0,r0 = a0 + b
|
|
xor r9,r9
|
|
mov rax, qword ptr [rdx]
|
|
add rax, r8
|
|
setc r9b
|
|
|
|
; result[0] = r0
|
|
mov qword ptr [rcx], rax
|
|
|
|
; /r1 = a1
|
|
mov rax, qword ptr [rdx + 8]
|
|
; c1,r1 = /r1 + c0
|
|
xor r10,r10
|
|
add rax, r9
|
|
setc r10b
|
|
; result[1] = r1
|
|
mov qword ptr [rcx + 8], rax
|
|
|
|
; /r2 = a2
|
|
mov rax, qword ptr [rdx + 16]
|
|
; r2 = /r2 + c1
|
|
add rax, r10
|
|
; result[2] = r2
|
|
mov qword ptr [rcx + 16], rax
|
|
|
|
; return the result pointer
|
|
mov rax, rcx
|
|
ret
|
|
add_small_u64x3 ENDP
|
|
|
|
; rcx - result pointer (U64x3*)
|
|
; rdx - a (U64x3*)
|
|
; r8 - b (U64)
|
|
mul_small_u64x3 PROC
|
|
|
|
; move a to r9
|
|
mov r9,rdx
|
|
|
|
; c0,r0 = a0*b (c0:rdx, r0:rax)
|
|
mov rax, qword ptr [r9]
|
|
mul r8
|
|
|
|
; result[0] = r0
|
|
mov qword ptr [rcx], rax
|
|
|
|
; move c0 to r10
|
|
mov r10, rdx
|
|
|
|
; /c1,/r1 = a1*b (/c1:rdx, /r1:rax)
|
|
mov rax, qword ptr [r9 + 8]
|
|
mul r8
|
|
|
|
; ^c1,r1 = /r1 + c0
|
|
xor r11,r11
|
|
add rax,r10
|
|
setc r11b
|
|
|
|
; result[1] = r1
|
|
mov qword ptr [rcx + 8], rax
|
|
|
|
; c1 = /c1 + ^c1
|
|
add r11, rdx
|
|
|
|
; /r2 = a2*b
|
|
mov rax, qword ptr [r9 + 16]
|
|
mul r8
|
|
|
|
; r2 = /r2 + c1
|
|
add rax,r11
|
|
|
|
; result[2] = r2
|
|
mov qword ptr [rcx + 16], rax
|
|
|
|
; return the result pointer
|
|
mov rax, rcx
|
|
ret
|
|
mul_small_u64x3 ENDP
|
|
|
|
; rcx - result pointer (U64x3_DivR*)
|
|
; rdx - n (U64x3*)
|
|
; r8 - d (U64)
|
|
div_small_u64x3 PROC
|
|
; move n into r9
|
|
mov r9,rdx
|
|
|
|
; (q2,r2) = n2 div d
|
|
mov rax, qword ptr [r9 + 16]
|
|
xor rdx,rdx
|
|
div r8
|
|
|
|
; result[2] = q2
|
|
mov qword ptr [rcx + 16], rax
|
|
|
|
; (q1,r1) = r2:n1 div d
|
|
mov rax, qword ptr [r9 + 8]
|
|
;; note: move r2 into high bits is a noop (rdx->rdx)
|
|
div r8
|
|
|
|
; result[1] = q1
|
|
mov qword ptr [rcx + 8], rax
|
|
|
|
; (q0,r0) = r1:n0 div d
|
|
mov rax, qword ptr [r9]
|
|
;; note: move r2 into high bits is a noop (rdx->rdx)
|
|
div r8
|
|
|
|
; result[0] = q0
|
|
mov qword ptr [rcx], rax
|
|
|
|
; result.r = r0
|
|
mov qword ptr [rcx + 24], rdx
|
|
|
|
; return the result pointer
|
|
mov rax,rcx
|
|
ret
|
|
div_small_u64x3 ENDP
|
|
|
|
|
|
; rcx - result pointer (U64x3*)
|
|
; rdx - digits (U8*)
|
|
; r8 - count (U64)
|
|
u64x3_from_dec PROC FRAME
|
|
push r12
|
|
.pushreg r12
|
|
push r13
|
|
.pushreg r13
|
|
push r14
|
|
.pushreg r14
|
|
push rbx
|
|
.pushreg rbx
|
|
sub rsp,32
|
|
.allocstack 32
|
|
.endprolog
|
|
|
|
; clear result to zer0
|
|
mov qword ptr [rcx],0
|
|
mov qword ptr [rcx + 8],0
|
|
mov qword ptr [rcx + 16],0
|
|
|
|
; move result pointer to rbx
|
|
mov rbx,rcx
|
|
|
|
; ptr = digits
|
|
mov r12, rdx
|
|
|
|
; opl = ptr + count
|
|
mov r13,r12
|
|
add r13,r8
|
|
|
|
loop0_begin:
|
|
; if (ptr >= opl) goto done
|
|
cmp r12,r13
|
|
jae done
|
|
|
|
; x = 0
|
|
xor r14,r14
|
|
; m = 1
|
|
mov r8,1
|
|
|
|
loop0:
|
|
; x *= 10, x += *ptr
|
|
imul r14,10
|
|
movzx rax, byte ptr [r12]
|
|
add r14, rax
|
|
|
|
; m *= 10
|
|
imul r8,10
|
|
|
|
; ptr += 1
|
|
inc r12
|
|
|
|
; if (m >= 1000000000000000000) goto loop1
|
|
mov rdx,1000000000000000000
|
|
cmp r8,rdx
|
|
jae loop1
|
|
|
|
; if (ptr >= opl) goto loop1
|
|
cmp r12,r13
|
|
jae loop1
|
|
|
|
; goto loop0
|
|
jmp loop0
|
|
|
|
loop1:
|
|
; call mul_small_u64x3(result pointer, result pointer, m)
|
|
mov rcx,rbx
|
|
mov rdx,rbx
|
|
;mov r8,r8
|
|
call mul_small_u64x3
|
|
|
|
; call add_small_u64x3(result pointer, result pointer, x)
|
|
mov rcx,rbx
|
|
mov rdx,rbx
|
|
mov r8,r14
|
|
call add_small_u64x3
|
|
|
|
jmp loop0_begin
|
|
|
|
done:
|
|
|
|
; return result pointer
|
|
mov rax, rbx
|
|
|
|
add rsp,32
|
|
pop rbx
|
|
pop r14
|
|
pop r13
|
|
pop r12
|
|
ret
|
|
u64x3_from_dec ENDP
|
|
|
|
|
|
; rcx - buffer (U8*)
|
|
; rdx - x (U64x3*)
|
|
dec_from_u64x3__asm PROC FRAME
|
|
push r12
|
|
.pushreg r12
|
|
push r13
|
|
.pushreg r13
|
|
push r14
|
|
.pushreg r14
|
|
sub rsp,64
|
|
.allocstack 64
|
|
.endprolog
|
|
|
|
; move buffer into r12
|
|
mov r12,rcx
|
|
|
|
; ptr = buffer
|
|
mov r13,r12
|
|
|
|
; move x to rsp + 32 "by value"
|
|
mov rax,[rdx]
|
|
mov [rsp + 32],rax
|
|
mov rax,[rdx + 8]
|
|
mov [rsp + 40],rax
|
|
mov rax,[rdx + 16]
|
|
mov [rsp + 48],rax
|
|
|
|
loop0:
|
|
|
|
; if (x[0] == 0 && x[1] == 0 && x[2] == 0) goto finish;
|
|
mov rax,[rsp + 32]
|
|
or rax,[rsp + 40]
|
|
or rax,[rsp + 48]
|
|
test rax,rax
|
|
jz finish
|
|
|
|
; call div_small_u64x3(div-result, x, 10)
|
|
lea rcx,[rsp + 32]
|
|
lea rdx,[rsp + 32]
|
|
mov r8,10
|
|
call div_small_u64x3
|
|
|
|
; d = div_result.r
|
|
mov rcx,[rsp + 56]
|
|
|
|
; *ptr = d
|
|
mov byte ptr [r13], cl
|
|
|
|
; ptr += 1
|
|
inc r13
|
|
|
|
; goto loop0
|
|
jmp loop0
|
|
|
|
finish:
|
|
; set return value to (ptr - buffer)
|
|
mov rax,r13
|
|
sub rax,r12
|
|
|
|
; ptra = ptr
|
|
; ptrb = buffer
|
|
; (ptra: r12, ptrb: r13)
|
|
; ptrb -= 1
|
|
dec r13
|
|
reverse_loop:
|
|
; if (ptra >= ptrb) goto done
|
|
cmp r12, r13
|
|
jae done
|
|
|
|
; swap
|
|
mov dl, byte ptr [r12]
|
|
mov cl, byte ptr [r13]
|
|
mov byte ptr [r12], cl
|
|
mov byte ptr [r13], dl
|
|
|
|
; ptra += 1, ptrb -= 1
|
|
inc r12
|
|
dec r13
|
|
|
|
; goto reverse_loop
|
|
jmp reverse_loop
|
|
|
|
done:
|
|
|
|
add rsp,64
|
|
pop r14
|
|
pop r13
|
|
pop r12
|
|
ret
|
|
dec_from_u64x3__asm ENDP
|
|
|
|
; rcx - a (U64xN*)
|
|
; rdx - b (U64)
|
|
; assumption: a has an extra slot of memory available
|
|
add_small_in_place_u64xn PROC
|
|
; size = a.size
|
|
mov r11, [rcx]
|
|
|
|
; if (size == 0) goto simple_case;
|
|
test r11,r11
|
|
jz simple_case
|
|
|
|
; c,r0 = a[0] + b
|
|
xor r9,r9
|
|
mov rax, qword ptr [rcx + 8]
|
|
add rax, rdx
|
|
setc r9b
|
|
|
|
; a[0] = r0
|
|
mov qword ptr [rcx + 8], rax
|
|
|
|
; ptr = &a[1]
|
|
lea rdx, [rcx + 16]
|
|
; opl = &a[size]
|
|
lea r8, [8 + rcx + r11*8]
|
|
|
|
; *opl = 0
|
|
mov qword ptr [r8],0
|
|
|
|
loop0:
|
|
; if (c == 0) goto loop0_done
|
|
test r9b,r9b
|
|
jz loop0_done
|
|
|
|
; c',r = *ptr + c
|
|
xor r10,r10
|
|
mov rax, qword ptr [rdx]
|
|
add rax, r9
|
|
setc r10b
|
|
|
|
; *ptr = r
|
|
mov qword ptr [rdx], rax
|
|
|
|
; ptr += 1
|
|
add rdx,8
|
|
|
|
; c = c'
|
|
mov r9,r10
|
|
|
|
jmp loop0
|
|
|
|
loop0_done:
|
|
; if (*opl == 0) goto skip_inc_size;
|
|
mov rax, [r8]
|
|
test rax,rax
|
|
jz skip_inc_size
|
|
inc r11
|
|
mov [rcx],r11
|
|
|
|
skip_inc_size:
|
|
ret
|
|
|
|
simple_case:
|
|
mov qword ptr [rcx], 1
|
|
mov [rcx + 8], rdx
|
|
ret
|
|
add_small_in_place_u64xn ENDP
|
|
|
|
; rcx - a (U64xN*)
|
|
; rdx - b (U64)
|
|
; assumption: a has an extra slot of memory available
|
|
mul_small_in_place_u64xn PROC FRAME
|
|
mov [rsp + 8],r12
|
|
.savereg r12, 8
|
|
.endprolog
|
|
|
|
; move b into r9
|
|
mov r9,rdx
|
|
|
|
; f = 0
|
|
xor r8,r8
|
|
|
|
; i = 0
|
|
xor r10,r10
|
|
|
|
; save a.size in r12
|
|
mov r12,[rcx]
|
|
|
|
loop0:
|
|
; if (i >= a.size) goto done
|
|
cmp r10,r12
|
|
jae done
|
|
|
|
; l,h = a[i]*b
|
|
mov rax, qword ptr [rcx + r10*8 + 8]
|
|
mul r9
|
|
|
|
; c,r = l + f
|
|
xor r11,r11
|
|
add rax,r8
|
|
setc r11b
|
|
|
|
; a[i] = r
|
|
mov qword ptr [rcx + r10*8 + 8], rax
|
|
|
|
; f = h + c
|
|
mov r8,r11
|
|
add r8,rdx
|
|
|
|
; i += 1
|
|
inc r10
|
|
|
|
; goto loop0
|
|
jmp loop0
|
|
|
|
done:
|
|
; if (f == 0) goto return;
|
|
test r8,r8
|
|
jz return
|
|
|
|
; a[a.size] = f
|
|
mov qword ptr [rcx + r12*8 + 8], r8
|
|
|
|
; a.size += 1
|
|
inc r12
|
|
mov qword ptr [rcx],r12
|
|
|
|
return:
|
|
mov r12,[rsp + 8]
|
|
ret
|
|
mul_small_in_place_u64xn ENDP
|
|
|
|
; rcx - n (U64xN*)
|
|
; rdx - d (U64)
|
|
div_small_in_place_u64xn PROC
|
|
; move d into r8
|
|
mov r8,rdx
|
|
|
|
; r = 0
|
|
xor rdx,rdx
|
|
|
|
; i = n.size
|
|
mov r9, qword ptr [rcx]
|
|
|
|
loop0:
|
|
; if (i == 0) goto done;
|
|
test r9,r9
|
|
jz done
|
|
|
|
; i -= 1
|
|
dec r9
|
|
|
|
; q,r = r:n[i] div d
|
|
mov rax, qword ptr [rcx + r9*8 + 8]
|
|
div r8
|
|
|
|
; n[i] = q
|
|
mov qword ptr [rcx + r9*8 + 8], rax
|
|
|
|
; goto loop0
|
|
jmp loop0
|
|
|
|
done:
|
|
|
|
; if (n.size > 0 && n[n.size - 1] == 0) n.size -= 1
|
|
mov r9, qword ptr [rcx]
|
|
test r9,r9
|
|
jz return
|
|
mov r8, qword ptr [rcx + r9*8]
|
|
test r8,r8
|
|
jnz return
|
|
|
|
dec r9
|
|
mov qword ptr [rcx], r9
|
|
|
|
return:
|
|
; set result value to last remainder
|
|
mov rax,rdx
|
|
ret
|
|
div_small_in_place_u64xn ENDP
|
|
|
|
; rcx - buffer (U8*)
|
|
; rdx - x (U64xN*) (we're allowed to modify this in place)
|
|
dec_from_u64xn__asm PROC FRAME
|
|
push r12
|
|
.pushreg r12
|
|
push r13
|
|
.pushreg r13
|
|
push r14
|
|
.pushreg r14
|
|
sub rsp,32
|
|
.allocstack 32
|
|
.endprolog
|
|
|
|
; move buffer into r12
|
|
mov r12,rcx
|
|
|
|
; ptr = buffer
|
|
mov r13,r12
|
|
|
|
; move x to r14
|
|
mov r14,rdx
|
|
|
|
loop0:
|
|
|
|
; if (*x == 0) goto finish;
|
|
mov rax, qword ptr [r14]
|
|
test rax,rax
|
|
jz finish
|
|
|
|
; call div_small_in_place_u64xn(x, 10)
|
|
mov rcx,r14
|
|
mov rdx,10
|
|
call div_small_in_place_u64xn
|
|
|
|
; *ptr = remainder
|
|
mov byte ptr [r13], al
|
|
|
|
; ptr += 1
|
|
inc r13
|
|
|
|
; goto loop0
|
|
jmp loop0
|
|
|
|
|
|
finish:
|
|
; set return value to (ptr - buffer)
|
|
mov rax,r13
|
|
sub rax,r12
|
|
|
|
; ptra = ptr
|
|
; ptrb = buffer
|
|
; (ptra: r12, ptrb: r13)
|
|
|
|
; ptrb -= 1
|
|
dec r13
|
|
|
|
reverse_loop:
|
|
; if (ptra >= ptrb) goto done
|
|
cmp r12, r13
|
|
jae done
|
|
|
|
; swap
|
|
mov dl, byte ptr [r12]
|
|
mov cl, byte ptr [r13]
|
|
mov byte ptr [r12], cl
|
|
mov byte ptr [r13], dl
|
|
|
|
; ptra += 1, ptrb -= 1
|
|
inc r12
|
|
dec r13
|
|
|
|
; goto reverse_loop
|
|
jmp reverse_loop
|
|
|
|
done:
|
|
|
|
add rsp,32
|
|
pop r14
|
|
pop r13
|
|
pop r12
|
|
ret
|
|
dec_from_u64xn__asm ENDP
|
|
|
|
END
|
|
|