// ---------------------------------------------------------------------------
// Assembly listings carried over verbatim from the original text, re-split to
// one instruction per line. Presumably compiler output for `udivrem_1e32`
// below -- TODO confirm against the original slides.
//
// First listing. Its target header is not visible in this excerpt (the
// register names are x86-64), and it starts mid-instruction: the leading
// `rcx` is the tail of an instruction cut off before this excerpt.
//
//         rcx
//         mov     r8, rdx
//         movabs  rcx, -5421010862428
//         mov     r10, rdx
//         imul    r10, rcx
//         movabs  r9, 8814407033341083648
//         mov     rax, rdx
//         mul     r9
//         add     r10, rdx
//         add     rax, rdi
//         adc     r10, rsi
//         add     r9, rax
//         adc     rcx, r10
//         movabs  rdx, -8814407033341083649
//         cmp     rdx, rax
//         movabs  rdx, 5421010862427
//         sbb     rdx, r10
//         cmovae  rcx, r10
//         cmovae  r9, rax
//         adc     r8, 0
//         mov     rax, r8
//         mov     rdx, r9
//         ret
//
// # aarch64-unknown-linux-gnu
// udivrem_1e32:
//         mov     w8, #60487
//         mov     x9, #18137646891008
//         movk    w8, #51, lsl #16
//         movk    x9, #31315, lsl #48
//         mov     x10, #-16732
//         mov     x13, #-18137646891009
//         umulh   x8, x1, x8
//         movk    x10, #53906, lsl #16
//         movk    x10, #64273, lsl #32
//         mov     x14, #16731
//         movk    x13, #34220, lsl #48
//         movk    x14, #11629, lsl #16
//         umulh   x11, x8, x9
//         movk    x14, #1262, lsl #32
//         mul     x12, x8, x9
//         madd    x11, x8, x10, x11
//         adds    x12, x12, x0
//         adc     x11, x11, x1
//         adds    x9, x12, x9
//         adc     x10, x11, x10
//         cmp     x13, x12
//         sbcs    xzr, x14, x11
//         cinc    x0, x8, lo
//         csel    x2, x10, x11, lo
//         csel    x1, x9, x12, lo
//         ret
// ---------------------------------------------------------------------------

// Slide title: "A+Bから始める異常高速化"
// (English: "Extreme speed-ups, starting from A+B")

/// For an integer `x` below 2^128, computes
/// `(floor(x / 10^32), x mod 10^32)`.
///
/// # Returns
/// A tuple `(z0, z1)` where `z0` is the quotient and `z1` the remainder of
/// dividing `x` by 10^32.
///
/// # Method
/// The quotient is first estimated from the upper 64 bits of `x` using the
/// truncated reciprocal `floor(2^128 / 10^32) = 3402823`. Because the
/// reciprocal and the product are both rounded down, the estimate never
/// exceeds the true quotient, and it falls short by at most one, so a single
/// conditional correction is sufficient.
pub fn udivrem_1e32(x: u128) -> (u64, u128) {
    /// The divisor, 10^32.
    const TEN_POW_32: u128 = 10u128.pow(32);
    /// floor(2^128 / 10^32) = 3402823
    const RECIPROCAL: u128 = 3_402_823;

    // Quotient estimate: high word of (upper 64 bits of x) * RECIPROCAL.
    let upper = (x >> 64) as u64 as u128;
    let mut z0 = ((upper * RECIPROCAL) >> 64) as u64;

    // Remainder for the estimate. Cannot underflow because the estimate
    // never exceeds the true quotient.
    let mut z1 = x - (z0 as u128) * TEN_POW_32;

    // If a whole 10^32 still fits in the remainder, the estimate was one too
    // small: take it out and bump the quotient.
    if let Some(zt) = z1.checked_sub(TEN_POW_32) {
        z1 = zt;
        z0 += 1;
    }

    (z0, z1)
}

// Mizar/みざー <http://github.com/mizar>   (52 -- presumably the slide number)