diff --git a/benchmarks/bench_gt_multiexp_bls12_381.nim b/benchmarks/bench_gt_multiexp_bls12_381.nim index ef60000a..cdd672cf 100644 --- a/benchmarks/bench_gt_multiexp_bls12_381.nim +++ b/benchmarks/bench_gt_multiexp_bls12_381.nim @@ -30,15 +30,20 @@ const AvailableCurves = [ const testNumPoints = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] +type Fp12over4[C: static Algebra] = CubicExt[Fp4[C]] +type Fp12over6[C: static Algebra] = QuadraticExt[Fp6[C]] + proc main() = separator() staticFor i, 0, AvailableCurves.len: const curve = AvailableCurves[i] - var ctx = createBenchMultiExpContext(Fp12[curve], testNumPoints) + var ctx12o4 = createBenchMultiExpContext(Fp12over4[curve], testNumPoints) + var ctx12o6 = createBenchMultiExpContext(Fp12over6[curve], testNumPoints) separator() for numPoints in testNumPoints: let batchIters = max(1, Iters div numPoints) - ctx.multiExpParallelBench(numPoints, batchIters) + ctx12o4.multiExpParallelBench(numPoints, batchIters) + ctx12o6.multiExpParallelBench(numPoints, batchIters) separator() separator() diff --git a/benchmarks/bench_gt_parallel_template.nim b/benchmarks/bench_gt_parallel_template.nim index 0e804400..47ee21a6 100644 --- a/benchmarks/bench_gt_parallel_template.nim +++ b/benchmarks/bench_gt_parallel_template.nim @@ -39,9 +39,9 @@ proc report(op, domain: string, start, stop: MonoTime, startClk, stopClk: int64, let ns = inNanoseconds((stop-start) div iters) let throughput = 1e9 / float64(ns) when SupportsGetTicks: - echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" + echo &"{op:<55} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" else: - echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op" + echo &"{op:<55} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op" macro fixFieldDisplay(T: typedesc): untyped = # At compile-time, enums are integers and their display is buggy @@ -52,7 +52,7 @@ macro fixFieldDisplay(T: typedesc): untyped = result = newLit name func fixDisplay(T: typedesc): string = - when T is (Fp or Fp2 or Fp4 or Fp6 or Fp12): + when T is (Fp or ExtensionField): fixFieldDisplay(T) else: $T @@ -68,7 +68,7 @@ func random_gt*(rng: var RngState, F: typedesc): F {.inline, noInit.} = result = rng.random_unsafe(F) result.finalExp() -# Multi-exponentiations +# multi-exp # --------------------------------------------------------------------------- type BenchMultiexpContext*[GT] = object @@ -128,9 +128,13 @@ proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: in var startNaive, stopNaive, startMultiExpBaseline, stopMultiExpBaseline: MonoTime var startMultiExpOpt, stopMultiExpOpt, startMultiExpPara, stopMultiExpPara: MonoTime + when GT is QuadraticExt: + var startMultiExpBaselineTorus: MonoTime + var stopMultiExpBaselineTorus: MonoTime + if numInputs <= 100000: # startNaive = getMonotime() - bench("𝔾ₜ exponentiations " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + bench("𝔾ₜ exponentiations " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): var tmp: GT r.setOne() for i in 0 ..< elems.len: @@ -140,7 +144,7 @@ proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: in if numInputs <= 100000: startNaive = getMonotime() - bench("𝔾ₜ exponentiations vartime " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + bench("𝔾ₜ exponentiations vartime " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): var tmp: GT r.setOne() for i in 0 ..< elems.len: @@ -150,13 +154,20 @@ proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: in if numInputs <= 100000: startMultiExpBaseline = getMonotime() - bench("𝔾ₜ multi-exponentiations baseline " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): - r.multiExp_reference_vartime(elems, exponents) + bench("𝔾ₜ multi-exp baseline " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + r.multiExp_reference_vartime(elems, exponents, useTorus = false) stopMultiExpBaseline = getMonotime() + if numInputs <= 100000: + when GT is QuadraticExt: + startMultiExpBaselineTorus = getMonotime() + bench("𝔾ₜ multi-exp baseline + torus" & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + r.multiExp_reference_vartime(elems, exponents, useTorus = true) + stopMultiExpBaselineTorus = getMonotime() + block: startMultiExpOpt = getMonotime() - bench("𝔾ₜ multi-exponentiations optimized " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + bench("𝔾ₜ multi-exp optimized " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): r.multiExp_vartime(elems, exponents) stopMultiExpOpt = getMonotime() @@ -164,7 +175,7 @@ proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: in ctx.tp = Threadpool.new() startMultiExpPara = getMonotime() - bench("𝔾ₜ multi-exponentiations" & align($ctx.tp.numThreads & " threads", 11) & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + bench("𝔾ₜ multi-exp " & align($ctx.tp.numThreads & " threads", 11) & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): ctx.tp.multiExp_vartime_parallel(r, elems, exponents) stopMultiExpPara = getMonotime() @@ -174,6 +185,8 @@ proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: in let perfMultiExpBaseline = inNanoseconds((stopMultiExpBaseline-startMultiExpBaseline) div iters) let perfMultiExpOpt = inNanoseconds((stopMultiExpOpt-startMultiExpOpt) div iters) let perfMultiExpPara = inNanoseconds((stopMultiExpPara-startMultiExpPara) div iters) + when GT is QuadraticExt: + let perfMultiExpBaselineTorus = inNanoseconds((stopMultiExpBaselineTorus-startMultiExpBaselineTorus) div iters) if numInputs <= 100000: let speedupBaseline = float(perfNaive) / float(perfMultiExpBaseline) @@ -185,5 +198,9 @@ proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: in let speedupOptBaseline = float(perfMultiExpBaseline) / float(perfMultiExpOpt) echo &"Speedup ratio optimized over baseline linear combination: {speedupOptBaseline:>6.3f}x" + when GT is QuadraticExt: + let speedupTorusOverBaseline = float(perfMultiExpBaseline) / float(perfMultiExpBaselineTorus) + echo &"Speedup ratio baseline+Torus over baseline linear combination: {speedupTorusOverBaseline:>6.3f}x" + let speedupParaOpt = float(perfMultiExpOpt) / float(perfMultiExpPara) echo &"Speedup ratio parallel over optimized linear combination: {speedupParaOpt:>6.3f}x" diff --git a/constantine/math/pairings/gt_exponentiations.nim b/constantine/math/pairings/gt_exponentiations.nim index 3b66eaff..79db297d 100644 --- a/constantine/math/pairings/gt_exponentiations.nim +++ b/constantine/math/pairings/gt_exponentiations.nim @@ -59,8 +59,8 @@ func gtExpEndo*[Gt: ExtensionField, scalBits: static int]( " order: " & $Fr[Gt.Name].bits() & "-bit\n" # 1. Compute endomorphisms - const M = when Gt is Fp6: 2 - elif Gt is Fp12: 4 + const M = when Gt.Name.getEmbeddingDegree() == 6: 2 + elif Gt.Name.getEmbeddingDegree() == 12: 4 else: {.error: "Unconfigured".} var endos {.noInit.}: array[M-1, Gt] @@ -131,9 +131,9 @@ func gtExp*[Gt](r: var Gt, a: Gt, scalar: BigInt) {.inline, meter.} = ## Those will be assumed to maintain constant-time property when Gt.Name.hasEndomorphismAcceleration() and BigInt.bits >= EndomorphismThreshold: - when Gt is Fp6: + when Gt.Name.getEmbeddingDegree() == 6: r.gtExpEndo(a, scalar) # TODO: window method - elif Gt is Fp12: + elif Gt.Name.getEmbeddingDegree() == 12: r.gtExpEndo(a, scalar) else: # Curves defined on Fp^m with m > 2 {.error: "Unconfigured".} diff --git a/constantine/math/pairings/gt_multiexp_parallel.nim b/constantine/math/pairings/gt_multiexp_parallel.nim index cb81c613..9203f9bf 100644 --- a/constantine/math/pairings/gt_multiexp_parallel.nim +++ b/constantine/math/pairings/gt_multiexp_parallel.nim @@ -122,8 +122,8 @@ proc applyEndomorphism_parallel[bits: static int, GT]( ## Returns a new triplet (endoElems, endoExpos, N) ## endoElems and endoExpos MUST be freed afterwards - const M = when Gt is Fp6: 2 - elif Gt is Fp12: 4 + const M = when Gt.Name.getEmbeddingDegree() == 6: 2 + elif Gt.Name.getEmbeddingDegree() == 12: 4 else: {.error: "Unconfigured".} const L = Fr[Gt.Name].bits().computeEndoRecodedLength(M) diff --git a/tests/math_pairings/t_pairing_template.nim b/tests/math_pairings/t_pairing_template.nim index e4e6f65d..6d9482ba 100644 --- a/tests/math_pairings/t_pairing_template.nim +++ b/tests/math_pairings/t_pairing_template.nim @@ -217,7 +217,7 @@ proc runGTexponentiationTests*(GT: typedesc, iters: int) = proc runGTmultiexpTests*[N: static int](GT: typedesc, num_points: array[N, int], iters: int) = var rng: RngState - let timeseed = uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32 + let timeseed = 1727299797 # uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32 seed(rng, timeseed) echo "\n------------------------------------------------------\n" echo "test_pairing_",$GT.Name,"_gt_multiexp xoshiro512** seed: ", timeseed @@ -242,11 +242,11 @@ proc runGTmultiexpTests*[N: static int](GT: typedesc, num_points: array[N, int], var mexp_ref, mexp_ref_torus, mexp_opt: GT mexp_ref.multiExp_reference_vartime(elems, exponents, useTorus = false) - # mexp_ref_torus.multiExp_reference_vartime(elems, exponents, useTorus = true) + mexp_ref_torus.multiExp_reference_vartime(elems, exponents, useTorus = true) mexp_opt.multiExp_vartime(elems, exponents) doAssert bool(naive == mexp_ref) - # doAssert bool(naive == mexp_ref_torus) + doAssert bool(naive == mexp_ref_torus) doAssert bool(naive == mexp_opt) stdout.write '.'