diff --git a/Cargo.lock b/Cargo.lock index 390f2615ad..2e77c75da1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -659,6 +659,8 @@ version = "0.2.0" dependencies = [ "blake2b_simd", "byteorder", + "cc", + "hex", ] [[package]] diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml index 2eb7c023d1..ea3dd29cce 100644 --- a/components/equihash/Cargo.toml +++ b/components/equihash/Cargo.toml @@ -9,9 +9,21 @@ license = "MIT OR Apache-2.0" edition = "2021" rust-version = "1.56.1" +[features] +default = [] + +# Experimental tromp solver support, builds the C++ tromp solver and Rust FFI layer. +solver = ["dep:cc"] + [dependencies] blake2b_simd = "1" byteorder = "1" +[build-dependencies] +cc = { version = "1", optional = true } + +[dev-dependencies] +hex = "0.4" + [lib] bench = false diff --git a/components/equihash/build.rs b/components/equihash/build.rs new file mode 100644 index 0000000000..74122e450a --- /dev/null +++ b/components/equihash/build.rs @@ -0,0 +1,17 @@ +//! Build script for the equihash tromp solver in C. + +fn main() { + #[cfg(feature = "solver")] + build_tromp_solver(); +} + +#[cfg(feature = "solver")] +fn build_tromp_solver() { + cc::Build::new() + .include("tromp/") + .file("tromp/equi_miner.c") + .compile("equitromp"); + + // Tell Cargo to only rerun this build script if the tromp C files or headers change. + println!("cargo:rerun-if-changed=tromp"); +} diff --git a/components/equihash/src/blake2b.rs b/components/equihash/src/blake2b.rs new file mode 100644 index 0000000000..75da59d5ab --- /dev/null +++ b/components/equihash/src/blake2b.rs @@ -0,0 +1,59 @@ +// Copyright (c) 2020-2022 The Zcash developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://www.opensource.org/licenses/mit-license.php . + +// This module uses unsafe code for FFI into blake2b. 
+#![allow(unsafe_code)] + +use blake2b_simd::{State, PERSONALBYTES}; + +use std::ptr; +use std::slice; + +#[no_mangle] +pub extern "C" fn blake2b_init( + output_len: usize, + personalization: *const [u8; PERSONALBYTES], +) -> *mut State { + let personalization = unsafe { personalization.as_ref().unwrap() }; + + Box::into_raw(Box::new( + blake2b_simd::Params::new() + .hash_length(output_len) + .personal(personalization) + .to_state(), + )) +} + +#[no_mangle] +pub extern "C" fn blake2b_clone(state: *const State) -> *mut State { + unsafe { state.as_ref() } + .map(|state| Box::into_raw(Box::new(state.clone()))) + .unwrap_or(ptr::null_mut()) +} + +#[no_mangle] +pub extern "C" fn blake2b_free(state: *mut State) { + if !state.is_null() { + drop(unsafe { Box::from_raw(state) }); + } +} + +#[no_mangle] +pub extern "C" fn blake2b_update(state: *mut State, input: *const u8, input_len: usize) { + let state = unsafe { state.as_mut().unwrap() }; + let input = unsafe { slice::from_raw_parts(input, input_len) }; + + state.update(input); +} + +#[no_mangle] +pub extern "C" fn blake2b_finalize(state: *mut State, output: *mut u8, output_len: usize) { + let state = unsafe { state.as_mut().unwrap() }; + let output = unsafe { slice::from_raw_parts_mut(output, output_len) }; + + // Allow consuming only part of the output. 
+ let hash = state.finalize(); + assert!(output_len <= hash.as_bytes().len()); + output.copy_from_slice(&hash.as_bytes()[..output_len]); +} diff --git a/components/equihash/src/lib.rs b/components/equihash/src/lib.rs index cb6131ca3b..e0ddf8c5f3 100644 --- a/components/equihash/src/lib.rs +++ b/components/equihash/src/lib.rs @@ -28,3 +28,8 @@ mod verify; mod test_vectors; pub use verify::{is_valid_solution, Error}; + +#[cfg(feature = "solver")] +mod blake2b; +#[cfg(feature = "solver")] +pub mod tromp; diff --git a/components/equihash/src/minimal.rs b/components/equihash/src/minimal.rs index 81da63e657..838907840f 100644 --- a/components/equihash/src/minimal.rs +++ b/components/equihash/src/minimal.rs @@ -5,6 +5,49 @@ use byteorder::{BigEndian, ReadBytesExt}; use crate::params::Params; +// Rough translation of CompressArray() from: +// https://github.com/zcash/zcash/blob/6fdd9f1b81d3b228326c9826fa10696fc516444b/src/crypto/equihash.cpp#L39-L76 +#[cfg(any(feature = "solver", test))] +fn compress_array(array: &[u8], bit_len: usize, byte_pad: usize) -> Vec { + let index_bytes = (u32::BITS / 8) as usize; + assert!(bit_len >= 8); + assert!(8 * index_bytes >= 7 + bit_len); + + let in_width: usize = (bit_len + 7) / 8 + byte_pad; + let out_len = bit_len * array.len() / (8 * in_width); + + let mut out = Vec::with_capacity(out_len); + let bit_len_mask: u32 = (1 << (bit_len as u32)) - 1; + + // The acc_bits least-significant bits of acc_value represent a bit sequence + // in big-endian order. + let mut acc_bits: usize = 0; + let mut acc_value: u32 = 0; + + let mut j: usize = 0; + for _i in 0..out_len { + // When we have fewer than 8 bits left in the accumulator, read the next + // input element. 
+ if acc_bits < 8 { + acc_value <<= bit_len; + for x in byte_pad..in_width { + acc_value |= ( + // Apply bit_len_mask across byte boundaries + (array[j + x] & ((bit_len_mask >> (8 * (in_width - x - 1))) as u8)) as u32 + ) + .wrapping_shl(8 * (in_width - x - 1) as u32); // Big-endian + } + j += in_width; + acc_bits += bit_len; + } + + acc_bits -= 8; + out.push((acc_value >> acc_bits) as u8); + } + + out +} + pub(crate) fn expand_array(vin: &[u8], bit_len: usize, byte_pad: usize) -> Vec { assert!(bit_len >= 8); assert!(u32::BITS as usize >= 7 + bit_len); @@ -50,6 +93,31 @@ pub(crate) fn expand_array(vin: &[u8], bit_len: usize, byte_pad: usize) -> Vec Vec { + let c_bit_len = p.collision_bit_length(); + let index_bytes = (u32::BITS / 8) as usize; + let digit_bytes = ((c_bit_len + 1) + 7) / 8; + assert!(digit_bytes <= index_bytes); + + let len_indices = indices.len() * index_bytes; + let byte_pad = index_bytes - digit_bytes; + + // Rough translation of EhIndexToArray(index, array_pointer) from: + // https://github.com/zcash/zcash/blob/6fdd9f1b81d3b228326c9826fa10696fc516444b/src/crypto/equihash.cpp#L123-L128 + // + // Big-endian so that lexicographic array comparison is equivalent to integer comparison. + let array: Vec = indices + .iter() + .flat_map(|index| index.to_be_bytes()) + .collect(); + assert_eq!(array.len(), len_indices); + + compress_array(&array, c_bit_len + 1, byte_pad) +} + /// Returns `None` if the parameters are invalid for this minimal encoding. 
pub(crate) fn indices_from_minimal(p: Params, minimal: &[u8]) -> Option> { let c_bit_len = p.collision_bit_length(); @@ -76,11 +144,14 @@ pub(crate) fn indices_from_minimal(p: Params, minimal: &[u8]) -> Option #[cfg(test)] mod tests { - use super::{expand_array, indices_from_minimal, Params}; + use crate::minimal::minimal_from_indices; + + use super::{compress_array, expand_array, indices_from_minimal, Params}; #[test] - fn array_expansion() { + fn array_compression_and_expansion() { let check_array = |(bit_len, byte_pad), compact, expanded| { + assert_eq!(compress_array(expanded, bit_len, byte_pad), compact); assert_eq!(expand_array(compact, bit_len, byte_pad), expanded); }; @@ -149,10 +220,9 @@ mod tests { #[test] fn minimal_solution_repr() { let check_repr = |minimal, indices| { - assert_eq!( - indices_from_minimal(Params { n: 80, k: 3 }, minimal).unwrap(), - indices, - ); + let p = Params { n: 80, k: 3 }; + assert_eq!(minimal_from_indices(p, indices), minimal); + assert_eq!(indices_from_minimal(p, minimal).unwrap(), indices); }; // The solutions here are not intended to be valid. diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs new file mode 100644 index 0000000000..37daa6d741 --- /dev/null +++ b/components/equihash/src/tromp.rs @@ -0,0 +1,256 @@ +//! Rust interface to the tromp equihash solver. 
+ +use std::marker::{PhantomData, PhantomPinned}; +use std::slice; + +use blake2b_simd::State; + +use crate::{blake2b, minimal::minimal_from_indices, params::Params, verify}; + +#[repr(C)] +struct CEqui { + _f: [u8; 0], + _m: PhantomData<(*mut u8, PhantomPinned)>, +} + +#[link(name = "equitromp")] +extern "C" { + #[allow(improper_ctypes)] + fn equi_new( + blake2b_clone: extern "C" fn(state: *const State) -> *mut State, + blake2b_free: extern "C" fn(state: *mut State), + blake2b_update: extern "C" fn(state: *mut State, input: *const u8, input_len: usize), + blake2b_finalize: extern "C" fn(state: *mut State, output: *mut u8, output_len: usize), + ) -> *mut CEqui; + fn equi_free(eq: *mut CEqui); + #[allow(improper_ctypes)] + fn equi_setstate(eq: *mut CEqui, ctx: *const State); + fn equi_clearslots(eq: *mut CEqui); + fn equi_digit0(eq: *mut CEqui, id: u32); + fn equi_digitodd(eq: *mut CEqui, r: u32, id: u32); + fn equi_digiteven(eq: *mut CEqui, r: u32, id: u32); + fn equi_digitK(eq: *mut CEqui, id: u32); + fn equi_nsols(eq: *const CEqui) -> usize; + /// Returns `equi_nsols()` solutions of length `2^K`, in a single memory allocation. + fn equi_sols(eq: *const CEqui) -> *const u32; +} + +/// Performs a single equihash solver run with equihash parameters `p` and hash state `curr_state`. +/// Returns zero or more unique solutions. +/// +/// # SAFETY +/// +/// The parameters to this function must match the hard-coded parameters in the C++ code. +/// +/// This function uses unsafe code for FFI into the tromp solver. +#[allow(unsafe_code)] +#[allow(clippy::print_stdout)] +unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec> { + // SAFETY: caller must supply a valid `eq` instance. + // + // Review Note: nsols is set to zero in C++ here + equi_setstate(eq, curr_state); + + // Initialization done, start algo driver. + equi_digit0(eq, 0); + equi_clearslots(eq); + // SAFETY: caller must supply a `p` instance that matches the hard-coded values in the C code. 
+ for r in 1..p.k { + if (r & 1) != 0 { + equi_digitodd(eq, r, 0) + } else { + equi_digiteven(eq, r, 0) + }; + equi_clearslots(eq); + } + // Review Note: nsols is increased here, but only if the solution passes the strictly ordered check. + // With 256 nonces, we get to around 6/9 digits strictly ordered. + equi_digitK(eq, 0); + + let solutions = { + let nsols = equi_nsols(eq); + let sols = equi_sols(eq); + let solution_len = 1 << p.k; + //println!("{nsols} solutions of length {solution_len} at {sols:?}"); + + // SAFETY: + // - caller must supply a `p` instance that matches the hard-coded values in the C code. + // - `sols` is a single allocation containing at least `nsols` solutions. + // - this slice is a shared ref to the memory in a valid `eq` instance supplied by the caller. + let solutions: &[u32] = slice::from_raw_parts(sols, nsols * solution_len); + + /* + println!( + "{nsols} solutions of length {solution_len} as a slice of length {:?}", + solutions.len() + ); + */ + + let mut chunks = solutions.chunks_exact(solution_len); + + // SAFETY: + // - caller must supply a `p` instance that matches the hard-coded values in the C code. + // - each solution contains `solution_len` u32 values. + // - the temporary slices are shared refs to a valid `eq` instance supplied by the caller. + // - the bytes in the shared ref are copied before they are returned. + // - dropping `solutions: &[u32]` does not drop the underlying memory owned by `eq`. + let mut solutions = (&mut chunks) + .map(|solution| solution.to_vec()) + .collect::>(); + + assert_eq!(chunks.remainder().len(), 0); + + // Sometimes the solver returns identical solutions. 
+ solutions.sort(); + solutions.dedup(); + + solutions + }; + + /* + println!( + "{} solutions as cloned vectors of length {:?}", + solutions.len(), + solutions + .iter() + .map(|solution| solution.len()) + .collect::>() + ); + */ + + solutions +} + +/// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with +/// the supplied partial `input`. Between each run, generates a new nonce of length `N` using the +/// `next_nonce` function. +/// +/// Returns zero or more unique solutions. +pub fn solve_200_9( + input: &[u8], + mut next_nonce: impl FnMut() -> Option<[u8; N]>, +) -> Vec> { + let p = Params::new(200, 9).expect("should be valid"); + let mut state = verify::initialise_state(p.n, p.k, p.hash_output()); + state.update(input); + + // Create solver and initialize it. + // + // # SAFETY + // - the parameters 200,9 match the hard-coded parameters in the C++ code. + // - tromp is compiled without multi-threading support, so each instance can only support 1 thread. + // - the blake2b functions are in the correct order in Rust and C++ initializers. + #[allow(unsafe_code)] + let eq = unsafe { + equi_new( + blake2b::blake2b_clone, + blake2b::blake2b_free, + blake2b::blake2b_update, + blake2b::blake2b_finalize, + ) + }; + + let solutions = loop { + let nonce = match next_nonce() { + Some(nonce) => nonce, + None => break vec![], + }; + + let mut curr_state = state.clone(); + // Review Note: these hashes are changing when the nonce changes + curr_state.update(&nonce); + + // SAFETY: + // - the parameters 200,9 match the hard-coded parameters in the C++ code. + // - the eq instance is initilized above. + #[allow(unsafe_code)] + let solutions = unsafe { worker(eq, p, &curr_state) }; + if !solutions.is_empty() { + break solutions; + } + }; + + // SAFETY: + // - the eq instance is initilized above, and not used after this point. 
+ #[allow(unsafe_code)] + unsafe { + equi_free(eq) + }; + + solutions +} + +/// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with +/// the supplied partial `input`. Between each run, generates a new nonce of length `N` using the +/// `next_nonce` function. +/// +/// Returns zero or more unique compressed solutions. +pub fn solve_200_9_compressed( + input: &[u8], + next_nonce: impl FnMut() -> Option<[u8; N]>, +) -> Vec> { + let p = Params::new(200, 9).expect("should be valid"); + let solutions = solve_200_9(input, next_nonce); + + let mut solutions: Vec> = solutions + .iter() + .map(|solution| minimal_from_indices(p, solution)) + .collect(); + + // Just in case the solver returns solutions that become the same when compressed. + solutions.sort(); + solutions.dedup(); + + solutions +} + +#[cfg(test)] +mod tests { + use super::solve_200_9_compressed; + + #[test] + #[allow(clippy::print_stdout)] + fn run_solver() { + let input = b"Equihash is an asymmetric PoW based on the Generalised Birthday problem."; + let mut nonce: [u8; 32] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, + ]; + let mut nonces = 0..=32_u32; + let nonce_count = nonces.clone().count(); + + let solutions = solve_200_9_compressed(input, || { + let variable_nonce = nonces.next()?; + println!("Using variable nonce [0..4] of {}", variable_nonce); + + let variable_nonce = variable_nonce.to_le_bytes(); + nonce[0] = variable_nonce[0]; + nonce[1] = variable_nonce[1]; + nonce[2] = variable_nonce[2]; + nonce[3] = variable_nonce[3]; + + Some(nonce) + }); + + if solutions.is_empty() { + // Expected solution rate is documented at: + // https://github.com/tromp/equihash/blob/master/README.md + panic!("Found no solutions after {nonce_count} runs, expected 1.88 solutions per run",); + } else { + println!("Found {} solutions:", solutions.len()); + for (sol_num, solution) in solutions.iter().enumerate() { + 
println!("Validating solution {sol_num}:-\n{}", hex::encode(solution)); + crate::is_valid_solution(200, 9, input, &nonce, solution).unwrap_or_else(|error| { + panic!( + "unexpected invalid equihash 200, 9 solution:\n\ + error: {error:?}\n\ + input: {input:?}\n\ + nonce: {nonce:?}\n\ + solution: {solution:?}" + ) + }); + println!("Solution {sol_num} is valid!\n"); + } + } + } +} diff --git a/components/equihash/src/verify.rs b/components/equihash/src/verify.rs index 53071ddc01..0cc4d27771 100644 --- a/components/equihash/src/verify.rs +++ b/components/equihash/src/verify.rs @@ -114,7 +114,7 @@ impl fmt::Display for Kind { } } -fn initialise_state(n: u32, k: u32, digest_len: u8) -> Blake2bState { +pub(crate) fn initialise_state(n: u32, k: u32, digest_len: u8) -> Blake2bState { let mut personalization: Vec = Vec::from("ZcashPoW"); personalization.write_u32::(n).unwrap(); personalization.write_u32::(k).unwrap(); diff --git a/components/equihash/tromp/blake2b.h b/components/equihash/tromp/blake2b.h new file mode 100644 index 0000000000..23a7409b74 --- /dev/null +++ b/components/equihash/tromp/blake2b.h @@ -0,0 +1,51 @@ +// Copyright (c) 2020-2022 The Zcash developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://www.opensource.org/licenses/mit-license.php . + +#ifndef ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H +#define ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H + +#include + +struct BLAKE2bState; +typedef struct BLAKE2bState BLAKE2bState; +#define BLAKE2bPersonalBytes 16U + +/// Initializes a BLAKE2b state with no key and no salt. +/// +/// `personalization` MUST be a pointer to a 16-byte array. +/// +/// Please free this with `blake2b_free` when you are done. +typedef BLAKE2bState* (*blake2b_init)( + size_t output_len, + const unsigned char* personalization); + +/// Clones the given BLAKE2b state. +/// +/// Both states need to be separately freed with `blake2b_free` when you are +/// done. 
+typedef BLAKE2bState* (*blake2b_clone)(const BLAKE2bState* state); + +/// Frees a BLAKE2b state returned by `blake2b_init`. +typedef void (*blake2b_free)(BLAKE2bState* state); + +/// Adds input to the hash. You can call this any number of times. +typedef void (*blake2b_update)( + BLAKE2bState* state, + const unsigned char* input, + size_t input_len); + +/// Finalizes the `state` and stores the result in `output`. +/// +/// `output_len` MUST be less than or equal to the value that was passed as the +/// first parameter to `blake2b_init`. +/// +/// This method is idempotent, and calling it multiple times will give the same +/// result. It's also possible to call `blake2b_update` with more input in +/// between. +typedef void (*blake2b_finalize)( + BLAKE2bState* state, + unsigned char* output, + size_t output_len); + +#endif // ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h new file mode 100644 index 0000000000..7b3969f52f --- /dev/null +++ b/components/equihash/tromp/equi.h @@ -0,0 +1,47 @@ +// Equihash solver +// Copyright (c) 2016-2016 John Tromp, The Zcash developers + +#ifndef ZCASH_POW_TROMP_EQUI_H +#define ZCASH_POW_TROMP_EQUI_H + +#include // for type bool +#include // for types uint32_t,uint64_t +#include // for functions memset +#include // for function qsort + +#include "blake2b.h" + +typedef uint32_t u32; +typedef unsigned char uchar; + +// algorithm parameters, prefixed with W to reduce include file conflicts + +#ifndef WN +#define WN 200 +#endif + +#ifndef WK +#define WK 9 +#endif + +#define NDIGITS (WK+1) +#define DIGITBITS (WN/(NDIGITS)) + +#define PROOFSIZE (1<0 the leftmost leaf of its left subtree +// is less than the leftmost leaf of its right subtree + +// The algorithm below solves this by maintaining the trees +// in a graph of K layers, each split into buckets +// with buckets indexed by the first n-RESTBITS bits following +// the i*n 0s, each bucket having 4 * 2^RESTBITS 
slots, +// twice the number of subtrees expected to land there. + +#ifndef ZCASH_POW_TROMP_EQUI_MINER_H +#define ZCASH_POW_TROMP_EQUI_MINER_H + +#include "equi.h" + +// Provides htole32() on macOS and Windows +#include "portable_endian.h" + +#include +#include +#include + +typedef uint16_t u16; +typedef uint64_t u64; + +#ifdef EQUIHASH_TROMP_ATOMIC +#include +typedef atomic_uint au32; +#else +typedef u32 au32; +#endif + +#ifndef RESTBITS +#define RESTBITS 8 +#endif + +// 2_log of number of buckets +#define BUCKBITS (DIGITBITS-RESTBITS) + +#ifndef SAVEMEM +#if RESTBITS == 4 +// can't save memory in such small buckets +#define SAVEMEM 1 +#elif RESTBITS >= 8 +// take advantage of law of large numbers (sum of 2^8 random numbers) +// this reduces (200,9) memory to under 144MB, with negligible discarding +#define SAVEMEM 9/14 +#endif +#endif + +// number of buckets +#define NBUCKETS (1<bid_s0_s1; + } + u32 bucketid(const tree *t) { +#ifdef SLOTDIFF + return t->bid_s0_s1 >> (2 * SLOTBITS - 1); +#else + return t->bid_s0_s1 >> (2 * SLOTBITS); +#endif + } + u32 slotid0(const tree *t) { +#ifdef SLOTDIFF + return (t->bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK; +#else + return (t->bid_s0_s1 >> SLOTBITS) & SLOTMASK; +#endif + } + u32 slotid1(const tree *t) { +#ifdef SLOTDIFF + return (slotid0() + 1 + (t->bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK; +#else + return t->bid_s0_s1 & SLOTMASK; +#endif + } + +union hashunit { + u32 word; + uchar bytes[sizeof(u32)]; +}; +typedef union hashunit hashunit; + +#define WORDS(bits) ((bits + 31) / 32) +#define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS) +#define HASHWORDS1 WORDS(WN - 2*DIGITBITS + RESTBITS) + +struct slot0 { + tree attr; + hashunit hash[HASHWORDS0]; +}; +typedef struct slot0 slot0; + +struct slot1 { + tree attr; + hashunit hash[HASHWORDS1]; +}; +typedef struct slot1 slot1; + +// a bucket is NSLOTS treenodes +typedef slot0 bucket0[NSLOTS]; +typedef slot1 bucket1[NSLOTS]; +// the N-bit hash consists of K+1 n-bit "digits" +// each of 
which corresponds to a layer of NBUCKETS buckets +typedef bucket0 digit0[NBUCKETS]; +typedef bucket1 digit1[NBUCKETS]; + +// size (in bytes) of hash in round 0 <= r < WK +u32 hashsize(const u32 r) { + const u32 hashbits = WN - (r+1) * DIGITBITS + RESTBITS; + return (hashbits + 7) / 8; +} + +u32 hashwords(u32 bytes) { + return (bytes + 3) / 4; +} + +// manages hash and tree data +struct htalloc { + u32 *heap0; + u32 *heap1; + bucket0 *trees0[(WK+1)/2]; + bucket1 *trees1[WK/2]; + u32 alloced; +}; +typedef struct htalloc htalloc; + htalloc htalloc_new() { + htalloc hta; + hta.alloced = 0; + return hta; + } + void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz); + void alloctrees(htalloc *hta) { +// optimize xenoncat's fixed memory layout, avoiding any waste +// digit trees hashes trees hashes +// 0 0 A A A A A A . . . . . . +// 1 0 A A A A A A 1 B B B B B +// 2 0 2 C C C C C 1 B B B B B +// 3 0 2 C C C C C 1 3 D D D D +// 4 0 2 4 E E E E 1 3 D D D D +// 5 0 2 4 E E E E 1 3 5 F F F +// 6 0 2 4 6 . G G 1 3 5 F F F +// 7 0 2 4 6 . G G 1 3 5 7 H H +// 8 0 2 4 6 8 . I 1 3 5 7 H H + assert(DIGITBITS >= 16); // ensures hashes shorten by 1 unit every 2 digits + hta->heap0 = (u32 *)htalloc_alloc(hta, 1, sizeof(digit0)); + hta->heap1 = (u32 *)htalloc_alloc(hta, 1, sizeof(digit1)); + for (int r=0; rtrees0[r/2] = (bucket0 *)(hta->heap0 + r/2); + else + hta->trees1[r/2] = (bucket1 *)(hta->heap1 + r/2); + } + void dealloctrees(htalloc *hta) { + if (hta == NULL) { + return; + } + + free(hta->heap0); + free(hta->heap1); + // Avoid use-after-free and double-free + hta->heap0 = NULL; + hta->heap1 = NULL; + + for (int r=0; rtrees0[r/2] = NULL; + else + hta->trees1[r/2] = NULL; + hta->alloced = 0; + } + void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz) { + void *mem = calloc(n, sz); + assert(mem); + hta->alloced += n * sz; + return mem; + } + +typedef au32 bsizes[NBUCKETS]; + +u32 minu32(const u32 a, const u32 b) { + return a < b ? 
a : b; +} + +struct equi { + BLAKE2bState* blake_ctx; + blake2b_clone blake2b_clone; + blake2b_free blake2b_free; + blake2b_update blake2b_update; + blake2b_finalize blake2b_finalize; + htalloc hta; + bsizes *nslots; // PUT IN BUCKET STRUCT + proof *sols; + au32 nsols; + u32 xfull; + u32 hfull; + u32 bfull; +}; +typedef struct equi equi; + void equi_clearslots(equi *eq); + equi *equi_new( + blake2b_clone blake2b_clone, + blake2b_free blake2b_free, + blake2b_update blake2b_update, + blake2b_finalize blake2b_finalize + ) { + assert(sizeof(hashunit) == 4); + equi *eq = malloc(sizeof(equi)); + eq->blake2b_clone = blake2b_clone; + eq->blake2b_free = blake2b_free; + eq->blake2b_update = blake2b_update; + eq->blake2b_finalize = blake2b_finalize; + + alloctrees(&eq->hta); + eq->nslots = (bsizes *)htalloc_alloc(&eq->hta, 2 * NBUCKETS, sizeof(au32)); + eq->sols = (proof *)htalloc_alloc(&eq->hta, MAXSOLS, sizeof(proof)); + + // C malloc() does not guarantee zero-initialized memory (but calloc() does) + eq->blake_ctx = NULL; + eq->nsols = 0; + equi_clearslots(eq); + + return eq; + } + void equi_free(equi *eq) { + if (eq == NULL) { + return; + } + + dealloctrees(&eq->hta); + + free(eq->nslots); + free(eq->sols); + eq->blake2b_free(eq->blake_ctx); + // Avoid use-after-free and double-free + eq->nslots = NULL; + eq->sols = NULL; + eq->blake_ctx = NULL; + + free(eq); + } + void equi_setstate(equi *eq, const BLAKE2bState *ctx) { + if (eq->blake_ctx) { + eq->blake2b_free(eq->blake_ctx); + } + + eq->blake_ctx = eq->blake2b_clone(ctx); + memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing + equi_clearslots(eq); + eq->nsols = 0; + } + void equi_clearslots(equi *eq) { + eq->xfull = eq->bfull = eq->hfull = 0; + } + u32 getslot(equi *eq, const u32 r, const u32 bucketi) { +#ifdef EQUIHASH_TROMP_ATOMIC + return std::atomic_fetch_add_explicit(&eq->nslots[r&1][bucketi], 1U, std::memory_order_relaxed); +#else + return eq->nslots[r&1][bucketi]++; +#endif + } + u32 
getnslots(equi *eq, const u32 r, const u32 bid) { // SHOULD BE METHOD IN BUCKET STRUCT + au32 *nslot = &eq->nslots[r&1][bid]; + const u32 n = minu32(*nslot, NSLOTS); + *nslot = 0; + return n; + } + void orderindices(u32 *indices, u32 size) { + if (indices[0] > indices[size]) { + for (u32 i=0; i < size; i++) { + const u32 tmp = indices[i]; + indices[i] = indices[size+i]; + indices[size+i] = tmp; + } + } + } + void listindices1(equi *eq, u32 r, const tree t, u32 *indices); + void listindices0(equi *eq, u32 r, const tree t, u32 *indices) { + if (r == 0) { + *indices = getindex(&t); + return; + } + const bucket1 *buck = &eq->hta.trees1[--r/2][bucketid(&t)]; + const u32 size = 1 << r; + u32 *indices1 = indices + size; + listindices1(eq, r, (*buck)[slotid0(&t)].attr, indices); + listindices1(eq, r, (*buck)[slotid1(&t)].attr, indices1); + orderindices(indices, size); + } + void listindices1(equi *eq, u32 r, const tree t, u32 *indices) { + const bucket0 *buck = &eq->hta.trees0[--r/2][bucketid(&t)]; + const u32 size = 1 << r; + u32 *indices1 = indices + size; + listindices0(eq, r, (*buck)[slotid0(&t)].attr, indices); + listindices0(eq, r, (*buck)[slotid1(&t)].attr, indices1); + orderindices(indices, size); + } + void candidate(equi *eq, const tree t) { + proof prf; + listindices1(eq, WK, t, prf); // assume WK odd + qsort(prf, PROOFSIZE, sizeof(u32), &compu32); + for (u32 i=1; i proof[%d], actual: %d <= %d\n", + i, i-1, prf[i], prf[i-1] + ); + */ + return; + } +#ifdef EQUIHASH_TROMP_ATOMIC + u32 soli = std::atomic_fetch_add_explicit(&eq->nsols, 1U, std::memory_order_relaxed); +#else + u32 soli = eq->nsols++; +#endif + if (soli < MAXSOLS) + listindices1(eq, WK, t, eq->sols[soli]); // assume WK odd + } +#ifdef EQUIHASH_SHOW_BUCKET_SIZES + void showbsizes(equi *eq, u32 r) { +#if defined(HIST) || defined(SPARK) || defined(LOGSPARK) + u32 binsizes[65]; + memset(binsizes, 0, 65 * sizeof(u32)); + for (u32 bucketid = 0; bucketid < NBUCKETS; bucketid++) { + u32 bsize = 
minu32(eq->nslots[r&1][bucketid], NSLOTS) >> (SLOTBITS-6); + binsizes[bsize]++; + } + for (u32 i=0; i < 65; i++) { +#ifdef HIST +// printf(" %d:%d", i, binsizes[i]); +#else +#ifdef SPARK + u32 sparks = binsizes[i] / SPARKSCALE; +#else + u32 sparks = 0; + for (u32 bs = binsizes[i]; bs; bs >>= 1) sparks++; + sparks = sparks * 7 / SPARKSCALE; +#endif +// printf("\342\226%c", '\201' + sparks); +#endif + } +// printf("\n"); +#endif + } +#endif + + struct htlayout { + htalloc hta; + u32 prevhashunits; + u32 nexthashunits; + u32 dunits; + u32 prevbo; + u32 nextbo; + }; + typedef struct htlayout htlayout; + + htlayout htlayout_new(equi *eq, u32 r) { + htlayout htl; + htl.hta = eq->hta; + htl.prevhashunits = 0; + htl.dunits = 0; + u32 nexthashbytes = hashsize(r); + htl.nexthashunits = hashwords(nexthashbytes); + htl.prevbo = 0; + htl.nextbo = htl.nexthashunits * sizeof(hashunit) - nexthashbytes; // 0-3 + if (r) { + u32 prevhashbytes = hashsize(r-1); + htl.prevhashunits = hashwords(prevhashbytes); + htl.prevbo = htl.prevhashunits * sizeof(hashunit) - prevhashbytes; // 0-3 + htl.dunits = htl.prevhashunits - htl.nexthashunits; + } + return htl; + } + u32 getxhash0(const htlayout *htl, const slot0* pslot) { +#if WN == 200 && RESTBITS == 4 + return pslot->hash->bytes[htl->prevbo] >> 4; +#elif WN == 200 && RESTBITS == 8 + return (pslot->hash->bytes[htl->prevbo] & 0xf) << 4 | pslot->hash->bytes[htl->prevbo+1] >> 4; +#elif WN == 200 && RESTBITS == 9 + return (pslot->hash->bytes[htl->prevbo] & 0x1f) << 4 | pslot->hash->bytes[htl->prevbo+1] >> 4; +#elif WN == 144 && RESTBITS == 4 + return pslot->hash->bytes[htl->prevbo] & 0xf; +#else +#error non implemented +#endif + } + u32 getxhash1(const htlayout *htl, const slot1* pslot) { +#if WN == 200 && RESTBITS == 4 + return pslot->hash->bytes[htl->prevbo] & 0xf; +#elif WN == 200 && RESTBITS == 8 + return pslot->hash->bytes[htl->prevbo]; +#elif WN == 200 && RESTBITS == 9 + return (pslot->hash->bytes[htl->prevbo]&1) << 8 | 
pslot->hash->bytes[htl->prevbo+1]; +#elif WN == 144 && RESTBITS == 4 + return pslot->hash->bytes[htl->prevbo] & 0xf; +#else +#error non implemented +#endif + } + bool htlayout_equal(const htlayout *htl, const hashunit *hash0, const hashunit *hash1) { + return hash0[htl->prevhashunits-1].word == hash1[htl->prevhashunits-1].word; + } + +#if RESTBITS <= 6 + typedef uchar xslot; +#else + typedef u16 xslot; +#endif + struct collisiondata { +#ifdef XBITMAP +#if NSLOTS > 64 +#error cant use XBITMAP with more than 64 slots +#endif + u64 xhashmap[NRESTS]; + u64 xmap; +#else + xslot nxhashslots[NRESTS]; + xslot xhashslots[NRESTS][XFULL]; + xslot *xx; + u32 n0; + u32 n1; +#endif + u32 s0; + }; + typedef struct collisiondata collisiondata; + + void collisiondata_clear(collisiondata *cd) { +#ifdef XBITMAP + memset(cd->xhashmap, 0, NRESTS * sizeof(u64)); +#else + memset(cd->nxhashslots, 0, NRESTS * sizeof(xslot)); +#endif + } + bool addslot(collisiondata *cd, u32 s1, u32 xh) { +#ifdef XBITMAP + xmap = xhashmap[xh]; + xhashmap[xh] |= (u64)1 << s1; + s0 = -1; + return true; +#else + cd->n1 = (u32)cd->nxhashslots[xh]++; + if (cd->n1 >= XFULL) + return false; + cd->xx = cd->xhashslots[xh]; + cd->xx[cd->n1] = s1; + cd->n0 = 0; + return true; +#endif + } + bool nextcollision(const collisiondata *cd) { +#ifdef XBITMAP + return cd->xmap != 0; +#else + return cd->n0 < cd->n1; +#endif + } + u32 slot(collisiondata *cd) { +#ifdef XBITMAP + const u32 ffs = __builtin_ffsll(cd->xmap); + s0 += ffs; cd->xmap >>= ffs; + return s0; +#else + return (u32)cd->xx[cd->n0++]; +#endif + } + + void equi_digit0(equi *eq, const u32 id) { + uchar hash[HASHOUT]; + BLAKE2bState* state; + htlayout htl = htlayout_new(eq, 0); + const u32 hashbytes = hashsize(0); + for (u32 block = id; block < NBLOCKS; block++) { + state = eq->blake2b_clone(eq->blake_ctx); + u32 leb = htole32(block); + eq->blake2b_update(state, (uchar *)&leb, sizeof(u32)); + eq->blake2b_finalize(state, hash, HASHOUT); + eq->blake2b_free(state); + 
// Avoid use-after-free and double-free + state = NULL; + + /* NOTE(review): the text between "i" and "> 4;" below looks truncated: the loop condition, the definition of ph and the opening #if of the bucketid chain are not present. Confirm against the original file. */ for (u32 i = 0; i> 4; +#elif BUCKBITS == 11 && RESTBITS == 9 + const u32 bucketid = ((u32)ph[0] << 3) | ph[1] >> 5; +#elif BUCKBITS == 20 && RESTBITS == 4 + const u32 bucketid = ((((u32)ph[0] << 8) | ph[1]) << 4) | ph[2] >> 4; +#elif BUCKBITS == 12 && RESTBITS == 4 + const u32 bucketid = ((u32)ph[0] << 4) | ph[1] >> 4; + const u32 xhash = ph[1] & 0xf; +#else +#error not implemented +#endif + /* ask for a slot in bucket bucketid of round 0; an index >= NSLOTS is counted in bfull and this hash is skipped */ const u32 slot = getslot(eq, 0, bucketid); + if (slot >= NSLOTS) { + eq->bfull++; + continue; + } + slot0 *s = &eq->hta.trees0[0][bucketid][slot]; + s->attr = tree_from_idx(block * HASHESPERBLAKE + i); + /* copies the hashbytes bytes that end at ph+WN/8 to byte offset nextbo of the slot's hash */ memcpy(s->hash->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes); + } + } + } + + /* Round r for odd r (worker() calls it when r&1 is set). For each bucket from id up to NBUCKETS it walks the round r-1 slots in trees0[(r-1)/2], files them by getxhash0() through the collisiondata helpers, and for every colliding pair that is not equal under htlayout_equal() stores tree_from_bid(bucketid, s0, s1) plus the XOR of the remaining hash words in a slot of trees1[r/2]. Counters: xfull when addslot() returns false, hfull for pairs skipped as equal, bfull when getslot() returns an index >= NSLOTS. */ void equi_digitodd(equi *eq, const u32 r, const u32 id) { + htlayout htl = htlayout_new(eq, r); + collisiondata cd; + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid++) { + collisiondata_clear(&cd); + slot0 *buck = htl.hta.trees0[(r-1)/2][bucketid]; // optimize by updating previous buck?! + u32 bsize = getnslots(eq, r-1, bucketid); // optimize by putting bucketsize with block?! + for (u32 s1 = 0; s1 < bsize; s1++) { + const slot0 *pslot1 = buck + s1; // optimize by updating previous pslot1?!
+ if (!addslot(&cd, s1, getxhash0(&htl, pslot1))) { + eq->xfull++; + continue; + } + for (; nextcollision(&cd); ) { + const u32 s0 = slot(&cd); + const slot0 *pslot0 = buck + s0; + if (htlayout_equal(&htl, pslot0->hash, pslot1->hash)) { + eq->hfull++; + continue; + } + /* destination bucket: selected bits of the XOR of the two hashes; which bytes and bits are used depends on WN, BUCKBITS and RESTBITS */ u32 xorbucketid; + const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes; +#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8 + xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 8) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]); +#elif WN == 200 && BUCKBITS == 11 && RESTBITS == 9 + xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 7) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 1; +#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4 + xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4) + | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 4; +#elif WN == 96 && BUCKBITS == 12 && RESTBITS == 4 + xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; +#else +#error not implemented +#endif + const u32 xorslot = getslot(eq, r, xorbucketid); + if (xorslot >= NSLOTS) { + eq->bfull++; + continue; + } + slot1 *xs = &htl.hta.trees1[r/2][xorbucketid][xorslot]; + xs->attr = tree_from_bid(bucketid, s0, s1); + /* store the XOR of hash units dunits..prevhashunits-1, written starting at unit 0 of the new slot */ for (u32 i=htl.dunits; i < htl.prevhashunits; i++) + xs->hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; + } + } + } + } + + /* Round r for even r (worker() calls it when r&1 is clear). Same procedure as equi_digitodd with the tables swapped: it reads the round r-1 slots from trees1[(r-1)/2], files them by getxhash1(), and writes tree_from_bid(bucketid, s0, s1) plus the XORed hash words into a slot of trees0[r/2]. Counters xfull, hfull and bfull are updated in the same situations. */ void equi_digiteven(equi *eq, const u32 r, const u32 id) { + htlayout htl = htlayout_new(eq, r); + collisiondata cd; + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid++) { + collisiondata_clear(&cd); + slot1 *buck = htl.hta.trees1[(r-1)/2][bucketid]; // OPTIMIZE BY UPDATING PREVIOUS + u32 bsize = getnslots(eq, r-1, bucketid); + for (u32 s1 = 0; s1 < bsize; s1++) { + const slot1 *pslot1 = buck + s1; // OPTIMIZE BY UPDATING PREVIOUS
+ if (!addslot(&cd, s1, getxhash1(&htl, pslot1))) { + eq->xfull++; + continue; + } + for (; nextcollision(&cd); ) { + const u32 s0 = slot(&cd); + const slot1 *pslot0 = buck + s0; + if (htlayout_equal(&htl, pslot0->hash, pslot1->hash)) { + eq->hfull++; + continue; + } + /* destination bucket: selected bits of the XOR of the two hashes; which bytes and bits are used depends on WN, BUCKBITS and RESTBITS */ u32 xorbucketid; + const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes; +#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8 + xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; +#elif WN == 200 && BUCKBITS == 11 && RESTBITS == 9 + xorbucketid = ((u32)(bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) << 3) + | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 5; +#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4 + xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4) + | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 4; +#elif WN == 96 && BUCKBITS == 12 && RESTBITS == 4 + xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; +#else +#error not implemented +#endif + const u32 xorslot = getslot(eq, r, xorbucketid); + if (xorslot >= NSLOTS) { + eq->bfull++; + continue; + } + slot0 *xs = &htl.hta.trees0[r/2][xorbucketid][xorslot]; + xs->attr = tree_from_bid(bucketid, s0, s1); + /* store the XOR of hash units dunits..prevhashunits-1, written starting at unit 0 of the new slot */ for (u32 i=htl.dunits; i < htl.prevhashunits; i++) + xs->hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; + } + } + } + } + + /* Final round (WK). For each bucket from id up to NBUCKETS it scans the round WK-1 slots in trees0[(WK-1)/2]; every pair that collides on getxhash0() and is equal under htlayout_equal() is handed to candidate() as tree_from_bid(bucketid, s0, s1). nc only counts those calls; the printf that would report it is commented out. */ void equi_digitK(equi *eq, const u32 id) { + collisiondata cd; + htlayout htl = htlayout_new(eq, WK); +u32 nc = 0; + for (u32 bucketid = id; bucketid < NBUCKETS; bucketid++) { + collisiondata_clear(&cd); + slot0 *buck = htl.hta.trees0[(WK-1)/2][bucketid]; + u32 bsize = getnslots(eq, WK-1, bucketid); + for (u32 s1 = 0; s1 < bsize; s1++) { + const slot0 *pslot1 = buck + s1; + if (!addslot(&cd, s1, getxhash0(&htl, pslot1))) // assume WK odd +
continue; + for (; nextcollision(&cd); ) { + const u32 s0 = slot(&cd); + if (htlayout_equal(&htl, buck[s0].hash, pslot1->hash)) +nc++, candidate(eq, tree_from_bid(bucketid, s0, s1)); + } + } + } +//printf(" %d candidates\n", nc); + } + + /* Accessor for eq->nsols (presumably the number of entries in eq->sols; confirm against the equi definition). */ size_t equi_nsols(const equi *eq) { + return eq->nsols; + } + /* Accessor for eq->sols; returns the pointer as stored, without copying. */ proof *equi_sols(const equi *eq) { + return eq->sols; + } + +/* Argument bundle read by worker(): an id and the solver state to run on. */ typedef struct { + u32 id; + equi *eq; +} thread_ctx; + +/* Runs all rounds in order on tp->eq: equi_digit0, then for r = 1..WK-1 equi_digitodd (odd r) or equi_digiteven (even r), then equi_digitK. When tp->id is 0 it also calls equi_clearslots() before round 0 and after every round except the last, and showbsizes() if EQUIHASH_SHOW_BUCKET_SIZES is defined. vp must point to a thread_ctx. Always returns 0 (a null pointer). */ void *worker(void *vp) { + thread_ctx *tp = (thread_ctx *)vp; + equi *eq = tp->eq; + +// if (tp->id == 0) +// printf("Digit 0\n"); + if (tp->id == 0) { + equi_clearslots(eq); + } + equi_digit0(eq, tp->id); + if (tp->id == 0) { + equi_clearslots(eq); +#ifdef EQUIHASH_SHOW_BUCKET_SIZES + showbsizes(eq, 0); +#endif + } + for (u32 r = 1; r < WK; r++) { +// if (tp->id == 0) +// printf("Digit %d", r); + r&1 ? equi_digitodd(eq, r, tp->id) : equi_digiteven(eq, r, tp->id); + if (tp->id == 0) { +// printf(" x%d b%d h%d\n", eq->xfull, eq->bfull, eq->hfull); + equi_clearslots(eq); +#ifdef EQUIHASH_SHOW_BUCKET_SIZES + showbsizes(eq, r); +#endif + } + } +// if (tp->id == 0) +// printf("Digit %d\n", WK); + equi_digitK(eq, tp->id); + return 0; +} + +#endif // ZCASH_POW_TROMP_EQUI_MINER_H diff --git a/components/equihash/tromp/portable_endian.h b/components/equihash/tromp/portable_endian.h new file mode 100644 index 0000000000..4a71ce7a7a --- /dev/null +++ b/components/equihash/tromp/portable_endian.h @@ -0,0 +1,130 @@ +// +// endian.h +// +// https://gist.github.com/panzi/6856583 +// +// I, Mathias Panzenböck, place this file hereby into the public domain. Use +// it at your own risk for whatever you like. In case there are +// jurisdictions that don't support putting things in the public domain you +// can also consider it to be "dual licensed" under the BSD, MIT and Apache +// licenses, if you want to. This code is trivial anyway. Consider it an +// example on how to get the endian conversion functions on different +// platforms.
+ +// Downloaded from https://raw.githubusercontent.com/mikepb/endian.h/master/endian.h +// on 12 January 2024. + +#ifndef PORTABLE_ENDIAN_H__ +#define PORTABLE_ENDIAN_H__ + +#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) + +# define __WINDOWS__ + +#endif + /* Each branch below is expected to leave the htobeNN, htoleNN, beNNtoh and leNNtoh conversions (NN = 16, 32, 64) available for one platform family, either from a system header or through the macros defined here. */ +#if defined(__linux__) || defined(__CYGWIN__) + +# include <endian.h> + +#elif defined(__APPLE__) + +# include <libkern/OSByteOrder.h> + +# define htobe16(x) OSSwapHostToBigInt16(x) +# define htole16(x) OSSwapHostToLittleInt16(x) +# define be16toh(x) OSSwapBigToHostInt16(x) +# define le16toh(x) OSSwapLittleToHostInt16(x) + +# define htobe32(x) OSSwapHostToBigInt32(x) +# define htole32(x) OSSwapHostToLittleInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) + +# define htobe64(x) OSSwapHostToBigInt64(x) +# define htole64(x) OSSwapHostToLittleInt64(x) +# define be64toh(x) OSSwapBigToHostInt64(x) +# define le64toh(x) OSSwapLittleToHostInt64(x) + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#elif defined(__OpenBSD__) + +# include <sys/endian.h> + +#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) + +# include <sys/endian.h> + +# define be16toh(x) betoh16(x) +# define le16toh(x) letoh16(x) + +# define be32toh(x) betoh32(x) +# define le32toh(x) letoh32(x) + +# define be64toh(x) betoh64(x) +# define le64toh(x) letoh64(x) + +#elif defined(__WINDOWS__) + +# include <winsock2.h> + +// Not available in librustzcash CI +//# include <sys/param.h> + +# if BYTE_ORDER == LITTLE_ENDIAN + +# define htobe16(x) htons(x) +# define htole16(x) (x) +# define be16toh(x) ntohs(x) +# define le16toh(x) (x) + +# define htobe32(x) htonl(x) +# define htole32(x) (x) +# define be32toh(x) ntohl(x) +# define le32toh(x) (x) + +# define htobe64(x) htonll(x) +# define htole64(x) (x) +# define be64toh(x) ntohll(x) +# define le64toh(x) (x) + +# elif BYTE_ORDER == BIG_ENDIAN + + /* that would be xbox 360 */ +# define htobe16(x) (x)
+# define htole16(x) __builtin_bswap16(x) +# define be16toh(x) (x) +# define le16toh(x) __builtin_bswap16(x) + +# define htobe32(x) (x) +# define htole32(x) __builtin_bswap32(x) +# define be32toh(x) (x) +# define le32toh(x) __builtin_bswap32(x) + +# define htobe64(x) (x) +# define htole64(x) __builtin_bswap64(x) +# define be64toh(x) (x) +# define le64toh(x) __builtin_bswap64(x) + +# else + +# error byte order not supported + +# endif + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#else + +# error platform not supported + +#endif + +#endif