ingonyama-zk · danny-shterman · Jan 15, 2025 · Jan 20, 2025 · Jan 23, 2025
diff --git a/docs/docs/icicle/primitives/hash.md b/docs/docs/icicle/primitives/hash.md
@@ -62,17 +62,13 @@ It is an improved version of the original [Poseidon](https://eprint.iacr.org/201
 
 The optional `domain_tag` pointer parameter enables domain separation, allowing isolation of hash outputs across different contexts or applications.
 
-:::info
-
 The supported values of state size ***t*** as defined in [eprint 2023/323](https://eprint.iacr.org/2023/323.pdf) are 2, 3, 4, 8, 12, 16, 20 and 24. Note that ***t*** sizes 8, 12, 16, 20 and 24 are supported only for small fields (babybear and m31).
 
-:::
-
-:::info
-
 The S box power alpha, number of full rounds and partial rounds, rounds constants, MDS matrix, and partial matrix for each field and ***t*** can be found in this [folder](https://github.com/ingonyama-zk/icicle/tree/9b1506cda9eab30fc6a8d0a338e2cfab877402f7/icicle/include/icicle/hash/poseidon2_constants/constants).
 
-:::
+There are two modes for using the Poseidon2 hash: a sponge function and a non-sponge (Merkle tree) function. The key difference between these modes is their execution pattern. The sponge function is inherently serial (each hash must wait for the previous hash to complete before it can start), while the non-sponge function (which consists of multiple independent hashes that don't share inputs) runs in parallel using GPU threads, with the number of threads equal to `config.batch`.
+
+The hash function automatically chooses between these modes based on the input size. It runs in sponge mode if the input size (including the `domain_tag`, if present) is greater than the width of a single hash; in this case, `config.batch` must be set to 1. Otherwise, it uses the non-sponge mode.
 
 In the current version the padding is not supported and should be performed by the user.
 

diff --git a/icicle/backend/cpu/src/hash/cpu_poseidon2.cpp b/icicle/backend/cpu/src/hash/cpu_poseidon2.cpp
@@ -147,7 +147,7 @@ namespace icicle {
         ICICLE_LOG_ERROR
           << "cpu_poseidon2_init_default_constants: T (width) must be one of [2, 3, 4, 8, 12, 16, 20, 24]\n";
         return eIcicleError::INVALID_ARGUMENT;
-      } // switch (T) {
+      }                                              // switch (T) {
       if (full_rounds == 0 && partial_rounds == 0) { // All arrays are empty in this case.
         continue;
       }
@@ -196,24 +196,173 @@ namespace icicle {
 
     // For merkle tree size should be equal to the arity of a single hasher multiplier by sizeof(S).
     // For sponge function it could be any number.
+    // Size parameter here is in bytes; it is converted to a scalar count below via size / sizeof(S).
     eIcicleError hash(const std::byte* input, uint64_t size, const HashConfig& config, std::byte* output) const override
     {
-      unsigned int arity = m_use_domain_tag ? m_t - 1 : m_t;
-
-      // Currently sponge and padding functionalities are not supported.
-      if (size != arity * sizeof(S)) {
+      const unsigned arity = m_use_domain_tag ? m_t - 1 : m_t; // Input scalars per single hash; the domain tag, when used, takes one of the m_t state slots.
+      bool is_sponge = false;
+      int input_size_in_scalars = size / sizeof(S); // Integer division: trailing bytes that do not fill a whole scalar are dropped.
+      if (input_size_in_scalars > (m_use_domain_tag ? m_t - 1 : m_t)) {    // Sponge function. Check input size granularity.
+        is_sponge = true;
+        // Capacity width (in scalars) = 1.
+        // Output width (in scalars) = 1.
+        if ((m_use_domain_tag ? input_size_in_scalars : input_size_in_scalars - 1) % (m_t - 1) != 0) {
+          ICICLE_LOG_ERROR
+            << "Padding isn't supported for sponge function hash. The following should be true: ((m_use_domain_tag ? size : size-1) % (m_t-1) != 0).\n"; // NOTE(review): the message text says != 0, but the check above rejects the input when the remainder is non-zero - confirm wording.
+          return eIcicleError::INVALID_ARGUMENT;
+        }
+        if (config.batch != 1) {
+          ICICLE_LOG_ERROR
+            << "The only suppoorted value of config.batch for sponge functions is 1.\n"; // NOTE(review): typo in the message text (suppoorted).
+          return eIcicleError::INVALID_ARGUMENT;
+        }
+      } 
+      else if (input_size_in_scalars < (m_use_domain_tag ? m_t - 1 : m_t)) { // Input is shorter than what a single hash consumes.
         ICICLE_LOG_ERROR
-          << "Sponge function still isn't supported. The following should be true: (size == T) but it is not.\n";
+          << "Padding isn't supported for sponge function hash. The following should be true: ((use_domain_tag ? size + 1 : size) % T = 0).\n"; // NOTE(review): this branch is the too-short non-sponge input case, yet the message refers to the sponge function - confirm wording.
         return eIcicleError::INVALID_ARGUMENT;
       }
-      // Call hash_single config.batch times.
-      for (int batch_hash_idx = 0; batch_hash_idx < config.batch; batch_hash_idx++) {
-        eIcicleError err = hash_single(input, output);
+
+      const unsigned int T = m_t;
+      bool is_unsupported_T_for_this_field = poseidon2_constants[T].nof_upper_full_rounds == 0;
+      if (is_unsupported_T_for_this_field) {
+        ICICLE_LOG_ERROR << "Unsupported poseidon width (t = " << T << ") for this field! Planned for next version";
+        return eIcicleError::API_NOT_IMPLEMENTED;
+      }
+
+      int alpha = poseidon2_constants[T].alpha;
+      int nof_upper_full_rounds = poseidon2_constants[T].nof_upper_full_rounds;
+      int nof_partial_rounds = poseidon2_constants[T].nof_partial_rounds;
+      int nof_bottom_full_rounds = poseidon2_constants[T].nof_bottom_full_rounds;
+      S* rounds_constants = poseidon2_constants[T].rounds_constants;
+      S* mds_matrix = poseidon2_constants[T].mds_matrix;
+      S* partial_matrix_diagonal_m1 = poseidon2_constants[T].partial_matrix_diagonal_m1;
+
+      // Allocate temporary memory for intermediate calcs and in order not to change the input.
+      // int sponge_nof_hashers = m_use_domain_tag ? (input_size_in_scalars / arity) :  ((input_size_in_scalars - 1) / (arity - 1));
+      // int tmp_fields_nof_scalars = is_sponge ? (T * sponge_nof_hashers) : (T * config.batch);
+      // S* tmp_fields = new S[tmp_fields_nof_scalars];
+      S* tmp_fields;
+      int sponge_nof_hashers;
+      const S* in_fields = (S*)(input);
+      int padding_size = 0;
+      S* padding; // Left uninitialized here; allocated below only when padding_size > 0.
+      if (is_sponge) {
+        if (input_size_in_scalars <= T) {
+          sponge_nof_hashers = 1;
+          padding_size = T - (input_size_in_scalars + (m_use_domain_tag == 1));
+        } else {
+          sponge_nof_hashers = (input_size_in_scalars + (m_use_domain_tag == 1) + 1 /* for domain_tag */ - T + (T - 2)) / (T - 1);
+          padding_size = (input_size_in_scalars + (m_use_domain_tag == 1) - T) % (T - 1);
+        }
+        if (padding_size > 0) {   // Fill padding array with 1,0,0,...
+          padding = new S[padding_size];
+          padding[0] = S::from(1);
+          for (int i = 1; i < padding_size; i++) {
+            padding[i] = S::from(0);
+          }
+        }
+        tmp_fields = new S[T * sponge_nof_hashers]; // One T-scalar state per chained hasher.
+        if (m_use_domain_tag) {      
+          // Domain tag exists only for the first hasher. For the rest of the hashers this
+          // input is undefined at this stage and its value will be set later.
+          // tmp_fields = {{dt, in0}, {undef, in1}, {undef, in2}, etc.}
+          memcpy(tmp_fields, &m_domain_tag, sizeof(S));   
+        }
+        else {
+          // tmp_fields = {{in0 (T inputs)}, {undef, in1 (T-1 inputs)}, {undef, in2 (T-1 inputs)}, etc.}
+          memcpy(tmp_fields, in_fields, T * sizeof(S)); // 1st hasher uses T inputs.
+          in_fields += T;
+          tmp_fields += T;
+        }
+        for (int hasher_idx = 0; hasher_idx < sponge_nof_hashers; hasher_idx++) { // NOTE(review): in the no-domain-tag path the first hasher was already filled above and tmp_fields advanced by T, yet this loop still runs sponge_nof_hashers times - looks like it writes past the allocation and leaves the rewind below T scalars short; confirm.
+          if (hasher_idx == sponge_nof_hashers-1 && padding_size > 0) {
+            // Last hasher in the chain. Take care of padding.
+            if (hasher_idx == 0) {
+              memcpy(tmp_fields + 1, in_fields, (T - padding_size) * sizeof(S));
+              memcpy(tmp_fields + 1 + T - padding_size, in_fields, padding_size * sizeof(S)); // NOTE(review): source is in_fields, not the padding array filled above - confirm intent.
+            }
+            else {
+              memcpy(tmp_fields + 1, in_fields, (T - padding_size - 1) * sizeof(S));
+              memcpy(tmp_fields + 1 + T - padding_size - 1, in_fields, padding_size * sizeof(S)); // NOTE(review): source is in_fields, not the padding array filled above - confirm intent.
+            }
+          }
+          else {    // Not a last hasher in the chain. There is no padding.
+            memcpy(tmp_fields + 1, in_fields, (T - 1) * sizeof(S));
+          }
+          in_fields += (T - 1);
+          tmp_fields += T;
+        }
+        tmp_fields -= T * sponge_nof_hashers; // Undo the per-hasher advances made by the loop above.
+      }     // if (is_sponge) {
+      else {    // Not a sponge function. There is no padding.
+        // Input of each hash should have domain tag at its input.
+        // tmp_fields = {{dt, in0 (T-1 inputs)}, {dt, in1 (T-1 inputs)}, {dt, in2 (T-1 inputs)}, etc.}
+        tmp_fields = new S[T * config.batch];
+        if (m_use_domain_tag) {      
+          for (int batch_idx = 0; batch_idx < config.batch; batch_idx++) {
+            memcpy(tmp_fields, &m_domain_tag, sizeof(S));
+            memcpy(tmp_fields + 1, in_fields, (T - 1) * sizeof(S));
+            in_fields += (T - 1);
+            tmp_fields += T;
+          }
+          tmp_fields -= T * config.batch;          
+        }
+        else {
+          // tmp_fields = {{in0 (T inputs)}, {in1 (T inputs)}, {in2 (T inputs)}, etc.}
+          memcpy(tmp_fields, in_fields, T * config.batch * sizeof(S));
+        }
+      }
+
+      // Hashes processing. NOTE(review): the early returns on hash_single failure below skip the delete[] calls at the end of this function.
+      if (is_sponge) {
+        S* tmp_fields_tmp_ptr;    // This pointer is used to assist in addition of the hasher outputs
+                                  // with new inputs.
+        // Call hash_single for hasher[0]  
+        eIcicleError err = hash_single(tmp_fields /* input */, tmp_fields /* output */,
+          alpha, nof_upper_full_rounds, nof_partial_rounds, nof_bottom_full_rounds,
+          rounds_constants, mds_matrix, partial_matrix_diagonal_m1);     
         if (err != eIcicleError::SUCCESS) return err;
-        input += arity * sizeof(S);
-        output += sizeof(S);
+        tmp_fields[T] = tmp_fields[0];    // Current first output is an input to the next hasher. NOTE(review): unconditional - if sponge_nof_hashers == 1 this index is outside the allocation; confirm.
+        tmp_fields_tmp_ptr = tmp_fields;  // Save current pointer.
+        tmp_fields += T;
+        for (int hasher_idx = 1; hasher_idx < sponge_nof_hashers; hasher_idx++) {      
+          // The first output of the prev hasher is the first input of the current hasher.
+          // The T-1 new inputs of the current hasher should be added to the T-1 outputs of the
+          // prev hasher (starting from index 1).
+          for (int i = 1; i < T; i++) {
+            tmp_fields[i] = tmp_fields_tmp_ptr[i] + tmp_fields[i];
+          }
+          tmp_fields_tmp_ptr = tmp_fields;    // Save current pointer.
+          eIcicleError err = hash_single(tmp_fields /* input */, tmp_fields /* output */,
+            alpha, nof_upper_full_rounds, nof_partial_rounds, nof_bottom_full_rounds,
+            rounds_constants, mds_matrix, partial_matrix_diagonal_m1);
+          if (err != eIcicleError::SUCCESS) return err;
+          if (hasher_idx != sponge_nof_hashers - 1)   // Skipped on the last iteration: there is no next hasher slot to write into.
+            tmp_fields[T] = tmp_fields[0];    // Fill first scalar of the input to the next hasher.
+          tmp_fields += T;  // Proceed to the next hasher.
+        }   // for (int hasher_idx = 1; hasher_idx < sponge_nof_hashers; hasher_idx++) {   
+        tmp_fields -= T;    // Rollback to the last hasher output.
+        memcpy(output, (std::byte*)(&tmp_fields[1]), sizeof(S)); // The single output scalar is taken from index 1 of the last hasher's state.
+        tmp_fields -= T * (sponge_nof_hashers - 1);
+      }
+      else {    // Not a sponge function.
+        for (int batch_hash_idx = 0; batch_hash_idx < config.batch; batch_hash_idx++) {
+          eIcicleError err = hash_single(tmp_fields /* input */, tmp_fields /* output */,
+            alpha, nof_upper_full_rounds, nof_partial_rounds, nof_bottom_full_rounds,
+            rounds_constants, mds_matrix, partial_matrix_diagonal_m1);
+          if (err != eIcicleError::SUCCESS) return err;
+          memcpy(output, (std::byte*)(&tmp_fields[1]), sizeof(S)); // One output scalar per batch element, taken from index 1 of its state.
+          tmp_fields += T;
+          output += sizeof(S);
+        }
+        tmp_fields -= T * config.batch; // Rewind to the pointer returned by new[] before it is freed below.
       }
 
+      delete[] tmp_fields;
+      if (padding_size != 0) delete[] padding; // NOTE(review): padding is allocated only when padding_size > 0, but this tests != 0 - a negative padding_size would delete an uninitialized pointer; confirm.
+      tmp_fields = nullptr;
+
       return eIcicleError::SUCCESS;
     }
 
@@ -236,40 +385,14 @@ namespace icicle {
     // // DEBUG end
 
     // This function performs a single hash according to parameters in the poseidon2_constants[] struct.
-    eIcicleError hash_single(const std::byte* input, std::byte* output) const
+    // eIcicleError hash_single(const std::byte* input, std::byte* output) const
+    eIcicleError hash_single(S* tmp_fields, S* hasher_output, int alpha, int nof_upper_full_rounds,
+      int nof_partial_rounds, int nof_bottom_full_rounds,
+      S* rounds_constants, S* mds_matrix, S* partial_matrix_diagonal_m1) const
     {
       const unsigned int T = m_t;
-      bool is_unsupported_T_for_this_field = poseidon2_constants[T].nof_upper_full_rounds == 0;
-      if (is_unsupported_T_for_this_field) {
-        ICICLE_LOG_ERROR << "Unsupported poseidon width (t=" << T << ") for this field! Planned for next version";
-        return eIcicleError::API_NOT_IMPLEMENTED;
-      }
-
-      unsigned int alpha = poseidon2_constants[T].alpha;
-      unsigned int nof_upper_full_rounds = poseidon2_constants[T].nof_upper_full_rounds;
-      unsigned int nof_partial_rounds = poseidon2_constants[T].nof_partial_rounds;
-      unsigned int nof_bottom_full_rounds = poseidon2_constants[T].nof_bottom_full_rounds;
-      // S* rounds_constants = poseidon2_constants[T].rounds_constants;
-      S* rounds_constants = poseidon2_constants[T].rounds_constants;
-      S* mds_matrix = poseidon2_constants[T].mds_matrix;
-      // S* partial_matrix_diagonal = poseidon2_constants[T].partial_matrix_diagonal;
-      S* partial_matrix_diagonal_m1 = poseidon2_constants[T].partial_matrix_diagonal_m1;
-      // Allocate temporary memory for intermediate calcs.
-      S* tmp_fields = new S[T];
-      // Casting from bytes to scalar.
-      const S* in_fields = (S*)(input);
-      // Copy input scalar to the output (as a temp storage) to be used in the rounds.
-      // *tmp_fields are used as a temp storage during the calculations in this function.
-      if (m_use_domain_tag) {
-        // in that case we hash [domain_tag, t-1 field elements]
-        memcpy(tmp_fields, &m_domain_tag, sizeof(S));
-        memcpy(tmp_fields + 1, in_fields, (T - 1) * sizeof(S));
-      } else {
-        // in that case we hash [t field elements]
-        memcpy(tmp_fields, in_fields, T * sizeof(S));
-      }
 
-      // Pre-rounds full maatrix multiplication.
+      // Pre-rounds full matrix multiplication.
       full_matrix_mul_by_vector(tmp_fields, mds_matrix, tmp_fields);
 
       // Upper full rounds.
@@ -288,10 +411,11 @@ namespace icicle {
       // Bottom full rounds.
       full_rounds(nof_bottom_full_rounds, tmp_fields, rounds_constants);
 
-      memcpy(output, (std::byte*)(&tmp_fields[1]), sizeof(S));
+      memcpy(hasher_output, (std::byte*)(tmp_fields), T * sizeof(S));
+      // memcpy(output, (std::byte*)(&tmp_fields[1]), sizeof(S));
 
-      delete[] tmp_fields;
-      tmp_fields = nullptr;
+      // delete[] tmp_fields;
+      // tmp_fields = nullptr;
 
       return eIcicleError::SUCCESS;
     } // eIcicleError hash_single(const std::byte* input, std::byte* output) const