Summarize Sanctuary results (#1179)

This PR makes the `sanctuary` tests write a results file when running in CI. Each shard then exposes its results as a job output, and the `combinedResults` job collects those outputs and renders them in a table. We want to use this to establish a baseline and a benchmark for both the correctness and the performance of the bindings rules.
NomicFoundation · Dec 13, 2024 · d497d73 · d497d73
1 parent 5cec87c
commit d497d73
Show file tree

Hide file tree

Showing 4 changed files with 225 additions and 5 deletions.
diff --git a/.github/workflows/sanctuary.yml b/.github/workflows/sanctuary.yml
@@ -22,8 +22,25 @@ on:
         default: false
 
 jobs:
-  sanctuary:
+  singleShard:
     runs-on: "ubuntu-22.04" # _SLANG_DEV_CONTAINER_BASE_IMAGE_ (keep in sync)
+    outputs:
+      __SLANG_SANCTUARY_SHARD_RESULTS__0: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__0 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__1: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__1 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__2: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__2 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__3: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__3 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__4: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__4 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__5: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__5 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__6: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__6 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__7: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__7 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__8: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__8 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__9: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__9 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__10: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__10 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__11: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__11 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__12: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__12 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__13: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__13 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__14: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__14 }}"
+      __SLANG_SANCTUARY_SHARD_RESULTS__15: "${{ steps.output-shard-results.outputs.__SLANG_SANCTUARY_SHARD_RESULTS__15 }}"
 
     strategy:
       fail-fast: false # Continue running all shards even if some fail.
@@ -59,4 +76,28 @@ jobs:
       - name: "infra run solidity_testing_sanctuary"
         uses: "./.github/actions/devcontainer/run"
         with:
-          runCmd: "./scripts/bin/infra run --release --bin solidity_testing_sanctuary -- --shards-count ${{ env.SHARDS_COUNT }} --shard-index ${{ matrix.shard_index }} ${{ inputs.check_bindings == true && '--check-bindings' || '' }} ${{ inputs.chain }} ${{ inputs.network }}"
+          runCmd: "./scripts/bin/infra run --release --bin solidity_testing_sanctuary -- test --shards-count ${{ env.SHARDS_COUNT }} --shard-index ${{ matrix.shard_index }} ${{ inputs.check_bindings == true && '--check-bindings' || '' }} ${{ inputs.chain }} ${{ inputs.network }}"
+
+      # Publishes this shard's results file as a step output named after the shard index,
+      # so that the job-level `outputs` mapping can forward it to dependent jobs.
+      - name: "Write shard results to output"
+        # Run even when the test step failed, so that failing shards still report their numbers.
+        if: "!cancelled()"
+        id: "output-shard-results"
+        run: |
+          results_file="target/__SLANG_SANCTUARY_SHARD_RESULTS__.json"
+          # The test binary may have exited before writing the file; in that case publish
+          # nothing instead of an empty value that downstream consumers cannot parse as JSON.
+          if [[ -f "$results_file" ]]; then
+            echo "__SLANG_SANCTUARY_SHARD_RESULTS__${{ matrix.shard_index }}=$(cat "$results_file")" >> "$GITHUB_OUTPUT"
+          else
+            echo "::warning::Shard ${{ matrix.shard_index }} produced no results file; skipping its output."
+          fi
+
+  # Collects the outputs of every `singleShard` matrix job and prints them as one table.
+  combinedResults:
+    runs-on: "ubuntu-22.04" # _SLANG_DEV_CONTAINER_BASE_IMAGE_ (keep in sync)
+    needs: "singleShard"
+    # Run even when some shards failed, so that partial results are still summarized.
+    if: "!cancelled()"
+    steps:
+      - name: "Checkout Repository"
+        uses: "actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683"
+
+      - name: "Restore Cache"
+        uses: "./.github/actions/cache/restore"
+
+      - name: "Output shards results"
+        env:
+          # Hand the JSON to the shell through the environment rather than splicing it into
+          # the script text: the script no longer breaks (or gets reinterpreted) if the
+          # outputs ever contain a single quote or other shell metacharacters.
+          SHARD_OUTPUTS: "${{ toJSON(needs.singleShard.outputs) }}"
+        run: 'printf "%s\n" "$SHARD_OUTPUTS" > __SLANG_SANCTUARY_MATRIX_RESULTS__.json'
+
+      - name: "Show combined results"
+        uses: "./.github/actions/devcontainer/run"
+        with:
+          runCmd: "./scripts/bin/infra run --bin solidity_testing_sanctuary -- show-combined-results __SLANG_SANCTUARY_MATRIX_RESULTS__.json"
diff --git a/crates/solidity/testing/sanctuary/src/events.rs b/crates/solidity/testing/sanctuary/src/events.rs
@@ -5,6 +5,7 @@ use indicatif::ProgressBar;
 use infra_utils::github::GitHub;
 
 use crate::reporting::Reporter;
+use crate::results::ShardResults;
 
 const MAX_PRINTED_FAILURES: u64 = 1000;
 
@@ -133,4 +134,15 @@ impl Events {
     pub fn trace(&self, message: impl AsRef<str>) {
         self.reporter.println(message);
     }
+
+    /// Captures the current value of every counter, plus the elapsed time of the
+    /// `all_directories` tracker, as a serializable `ShardResults` snapshot.
+    pub fn to_results(&self) -> ShardResults {
+        let source_files = self.source_files.position();
+        let passed = self.passed.position();
+        let failed = self.failed.position();
+        let incompatible = self.incompatible.position();
+        let not_found = self.not_found.position();
+        let elapsed = self.all_directories.elapsed();
+
+        ShardResults {
+            source_files,
+            passed,
+            failed,
+            incompatible,
+            not_found,
+            elapsed,
+        }
+    }
 }
diff --git a/crates/solidity/testing/sanctuary/src/main.rs b/crates/solidity/testing/sanctuary/src/main.rs
@@ -2,13 +2,18 @@ mod chains;
 mod datasets;
 mod events;
 mod reporting;
+mod results;
 mod tests;
 
+use std::path::PathBuf;
+
 use anyhow::Result;
-use clap::Parser;
+use clap::{Parser, Subcommand};
+use infra_utils::github::GitHub;
 use infra_utils::paths::PathExtensions;
 use infra_utils::terminal::{NumbersDefaultDisplay, Terminal};
 use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
+use results::{display_all_results, AllResults};
 
 use crate::chains::Chain;
 use crate::datasets::{DataSet, SourceFile};
@@ -17,6 +22,18 @@ use crate::tests::{run_test, select_tests, TestSelection};
 
 #[derive(Debug, Parser)]
 struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+// NOTE: the doc comments on the variants below are picked up by clap and shown as the
+// description of each subcommand in `--help`.
+#[derive(Subcommand, Debug)]
+enum Commands {
+    /// Run the sanctuary tests against a chain/network, optionally restricted to one shard.
+    Test(TestCommand),
+    /// Print a table combining the per-shard results stored in a JSON file.
+    ShowCombinedResults(ShowCombinedResultsCommand),
+}
+
+#[derive(Debug, Parser)]
+struct TestCommand {
     /// Chain and sub-network to run against.
     #[command(subcommand)]
     chain: Chain,
@@ -44,13 +61,29 @@ struct ShardingOptions {
     shard_index: Option<usize>,
 }
 
+/// Arguments of the `show-combined-results` subcommand.
+#[derive(Debug, Parser)]
+struct ShowCombinedResultsCommand {
+    /// Path to a JSON file mapping `__SLANG_SANCTUARY_SHARD_RESULTS__<index>` keys to the
+    /// JSON-encoded results of each shard.
+    results_file: PathBuf,
+}
+
 fn main() -> Result<()> {
-    let Cli {
+    // Parse the command line and hand over to the handler of the selected subcommand.
+    match Cli::parse().command {
+        Commands::Test(test) => run_test_command(test),
+        Commands::ShowCombinedResults(show) => run_show_combined_results_command(show),
+    }
+}
+
+fn run_test_command(command: TestCommand) -> Result<()> {
+    let TestCommand {
         chain,
         sharding_options,
         trace,
         check_bindings,
-    } = Cli::parse();
+    } = command;
 
     Terminal::step(format!(
         "initialize {chain}/{network}",
@@ -93,6 +126,16 @@ fn main() -> Result<()> {
         events.finish_directory();
     }
 
+    if GitHub::is_running_in_ci() {
+        let output_path = PathBuf::from("target").join("__SLANG_SANCTUARY_SHARD_RESULTS__.json");
+        let results = events.to_results();
+        let value = serde_json::to_string(&results)?;
+
+        std::fs::create_dir_all(output_path.parent().unwrap())?;
+        output_path.write_string(value)?;
+        println!("Wrote results to {output_path:?}");
+    }
+
     let failure_count = events.failure_count();
     if failure_count > 0 {
         println!();
@@ -131,6 +174,15 @@ fn run_in_parallel(files: &Vec<SourceFile>, events: &Events, check_bindings: boo
     .try_for_each(|file| run_test(file, events, check_bindings))
 }
 
+/// Reads the combined results file, parses it into `AllResults`, and prints the table.
+///
+/// Fails if the file cannot be read or if its contents do not deserialize.
+fn run_show_combined_results_command(command: ShowCombinedResultsCommand) -> Result<()> {
+    let json = command.results_file.read_to_string()?;
+    let all_results = serde_json::from_str::<AllResults>(&json)?;
+
+    display_all_results(&all_results);
+
+    Ok(())
+}
+
 #[test]
 fn verify_clap_cli() {
     // Catches problems earlier in the development cycle:

diff --git a/crates/solidity/testing/sanctuary/src/results.rs b/crates/solidity/testing/sanctuary/src/results.rs
@@ -0,0 +1,115 @@
+use std::collections::BTreeMap;
+use std::time::Duration;
+
+use indicatif::{FormattedDuration, HumanCount};
+use serde::de::{Error, Visitor};
+use serde::{Deserialize, Serialize};
+
+/// Counters and timing reported by a single sanctuary shard.
+///
+/// Serialized to JSON by each shard and parsed back when the combined table is rendered.
+/// `Default` yields all-zero counters, which is used as the starting point for totals.
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct ShardResults {
+    /// Value of the `source_files` counter at the time the results were captured.
+    pub source_files: u64,
+    /// Value of the `passed` counter.
+    pub passed: u64,
+    /// Value of the `failed` counter.
+    pub failed: u64,
+    /// Value of the `incompatible` counter.
+    pub incompatible: u64,
+    /// Value of the `not_found` counter.
+    pub not_found: u64,
+    /// Elapsed time reported for the shard's run.
+    pub elapsed: Duration,
+}
+
+/// Results of all shards of a run, built from the combined job outputs.
+#[derive(Debug)]
+pub struct AllResults {
+    /// Per-shard results keyed by shard index; a `BTreeMap` so iteration is in index order.
+    pub shards: BTreeMap<usize, ShardResults>,
+}
+
+impl<'de> Deserialize<'de> for AllResults {
+    /// Expects a map as input and delegates the decoding of its entries to
+    /// `AllResultsVisitor`.
+    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+        let visitor = AllResultsVisitor {};
+        deserializer.deserialize_map(visitor)
+    }
+}
+
+/// Stateless serde visitor that builds an `AllResults` from a map of shard outputs.
+struct AllResultsVisitor {}
+
+impl<'de> Visitor<'de> for AllResultsVisitor {
+    type Value = AllResults;
+
+    fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        formatter.write_str("a results map")
+    }
+
+    /// Builds `AllResults` from a map whose keys are
+    /// `__SLANG_SANCTUARY_SHARD_RESULTS__<shard index>` and whose values are strings
+    /// containing the JSON-encoded `ShardResults` of that shard.
+    ///
+    /// Entries with an empty (or whitespace-only) value are skipped: a shard that never
+    /// published its results should not prevent the remaining shards from being shown.
+    ///
+    /// Returns an `invalid_value` error if a key lacks the expected prefix, if the suffix
+    /// is not a valid shard index, or if a non-empty value is not valid `ShardResults` JSON.
+    fn visit_map<M>(self, mut access: M) -> std::result::Result<Self::Value, M::Error>
+    where
+        M: serde::de::MapAccess<'de>,
+    {
+        use serde::de::Unexpected;
+
+        const KEY_PREFIX: &str = "__SLANG_SANCTUARY_SHARD_RESULTS__";
+
+        let mut shards: BTreeMap<usize, ShardResults> = BTreeMap::new();
+        while let Some((key, value)) = access.next_entry::<String, String>()? {
+            // Validate the key first, so malformed keys are reported even for empty values.
+            let shard_index: usize = key
+                .strip_prefix(KEY_PREFIX)
+                .ok_or_else(|| {
+                    // Built lazily: the error is only constructed when the prefix is missing.
+                    Error::invalid_value(
+                        Unexpected::Str(&key),
+                        &"a string prefixed with __SLANG_SANCTUARY_SHARD_RESULTS__",
+                    )
+                })?
+                .parse()
+                .map_err(|_| {
+                    // Shard indices start at zero, so "non-negative" rather than "positive".
+                    Error::invalid_value(Unexpected::Str(&key), &"a non-negative shard index")
+                })?;
+
+            // An output that was never set (e.g. the shard ended before writing its results)
+            // may show up as an empty string; leave that shard out of the table.
+            if value.trim().is_empty() {
+                continue;
+            }
+
+            let shard_results = serde_json::from_str(&value).map_err(|_| {
+                Error::invalid_value(
+                    Unexpected::Str(&value),
+                    &"a JSON string with the shard results",
+                )
+            })?;
+            shards.insert(shard_index, shard_results);
+        }
+
+        Ok(AllResults { shards })
+    }
+}
+
+/// Prints a table with one row per shard (in shard-index order), followed by a row with
+/// the sum of every column, including elapsed time.
+pub fn display_all_results(all_results: &AllResults) {
+    const HEADER: &str = "Shard ID | Source files |       Passed |       Failed | Incompatible |    Not found | Elapsed";
+    const SEPARATOR: &str = "------------------------------------------------------------------------------------------------";
+
+    println!("{HEADER}");
+    println!("{SEPARATOR}");
+
+    let mut totals = ShardResults::default();
+    for (shard_index, shard) in &all_results.shards {
+        print_results_row(shard_index, shard);
+
+        totals.source_files += shard.source_files;
+        totals.passed += shard.passed;
+        totals.failed += shard.failed;
+        totals.incompatible += shard.incompatible;
+        totals.not_found += shard.not_found;
+        totals.elapsed += shard.elapsed;
+    }
+
+    println!("{SEPARATOR}");
+    print_results_row("TOTALS", &totals);
+}
+
+/// Prints one table row: `label` left-aligned in 8 columns, then each counter
+/// right-aligned in 12 columns, then the formatted elapsed time.
+fn print_results_row(label: impl std::fmt::Display, results: &ShardResults) {
+    println!(
+        "{label:<8} | {source_files:>12} | {passed:>12} | \
+         {failed:>12} | {incompatible:>12} | {not_found:>12} | {elapsed}",
+        // Counters are rendered to strings first so that the width applies to the final text.
+        source_files = HumanCount(results.source_files).to_string(),
+        passed = HumanCount(results.passed).to_string(),
+        failed = HumanCount(results.failed).to_string(),
+        incompatible = HumanCount(results.incompatible).to_string(),
+        not_found = HumanCount(results.not_found).to_string(),
+        elapsed = FormattedDuration(results.elapsed),
+    );
+}