From bf890cb9b7783b724035edafb5d65708bf661231 Mon Sep 17 00:00:00 2001
From: Randy Stauner
Date: Mon, 13 Jan 2025 09:47:35 -0700
Subject: [PATCH 1/4] Remove old, unused perf tripwire code

---
 continuous_reporting/benchmark_and_update.rb | 272 ------------------
 .../generate_and_upload_reports.rb           |   4 +-
 .../gh_tasks/run_benchmarks.sh               |   2 +-
 .../reports/memory_details_report.rb         |   7 +-
 .../reports/speed_details_report.rb          |  19 +-
 5 files changed, 5 insertions(+), 299 deletions(-)

diff --git a/continuous_reporting/benchmark_and_update.rb b/continuous_reporting/benchmark_and_update.rb
index 1df0058d9..d17b7a70a 100755
--- a/continuous_reporting/benchmark_and_update.rb
+++ b/continuous_reporting/benchmark_and_update.rb
@@ -9,34 +9,6 @@
 require "optparse"
 require 'rbconfig'
 
-# TODO: should the benchmark-run and perf-check parts of this script be separated? Probably.
-
-# This is intended to be the top-level script for running benchmarks, reporting on them
-# and checking performance.
-
-# The tolerances below are for detecting big drops in benchmark performance. So far I haven't
-# had good luck with that -- it's either too sensitive and files spurious bugs a lot, or it's
-# too insensitive and doesn't notice real bugs. Right now, auto-filing bugs is turned off.
-
-# The STDDEV_TOLERANCE is what multiple of the standard deviation it's okay to drop on
-# a given run. That effectively determines the false-positive rate since we're comparing samples
-# from a Gaussian-ish distribution.
-NORMAL_STDDEV_TOLERANCE = 1.5
-# The DROP_TOLERANCE is what absolute multiple-of-the-mean drop (e.g. 0.05 means 5%) is
-# assumed to be okay. For each metric we use the more permissive of the two tolerances
-# on the more permissive of the two mean values. All four must be outside of tolerance
-# for us to count a failure.
-NORMAL_DROP_TOLERANCE = 0.07
-# A microbenchmark will normally have a very small stddev from run to run. That means
-# it's actually *less* tolerant of noise on the host, since "twice the stddev" is a
-# significantly smaller absolute value.
-MICRO_STDDEV_TOLERANCE = 2.0
-# A microbenchmark will routinely show persistent speed changes of much larger magnitude
-# than a larger, more varied benchmark. For example, setivar is surprisingly prone to
-# large swings in time taken depending on tiny changes to memory layout. So the drop
-# tolerance needs to be significantly larger to avoid frequent false positives.
-MICRO_DROP_TOLERANCE = 0.20
-
 BUILT_REPORTS_ROOT = YJITMetrics::ContinuousReporting::BUILT_REPORTS_ROOT
 RAW_BENCHMARK_ROOT = YJITMetrics::ContinuousReporting::RAW_BENCHMARK_ROOT
 
@@ -91,11 +63,6 @@ def args_for_bench_type(bench_type_arg)
 
 benchmark_args = nil
 bench_params_file = nil
-should_file_gh_issue = true
-no_perf_tripwires = false
-all_perf_tripwires = false
-single_perf_tripwire = nil
-is_verbose = false
 data_dir = nil
 full_rebuild = nil
 
@@ -110,33 +77,6 @@
         benchmark_args = args_for_bench_type(btype)
     end
 
-    # NOTE: GH issue filing is currently disabled
-    opts.on("-g", "--no-gh-issue", "Do not file an actual GitHub issue, only print failures to console") do
-        should_file_gh_issue = false
-    end
-
-    opts.on("-a", "--all-perf-tripwires", "Check performance tripwires on all pairs of benchmarks (implies --no-gh-issue)") do
-        if no_perf_tripwires || single_perf_tripwire
-            raise "Error! Please do not specify --all-perf-tripwires along with --no-perf-tripwires or --single-perf-tripwire!"
-        end
-        all_perf_tripwires = true
-        should_file_gh_issue = false
-    end
-
-    opts.on("-t TS", "--perf-timestamp TIMESTAMP", "Check performance tripwire at this specific timestamp") do |ts|
-        if all_perf_tripwires || no_perf_tripwires
-            raise "Error! Please do not specify a specific perf tripwire along with --no-perf-tripwires or --all-perf-tripwires!"
-        end
-        single_perf_tripwire = ts.strip
-    end
-
-    opts.on("-np", "--no-perf-tripwires", "Do not check performance tripwires") do
-        if all_perf_tripwires || single_perf_tripwire
-            raise "Error! Please do not specify --no-perf-tripwires along with --single-perf-tripwire or --all-perf-tripwires!"
-        end
-        no_perf_tripwires = true
-    end
-
     opts.on("-fr YN", "--full-rebuild YN") do |fr|
         if fr.nil? || fr.strip == ""
             full_rebuild = true
@@ -154,51 +94,17 @@ def args_for_bench_type(bench_type_arg)
         raise("No such directory: #{ddir.inspect}!") unless File.directory?(ddir)
         data_dir = ddir
     end
-
-    opts.on("-v", "--verbose", "Print verbose output about tripwire checks") do
-        is_verbose = true
-    end
 end.parse!
 
 bench_params_data = bench_params_file ? JSON.parse(File.read bench_params_file) : {}
 
 BENCHMARK_ARGS = benchmark_args || (bench_params_data["bench_type"] ? args_for_bench_type(bench_params_data["bench_type"]) : BENCH_TYPES["default"])
 FULL_REBUILD = !full_rebuild.nil? ? full_rebuild : (bench_params_data["full_rebuild"] || false)
-FILE_GH_ISSUE = should_file_gh_issue
-ALL_PERF_TRIPWIRES = all_perf_tripwires
-SINGLE_PERF_TRIPWIRE = single_perf_tripwire
-VERBOSE = is_verbose
 BENCH_PARAMS_FILE = bench_params_file
 
 DATA_DIR = File.expand_path(data_dir || bench_params_data["data_directory"] || "continuous_reporting/data")
 
 PIDFILE = "/home/ubuntu/benchmark_ci.pid"
-GITHUB_TOKEN=ENV["BENCHMARK_CI_GITHUB_TOKEN"]
-if FILE_GH_ISSUE && !GITHUB_TOKEN
-    raise "Set BENCHMARK_CI_GITHUB_TOKEN to an appropriate GitHub token to allow auto-filing issues! (or set --no-gh-issue)"
-end
-
-def ghapi_post(api_uri, params, verb: :post)
-    uri = URI("https://api.github.com" + api_uri)
-
-    req = Net::HTTP::Post.new(uri)
-    #req.basic_auth GITHUB_USER, GITHUB_TOKEN
-    req['Accept'] = "application/vnd.github.v3+json"
-    req['Content-Type'] = "application/json"
-    req['Authorization'] = "Bearer #{GITHUB_TOKEN}"
-    req.body = JSON.dump(params)
-    result = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(req) }
-
-    unless result.is_a?(Net::HTTPSuccess)
-        $stderr.puts "Error in HTTP #{verb.upcase}: #{result.inspect}"
-        $stderr.puts result.body
-        $stderr.puts "------"
-        raise "HTTP error when posting to #{api_uri}!"
-    end
-
-    JSON.load(result.body)
-end
-
 class BenchmarkDetails
     def initialize(timestamp)
         @timestamp = timestamp
@@ -227,32 +133,6 @@ def escape_markdown(s)
     s.gsub(/(\*|\_|\`)/) { '\\' + $1 }.gsub("<", "&lt;")
 end
 
-def file_gh_issue(title, message)
-    host = `uname -a`.chomp
-    issue_body = <<~ISSUE
-        Error running benchmark CI job on #{host}:
-
-        #{message}
-    ISSUE
-
-    unless FILE_GH_ISSUE
-        print "We would file a GitHub issue, but we were asked not to. Details:\n\n"
-
-        print "==============================\n"
-        print "Title: CI Benchmarking: #{title}\n"
-        puts issue_body
-        print "==============================\n"
-        return
-    end
-
-    # Note: if you're set up as the GitHub user, it's not gonna email you since it thinks you filed it yourself.
-    ghapi_post "/repos/Shopify/yjit-metrics/issues",
-        {
-            "title" => "CI Benchmarking: #{title}",
-            "body" => issue_body
-        }
-end
-
 if File.exist?(PIDFILE)
     pid = File.read(PIDFILE).to_i
     if pid && pid > 0
@@ -280,10 +160,6 @@ def run_benchmarks
     end
 end
 
-def ts_from_tripwire_filename(filename)
-    filename.split("blog_speed_details_")[1].split(".")[0]
-end
-
 def timestr_from_ts(ts)
     if ts =~ /\A(\d{4}-\d{2}-\d{2})-(\d{6})\Z/
         year_month_day = $1
@@ -297,154 +173,6 @@ def timestr_from_ts(ts)
     end
 end
 
-# If benchmark results drop noticeably, file a Github issue
-def check_perf_tripwires
-    Dir.chdir(YM_REPORT_DIR) do
-        # Grab only non-platform-specific tripwire files that do *not* have a platform name in them,
-        # but instead end in a six-digit timestamp.
-        tripwire_files = Dir["*.tripwires.json"].to_a.select {|f| f =~ /\d{6}\.tripwires\.json\Z/}.sort
-
-        if ALL_PERF_TRIPWIRES
-            (tripwire_files.size - 1).times do |index|
-                check_one_perf_tripwire(tripwire_files[index], tripwire_files[index - 1])
-            end
-        elsif SINGLE_PERF_TRIPWIRE
-            specified_file = tripwire_files.detect { |f| f.include?(SINGLE_PERF_TRIPWIRE) }
-            raise "Couldn't find perf tripwire report containing #{SINGLE_PERF_TRIPWIRE.inspect}!" unless specified_file
-
-            specified_index = tripwire_files.index(specified_file)
-            raise "Can't check perf on the very first report!" if specified_index == 0
-
-            check_one_perf_tripwire(tripwire_files[specified_index], tripwire_files[specified_index - 1])
-        else
-            check_one_perf_tripwire(tripwire_files[-1], tripwire_files[-2])
-        end
-    end
-end
-
-def check_one_perf_tripwire(current_filename, compared_filename, verbose: VERBOSE)
-    YJITMetrics::PLATFORMS.each do |platform|
-        platform_current_filename = current_filename.gsub(".tripwires.json", ".#{platform}.tripwires.json")
-    end
-
-end
-
-def check_one_perf_tripwire_file(current_filename, compared_filename, verbose: VERBOSE)
-    current_data = JSON.parse File.read(current_filename)
-    compared_data = JSON.parse File.read(compared_filename)
-
-    check_failures = []
-
-    compared_data.each do |bench_name, values|
-        # Only compare if both sets of data have the benchmark
-        next unless current_data[bench_name]
-
-        current_mean = current_data[bench_name]["mean"]
-        current_rsd_pct = current_data[bench_name]["rsd_pct"]
-        compared_mean = values["mean"]
-        compared_rsd_pct = values["rsd_pct"]
-
-        current_stddev = (current_rsd_pct.to_f / 100.0) * current_mean
-        compared_stddev = (compared_rsd_pct.to_f / 100.0) * compared_mean
-
-        # Normally is_micro should be the same in all cases for any one specific benchmark.
-        # So we just assume the most recent data has the correct value.
-        is_micro = current_data[bench_name]["micro"]
-
-        if is_micro
-            bench_stddev_tolerance = MICRO_STDDEV_TOLERANCE
-            bench_drop_tolerance = MICRO_DROP_TOLERANCE
-        else
-            bench_stddev_tolerance = NORMAL_STDDEV_TOLERANCE
-            bench_drop_tolerance = NORMAL_DROP_TOLERANCE
-        end
-
-        # Occasionally stddev can change pretty wildly from run to run. Take the most tolerant of multiple-of-recent-stddev,
-        # or a percentage of the larger mean runtime. Basically, a drop must be unusual enough (stddev) and large enough (mean)
-        # for us to flag it.
-        tolerance = [ current_stddev * bench_stddev_tolerance, compared_stddev * bench_stddev_tolerance,
-            current_mean * bench_drop_tolerance, compared_mean * bench_drop_tolerance ].max
-
-        drop = current_mean - compared_mean
-
-        if verbose
-            puts "#{is_micro ? "Microbenchmark" : "Benchmark"} #{bench_name}, tolerance is #{ "%.2f" % tolerance }, latest mean is #{ "%.2f" % current_mean } (stddev #{"%.2f" % current_stddev}), " +
-                "next-latest mean is #{ "%.2f" % compared_mean } (stddev #{ "%.2f" % compared_stddev}), drop is #{ "%.2f" % drop }..."
-        end
-
-        if drop > tolerance
-            puts "Benchmark #{bench_name} marked as failure!" if verbose
-            check_failures.push({
-                benchmark: bench_name,
-                current_mean: current_mean,
-                second_current_mean: compared_mean,
-                current_stddev: current_stddev,
-                current_rsd_pct: current_rsd_pct,
-                second_current_stddev: compared_stddev,
-                second_current_rsd_pct: compared_rsd_pct,
-            })
-        end
-    end
-
-    if check_failures.empty?
-        puts "No benchmarks failing performance tripwire (#{current_filename})"
-        return
-    end
-
-    puts "Failing benchmarks (#{current_filename}): #{check_failures.map { |h| h[:benchmark] }}"
-    file_perf_bug(current_filename, compared_filename, check_failures)
-end
-
-def file_perf_bug(current_filename, compared_filename, check_failures)
-    ts_latest = ts_from_tripwire_filename(current_filename)
-    ts_penultimate = ts_from_tripwire_filename(compared_filename)
-
-    # This expects to be called from the _includes/reports directory
-    latest_yjit_results = BenchmarkDetails.new(ts_latest)
-    latest_yjit_result_file = latest_yjit_results.yjit_test_result
-    penultimate_yjit_results = BenchmarkDetails.new(ts_penultimate)
-    penultimate_yjit_result_file = penultimate_yjit_results.yjit_test_result
-    latest_yjit_data = JSON.parse File.read(latest_yjit_result_file)
-    penultimate_yjit_data = JSON.parse File.read(penultimate_yjit_result_file)
-    latest_yjit_ruby_desc = latest_yjit_data["ruby_metadata"]["RUBY_DESCRIPTION"]
-    penultimate_yjit_ruby_desc = penultimate_yjit_data["ruby_metadata"]["RUBY_DESCRIPTION"]
-
-    failing_benchmarks = check_failures.map { |h| h[:benchmark] }
-
-    puts "Filing Github issue - slower benchmark(s) found."
-    body = <<~BODY_TOP
-        Latest failing benchmark:
-
-        * Time: #{timestr_from_ts(ts_latest)}
-        * Ruby: #{escape_markdown latest_yjit_ruby_desc}
-        * [Raw YJIT prod data](#{latest_yjit_results.yjit_permalink})
-
-        Compared to previous benchmark:
-
-        * Time: #{timestr_from_ts(ts_penultimate)}
-        * Ruby: #{escape_markdown penultimate_yjit_ruby_desc}
-        * [Raw YJIT prod data](#{penultimate_yjit_results.yjit_permalink})
-
-        Slower benchmarks: #{failing_benchmarks.join(", ")}
-
-        [Timeline Graph](https://speed.yjit.org/timeline-deep##{failing_benchmarks.join("+")})
-
-        Slowdown details:
-
-    BODY_TOP
-
-    check_failures.each do |bench_hash|
-        # Indentation with here-docs is hard - use the old-style with extremely literal whitespace handling.
-        body += <<
[unrecoverable: the heredoc body and the remainder of this hunk were lost in extraction; the file header below is restored from this patch's diffstat]

diff --git a/continuous_reporting/generate_and_upload_reports.rb b/continuous_reporting/generate_and_upload_reports.rb
--- a/continuous_reporting/generate_and_upload_reports.rb
+++ b/continuous_reporting/generate_and_upload_reports.rb
     "blog_speed_details" => { report_type: :basic_report,
-        extensions: [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ],
+        extensions: [ "html", "svg", "head.svg", "back.svg", "micro.svg", "csv" ],
     },
     "blog_memory_details" => { report_type: :basic_report,
-        extensions: [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ],
+        extensions: [ "html", "svg", "head.svg", "back.svg", "micro.svg", "csv" ],
     },
     "blog_yjit_stats" => { report_type: :basic_report,
diff --git a/continuous_reporting/gh_tasks/run_benchmarks.sh b/continuous_reporting/gh_tasks/run_benchmarks.sh
index b09e45b9d..c1981fdd3 100755
--- a/continuous_reporting/gh_tasks/run_benchmarks.sh
+++ b/continuous_reporting/gh_tasks/run_benchmarks.sh
@@ -6,6 +6,6 @@ cd ~/ym/yjit-metrics
 
 bundle
 
-ruby continuous_reporting/benchmark_and_update.rb --no-gh-issue --no-perf-tripwires --bench-params=$BENCH_PARAMS
+ruby continuous_reporting/benchmark_and_update.rb --no-gh-issue --bench-params=$BENCH_PARAMS
 
 echo "Completed benchmarking successfully."
diff --git a/lib/yjit_metrics/reports/memory_details_report.rb b/lib/yjit_metrics/reports/memory_details_report.rb
index 143d2b46a..53c1e1d24 100644
--- a/lib/yjit_metrics/reports/memory_details_report.rb
+++ b/lib/yjit_metrics/reports/memory_details_report.rb
@@ -12,7 +12,7 @@ class MemoryDetailsReport < SpeedDetailsReport
     #end
 
     def self.report_extensions
-      [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ]
+      [ "html", "svg", "head.svg", "back.svg", "micro.svg", "csv" ]
     end
 
     def initialize(config_names, results, platform:, benchmarks: [])
@@ -82,10 +82,5 @@ def html_template_path
     def relative_values_by_config_and_benchmark
       @peak_mb_relative_by_config
     end
-
-    # FIXME: We aren't reporting on the tripwires currently, but it makes sense to implement it and report on it.
-    def tripwires
-      {}
-    end
   end
 end
diff --git a/lib/yjit_metrics/reports/speed_details_report.rb b/lib/yjit_metrics/reports/speed_details_report.rb
index 5e3297e3d..d23ac2f5c 100644
--- a/lib/yjit_metrics/reports/speed_details_report.rb
+++ b/lib/yjit_metrics/reports/speed_details_report.rb
@@ -14,7 +14,7 @@ class SpeedDetailsReport < BloggableSingleReport
     #end
 
     def self.report_extensions
-      [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ]
+      [ "html", "svg", "head.svg", "back.svg", "micro.svg", "csv" ]
     end
 
     def initialize(orig_config_names, results, platform:, benchmarks: [])
@@ -426,19 +426,6 @@ def svg_object(relative_values_by_config_and_benchmark, benchmarks: @benchmark_n
       svg
     end
 
-    def tripwires
-      tripwires = {}
-      micro = micro_benchmarks
-      @benchmark_names.each_with_index do |bench_name, idx|
-        tripwires[bench_name] = {
-          mean: @mean_by_config[@with_yjit_config][idx],
-          rsd_pct: @rsd_pct_by_config[@with_yjit_config][idx],
-          micro: micro.include?(bench_name),
-        }
-      end
-      tripwires
-    end
-
     def html_template_path
       File.expand_path("../report_templates/blog_speed_details.html.erb", __dir__)
     end
@@ -491,10 +478,6 @@ def write_file(filename)
       html_output = script_template.result(binding)
       File.open(filename + ".#{@platform}.html", "w") { |f| f.write(html_output) }
 
-      # The Tripwire report is used to tell when benchmark performance drops suddenly
-      json_data = tripwires
-      File.open(filename + ".#{@platform}.tripwires.json", "w") { |f| f.write JSON.pretty_generate json_data }
-
       write_to_csv(filename + ".#{@platform}.csv", [@headings] + report_table_data)
     end
   end
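[Reviewer note on PATCH 1/4] The detection logic this patch deletes reduces to the comparison below. This is a minimal, self-contained sketch reconstructed from the removed check_one_perf_tripwire_file; the constants and JSON field names come from the deleted code, while the sample entries at the bottom are invented for illustration.

  # Sketch of the deleted tripwire check. Constants and field names match the
  # old benchmark_and_update.rb; the sample data below is made up.
  NORMAL_STDDEV_TOLERANCE = 1.5
  NORMAL_DROP_TOLERANCE = 0.07
  MICRO_STDDEV_TOLERANCE = 2.0
  MICRO_DROP_TOLERANCE = 0.20

  # current/compared are per-benchmark entries from two *.tripwires.json files.
  def tripwire_failure?(current, compared)
    # rsd_pct is the relative standard deviation, as a percentage of the mean.
    current_stddev = (current["rsd_pct"].to_f / 100.0) * current["mean"]
    compared_stddev = (compared["rsd_pct"].to_f / 100.0) * compared["mean"]

    stddev_tol = current["micro"] ? MICRO_STDDEV_TOLERANCE : NORMAL_STDDEV_TOLERANCE
    drop_tol = current["micro"] ? MICRO_DROP_TOLERANCE : NORMAL_DROP_TOLERANCE

    # The drop must beat the *most* permissive of four bounds -- a multiple of
    # either run's stddev, or a fraction of either run's mean -- to count.
    tolerance = [current_stddev * stddev_tol, compared_stddev * stddev_tol,
                 current["mean"] * drop_tol, compared["mean"] * drop_tol].max

    (current["mean"] - compared["mean"]) > tolerance
  end

  latest   = { "mean" => 2.30, "rsd_pct" => 1.0, "micro" => false }  # newer, slower run
  previous = { "mean" => 2.00, "rsd_pct" => 1.0, "micro" => false }
  # Drop is 0.30; tolerance is max(0.0345, 0.030, 0.161, 0.140) = 0.161, so this trips.
  puts tripwire_failure?(latest, previous)  # => true

The .max over four bounds is what kept false positives down: a slowdown had to be both statistically unusual (a multiple of either run's stddev) and practically large (a fraction of either run's mean) before it counted as a failure.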
-#dirs_to_commit = [ "_benchmarks", "_includes", "reports" ]
-### Commit built reports if there is something to commit
-#diffs = (YJITMetrics.check_output "git status --porcelain #{dirs_to_commit.join(" ")}").chomp
-#if diffs == ""
-#    puts "No changes found. Not committing or pushing."
-#elsif no_push
-#    puts "Changes found, but --no-push was specified. Not committing or pushing."
-#else
-#    puts "Changes found. Committing and pushing."
-#    YJITMetrics.check_call "git add #{dirs_to_commit.join(" ")}"
-#    YJITMetrics.check_call 'git commit -m "Update reports via continuous_reporting.rb script"'
-#    YJITMetrics.check_call "git push"
-#end
-
 # Copy built _site directory into raw pages repo as a new single commit, to branch new_pages
 Dir.chdir GHPAGES_REPO
 puts "Switched to #{Dir.pwd}"
diff --git a/timeline_report.rb b/timeline_report.rb
index 358fbb0da..63660ddb5 100755
--- a/timeline_report.rb
+++ b/timeline_report.rb
@@ -83,16 +83,12 @@ def ruby_desc_to_sha(ruby_desc)
     exit -1
 end
 
-latest_ts = relevant_results.map { |_, _, timestamp, _, _| timestamp }.max
 puts "Loading #{relevant_results.size} data files..."
 
 result_set_by_ts = {}
-filepaths_by_ts = {}
 ruby_desc_by_config_and_ts = {}
 relevant_results.each do |filepath, config_name, timestamp, run_num, platform|
     benchmark_data = JSON.load(File.read(filepath))
-    filepaths_by_ts[timestamp] ||= []
-    filepaths_by_ts[timestamp].push filepath # Is this used? I'm not sure this is used.
 
     ruby_desc_by_config_and_ts[config_name] ||= {}
     ruby_desc_by_config_and_ts[config_name][timestamp] = ruby_desc_to_sha benchmark_data["ruby_metadata"]["RUBY_DESCRIPTION"]
@@ -109,9 +105,6 @@ def ruby_desc_to_sha(ruby_desc)
 configs = relevant_results.map { |_, config_name, _, _, _| config_name }.uniq.sort
 all_timestamps = result_set_by_ts.keys.sort
 
-# This should match the JS parser in the template files
-TIME_FORMAT = "%Y %m %d %H %M %S"
-
 # Grab a statistical summary of every timestamp, config and benchmark
 summary_by_ts = {}
 all_benchmarks = []

From aa876ec56b29bbc65582ed77eccac23065873c18 Mon Sep 17 00:00:00 2001
From: Randy Stauner
Date: Mon, 13 Jan 2025 15:02:29 -0700
Subject: [PATCH 4/4] Remove unnecessary nearly duplicate regexp

---
 timeline_report.rb | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/timeline_report.rb b/timeline_report.rb
index 63660ddb5..06e469776 100755
--- a/timeline_report.rb
+++ b/timeline_report.rb
@@ -41,7 +41,6 @@
 output_dir = File.expand_path(output_dir)
 
 DATASET_FILENAME_RE = /^(\d{4}-\d{2}-\d{2}-\d{6})_basic_benchmark_(\d{4}_)?(.*).json$/
-DATASET_FILEPATH_RE = /^(.*\/)?(\d{4}-\d{2}-\d{2}-\d{6})_basic_benchmark_(\d{4}_)?(.*).json$/
 # Return the information from the file path - run_num is nil if the file isn't in multi-run format
 def parse_dataset_filepath(filepath)
     filename = filepath.split("/")[-1]
@@ -75,12 +74,12 @@ def ruby_desc_to_sha(ruby_desc)
 
 Dir.chdir(data_dir)
 
-files_in_dir = Dir["**/*"].grep(DATASET_FILEPATH_RE)
+files_in_dir = Dir["**/*_basic_benchmark_*.json"].select { |x| File.basename(x) =~ DATASET_FILENAME_RE }
 relevant_results = files_in_dir.map { |filepath| parse_dataset_filepath(filepath) }
 
 if relevant_results.size == 0
     puts "No relevant data files found for directory #{data_dir.inspect} and specified arguments!"
-    exit -1
+    exit(-1)
 end
 
 puts "Loading #{relevant_results.size} data files..."
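[Reviewer note on PATCH 4/4] DATASET_FILEPATH_RE differed from DATASET_FILENAME_RE only by an optional leading-directory group, (.*\/)?, so matching File.basename(path) against the surviving regexp accepts the same files; parse_dataset_filepath already splits off the basename the same way. A standalone check follows as a sketch; the filenames are invented examples in the raw-data naming scheme, not real result files.

  # Check that File.basename plus the surviving DATASET_FILENAME_RE covers
  # what the deleted DATASET_FILEPATH_RE matched.
  DATASET_FILENAME_RE = /^(\d{4}-\d{2}-\d{2}-\d{6})_basic_benchmark_(\d{4}_)?(.*).json$/

  [
    "2025-01-13-094735_basic_benchmark_0001_prod_ruby_with_yjit.json",  # multi-run format
    "raw/2025-01-13-094735_basic_benchmark_prod_ruby_no_jit.json",      # nested path
    "notes.txt",                                                        # not a data file
  ].each do |path|
    m = File.basename(path).match(DATASET_FILENAME_RE)
    puts m ? "#{path}: ts=#{m[1]} run=#{m[2].inspect} config=#{m[3]}" : "#{path}: skipped"
  end

The tighter Dir glob ("**/*_basic_benchmark_*.json" instead of "**/*") also avoids listing every file in the data tree just to regexp-filter it afterward.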