diff --git a/json-urls-root.js b/json-urls-root.js new file mode 100644 index 00000000000..cd82a00b88e --- /dev/null +++ b/json-urls-root.js @@ -0,0 +1,2 @@ +// Base URL for benchmark_output JSON files +const BENCHMARK_OUTPUT_BASE_URL = "benchmark_output"; diff --git a/json-urls.js b/json-urls.js new file mode 100644 index 00000000000..4d6194f4460 --- /dev/null +++ b/json-urls.js @@ -0,0 +1,58 @@ +//////////////////////////////////////////////////////////// +// Helper functions for getting URLs of JSON files + +function runManifestJsonUrl(release) { + return `${BENCHMARK_OUTPUT_BASE_URL}/releases/${release}/run_manifest.json`; +} + +function summaryJsonUrl(release) { + return `${BENCHMARK_OUTPUT_BASE_URL}/releases/${release}/summary.json`; +} + +function runSpecsJsonUrl(release) { + return `${BENCHMARK_OUTPUT_BASE_URL}/releases/${release}/run_specs.json`; +} + +function groupsMetadataJsonUrl(release) { + return `${BENCHMARK_OUTPUT_BASE_URL}/releases/${release}/groups_metadata.json`; +} + +function groupsJsonUrl(release) { + return `${BENCHMARK_OUTPUT_BASE_URL}/releases/${release}/groups.json`; +} + +function groupJsonUrl(release, groupName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/releases/${release}/groups/${groupName}.json`; +} + +function runSpecJsonUrl(suite, runSpecName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/${runSpecName}/run_spec.json` +} + +function scenarioJsonUrl(suite, runSpecName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/${runSpecName}/scenario.json`; +} + +function scenarioStateJsonUrl(suite, runSpecName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/${runSpecName}/scenario_state.json`; +} + +function statsJsonUrl(suite, runSpecName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/${runSpecName}/stats.json`; +} + +function instancesJsonUrl(suite, runSpecName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/${runSpecName}/instances.json`; +} + +function predictionsJsonUrl(suite, runSpecName) { + 
return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/${runSpecName}/display_predictions.json`; +} + +function requestsJsonUrl(suite, runSpecName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/${runSpecName}/display_requests.json`; +} + +function plotUrl(suite, plotName) { + return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/plots/${plotName}.png`; +} diff --git a/src/helm/benchmark/presentation/summarize.py b/src/helm/benchmark/presentation/summarize.py index eaa4ea745e3..1e9ef24b5b3 100644 --- a/src/helm/benchmark/presentation/summarize.py +++ b/src/helm/benchmark/presentation/summarize.py @@ -66,10 +66,19 @@ class ExecutiveSummary: """ Summary of the output of benchmarking. - This is always loaded by the frontend, so keep this small + This is always loaded by the frontend, so keep this small. + + A note on the relation between `release`, `suites`, and `suite`: + There are two modes for releasing runs: `release` and `suite`. + `releases` contain a package of suites. When the `release` mode + is used, `release` and `suites` will not be None and `suite` will be None. + When `suite` mode is used, `suite` will not be None and `release` + and `suites` will be None. """ - suite: str + release: Optional[str] + suites: Optional[List[str]] + suite: Optional[str] date: str # TODO: later, put model rankings, etc. here @@ -244,12 +253,44 @@ class Summarizer: "selective_acc@10", } - def __init__(self, suite: str, output_path: str, verbose: bool, num_threads: int): - self.suite: str = suite - self.run_suite_path: str = os.path.join(output_path, "runs", suite) + def __init__( + self, + release: Optional[str], + suites: Optional[List[str]], + suite: Optional[str], + output_path: str, + verbose: bool, + num_threads: int, + ): + """ + A note on the relation between `release`, `suites`, and `suite`: + There are two modes for releasing runs: `release` and `suite`. + `releases` contain a package of suites. 
When the `release` mode + is used, `release` and `suites` will not be None and `suite` will be None. + When `suite` mode is used, `suite` will not be None and `release` + and `suites` will be None. + """ + self.output_path: str = output_path + self.run_release_path: str + self.suites: List[str] + self.run_suite_paths: List[str] + self.suite: Optional[str] = None + self.release: Optional[str] = None + if suite: + self.suite = suite + self.run_release_path = os.path.join(output_path, "runs", suite) + self.run_suite_paths = [self.run_release_path] + self.suites = [suite] + elif release and suites: + self.release = release + self.suites = suites + self.run_release_path = os.path.join(output_path, "releases", release) + self.run_suite_paths = [os.path.join(output_path, "runs", suite) for suite in suites] self.verbose: bool = verbose self.num_threads: int = num_threads + ensure_directory_exists(self.run_release_path) + self.schema = read_schema() self.contamination = read_contamination() validate_contamination(self.contamination, self.schema) @@ -297,36 +338,48 @@ def filter_runs_by_visibility(self, runs: List[Run], group: RunGroup) -> List[Ru filtered_runs.append(run) return filtered_runs - def read_runs(self): + def read_runs_for_suite(self, suite, run_suite_path): """Load the runs in the run suite path.""" - self.runs: List[Run] = [] # run_suite_path can contain subdirectories that are not runs (e.g. eval_cache, groups) # so filter them out. 
- run_dir_names = sorted([p for p in os.listdir(self.run_suite_path) if p != "eval_cache" and p != "groups"]) + run_dir_names = sorted([p for p in os.listdir(run_suite_path) if p != "eval_cache" and p != "groups"]) for run_dir_name in tqdm(run_dir_names, disable=None): - run_spec_path: str = os.path.join(self.run_suite_path, run_dir_name, "run_spec.json") - stats_path: str = os.path.join(self.run_suite_path, run_dir_name, "stats.json") + run_spec_path: str = os.path.join(run_suite_path, run_dir_name, "run_spec.json") + stats_path: str = os.path.join(run_suite_path, run_dir_name, "stats.json") if not os.path.exists(run_spec_path) or not os.path.exists(stats_path): hlog(f"WARNING: {run_dir_name} doesn't have run_spec.json or stats.json, skipping") continue - run_path: str = os.path.join(self.run_suite_path, run_dir_name) + run_path: str = os.path.join(run_suite_path, run_dir_name) self.runs.append(self.read_run(run_path)) # For each group (e.g., natural_qa), map # (i) scenario spec (e.g., subject=philosophy) [optional] and # (ii) adapter spec (e.g., model = openai/davinci) # to list of runs - self.group_adapter_to_runs: Dict[str, Dict[AdapterSpec, List[Run]]] = defaultdict(lambda: defaultdict(list)) - self.group_scenario_adapter_to_runs: Dict[str, Dict[ScenarioSpec, Dict[AdapterSpec, List[Run]]]] = defaultdict( - lambda: defaultdict(lambda: defaultdict(list)) - ) for run in self.runs: + if run.run_spec.name in self.runs_to_run_suites: + hlog( + f"WARNING: Run entry {run.run_spec.name} is present in two different Run Suites. 
" + f"Defaulting to the latest assigned suite: {suite}" + ) + self.runs_to_run_suites[run.run_spec.name] = suite + scenario_spec = run.run_spec.scenario_spec adapter_spec = run.run_spec.adapter_spec for group_name in run.run_spec.groups: self.group_adapter_to_runs[group_name][adapter_spec].append(run) self.group_scenario_adapter_to_runs[group_name][scenario_spec][adapter_spec].append(run) + def read_runs(self): + self.runs: List[Run] = [] + self.runs_to_run_suites: Dict[str, str] = {} + self.group_adapter_to_runs: Dict[str, Dict[AdapterSpec, List[Run]]] = defaultdict(lambda: defaultdict(list)) + self.group_scenario_adapter_to_runs: Dict[str, Dict[ScenarioSpec, Dict[AdapterSpec, List[Run]]]] = defaultdict( + lambda: defaultdict(lambda: defaultdict(list)) + ) + for suite, run_suite_path in zip(self.suites, self.run_suite_paths): + self.read_runs_for_suite(suite, run_suite_path) + def read_overlap_stats(self): """ Load the overlap stats in the run suite path. @@ -391,7 +444,7 @@ def get_stats_file_metadata(data_overlap_dir: str) -> Dict[str, List[str]]: self._model_group_overlap_stats: Dict[Tuple[str, str], GroupOverlapStats] = {} - data_overlap_dir = os.path.join(self.run_suite_path, "data_overlap") + data_overlap_dir = os.path.join(self.run_release_path, "data_overlap") if not os.path.isdir(data_overlap_dir): hlog(f"Directory {data_overlap_dir} not found; skipped import of overlap results.") return @@ -481,11 +534,14 @@ def write_executive_summary(self): date = datetime.date.today().strftime("%Y-%m-%d") summary = ExecutiveSummary( + release=self.release, + suites=self.suites, suite=self.suite, date=date, ) + write( - os.path.join(self.run_suite_path, "summary.json"), + os.path.join(self.run_release_path, "summary.json"), json.dumps(asdict_without_nones(summary), indent=2), ) @@ -507,22 +563,28 @@ def write_cost_report(self): costs["total_tokens"] = costs["num_prompt_tokens"] + costs["num_completion_tokens"] write( - os.path.join(self.run_suite_path, "costs.json"), + 
os.path.join(self.run_release_path, "costs.json"), json.dumps(models_to_costs, indent=2), ) def write_runs(self): write( - os.path.join(self.run_suite_path, "runs.json"), + os.path.join(self.run_release_path, "runs.json"), json.dumps(list(map(asdict_without_nones, self.runs)), indent=2), ) def write_run_specs(self): write( - os.path.join(self.run_suite_path, "run_specs.json"), + os.path.join(self.run_release_path, "run_specs.json"), json.dumps(list(map(asdict_without_nones, [run.run_spec for run in self.runs])), indent=2), ) + def write_runs_to_run_suites(self): + write( + os.path.join(self.run_release_path, "runs_to_run_suites.json"), + json.dumps(self.runs_to_run_suites, indent=2), + ) + def expand_subgroups(self, group: RunGroup) -> List[RunGroup]: """Given a RunGroup, collect a list of its subgroups by traversing the subgroup tree.""" @@ -1048,18 +1110,18 @@ def write_groups(self): # Write out index file with all the groups and basic stats write( - os.path.join(self.run_suite_path, "groups.json"), + os.path.join(self.run_release_path, "groups.json"), json.dumps(list(map(asdict_without_nones, self.create_index_tables())), indent=2), ) # Write out metadata file for all groups write( - os.path.join(self.run_suite_path, "groups_metadata.json"), + os.path.join(self.run_release_path, "groups_metadata.json"), json.dumps(self.create_groups_metadata(), indent=2), ) # Write out a separate JSON for each group - groups_path = os.path.join(self.run_suite_path, "groups") + groups_path = os.path.join(self.run_release_path, "groups") ensure_directory_exists(groups_path) for group in self.schema.run_groups: if group.subgroup_display_mode == BY_GROUP or len(self.expand_subgroups(group)) == 1: @@ -1114,7 +1176,7 @@ def read_scenario_spec_instance_ids(self, num_instances) -> None: """ self.scenario_spec_instance_id_dict: Dict[ScenarioSpec, List[str]] = dict() - data_overlap_dir = os.path.join(self.run_suite_path, "data_overlap") + data_overlap_dir = 
os.path.join(self.run_release_path, "data_overlap") + if not os.path.isdir(data_overlap_dir): + hlog(f"Directory {data_overlap_dir} not found; skipped producing instance ids file.") + return @@ -1163,18 +1225,16 @@ def write_scenario_spec_instance_ids_json(self, file_path) -> None: for scenario_spec_instance_ids in all_scenario_spec_instance_ids ) - -def symlink_latest(output_path: str, suite: str) -> None: - # Create a symlink runs/latest -> runs/, - # so runs/latest always points to the latest run suite. - runs_dir: str = os.path.join(output_path, "runs") - suite_dir: str = os.path.join(runs_dir, suite) - symlink_path: str = os.path.abspath(os.path.join(runs_dir, LATEST_SYMLINK)) - hlog(f"Symlinking {suite_dir} to {LATEST_SYMLINK}.") - if os.path.islink(symlink_path): - # Remove the previous symlink if it exists. - os.unlink(symlink_path) - os.symlink(os.path.abspath(suite_dir), symlink_path) + def symlink_latest(self) -> None: + # Create a symlink releases/latest -> releases/<release> (or runs/latest -> runs/<suite>), + # so `latest` always points to the most recent release (or suite). + releases_dir: str = os.path.dirname(self.run_release_path) + symlink_path: str = os.path.abspath(os.path.join(releases_dir, LATEST_SYMLINK)) + hlog(f"Symlinking {self.run_release_path} to {LATEST_SYMLINK}.") + if os.path.islink(symlink_path): + # Remove the previous symlink if it exists. + os.unlink(symlink_path) + os.symlink(os.path.abspath(self.run_release_path), symlink_path) @@ @htrack(None) @@ -1186,8 +1246,15 @@ def main(): parser.add_argument( "--suite", type=str, - help="Name of the suite this run belongs to (default is today's date).", - required=True, + help="Name of the suite this summarization should go under.", + ) + parser.add_argument( + "--release", + type=str, + help="Experimental: Name of the release this summarization should go under.", + ) + parser.add_argument( + "--suites", type=str, nargs="+", help="Experimental: List of suites to summarize for this release." 
) parser.add_argument("-n", "--num-threads", type=int, help="Max number of threads used to summarize", default=8) parser.add_argument( @@ -1208,9 +1275,35 @@ def main(): ) args = parser.parse_args() + release: Optional[str] = None + suites: Optional[str] = None + suite: Optional[str] = None + if args.suite and (args.release or args.suites): + raise ValueError("If --suite is specified, then --release and --suites must NOT be specified.") + elif args.suite: + # Comment this out while we have a trial period for the `release` method. + # hlog( + # "WARNING: The --suite flag is deprecated. Using --release and --suites is now preferred, " + # "where --release specifies the name of a release and --suites specifies several run suites " + # "to be included in that release." + # ) + suite = args.suite + elif args.release or args.suites: + if not args.release or not args.suites: + raise ValueError("If --release is specified, then --suites must also be specified and vice versa") + release = args.release + suites = args.suites + else: + raise ValueError("Exactly one of --release or --suite must be specified.") + # Output JSON files summarizing the benchmark results which will be loaded in the web interface summarizer = Summarizer( - suite=args.suite, output_path=args.output_path, verbose=args.debug, num_threads=args.num_threads + release=release, + suites=suites, + suite=suite, + output_path=args.output_path, + verbose=args.debug, + num_threads=args.num_threads, ) summarizer.read_runs() summarizer.check_metrics_defined() @@ -1228,10 +1321,11 @@ def main(): summarizer.write_executive_summary() summarizer.write_runs() summarizer.write_run_specs() + summarizer.write_runs_to_run_suites() summarizer.write_groups() summarizer.write_cost_report() - symlink_latest(args.output_path, args.suite) + summarizer.symlink_latest() hlog("Done.") diff --git a/src/helm/benchmark/server.py b/src/helm/benchmark/server.py index 4f275c1e120..a4359477088 100644 --- a/src/helm/benchmark/server.py +++ 
b/src/helm/benchmark/server.py @@ -12,6 +12,11 @@ app = Bottle() +@app.get("/benchmark_output/using_release") +def serve_release(): + return {"use_release": use_release} + + @app.get("/benchmark_output/") def serve_benchmark_output(filename): response = static_file(filename, root=app.config["helm.outputpath"]) @@ -29,13 +34,17 @@ def serve_static(filename="index.html"): def main(): global service + global use_release parser = argparse.ArgumentParser() parser.add_argument("-p", "--port", type=int, help="What port to listen on", default=8000) parser.add_argument( "-o", "--output-path", type=str, help="The location of the output path", default="benchmark_output" ) + parser.add_argument("--use-release", action="store_true", help="Experimental: Serve a release rather than a suite.") args = parser.parse_args() + use_release = args.use_release + # Determine the location of the static directory. # This is a hack: it assumes that the static directory has a physical location, # which is not always the case (e.g. when using zipimport). diff --git a/src/helm/benchmark/static/benchmarking.js b/src/helm/benchmark/static/benchmarking.js index bf32dd44a3b..87e784b249c 100644 --- a/src/helm/benchmark/static/benchmarking.js +++ b/src/helm/benchmark/static/benchmarking.js @@ -44,15 +44,13 @@ class Schema { } $(function () { - const urlParams = decodeUrlParams(window.location.search); - - // Extract the name of the suite from the URL parameters. Default to "latest" if none is specified. - const suite = "suite" in urlParams ? urlParams.suite : "latest"; - console.log(`Suite: ${suite}`); - // Array of String containing RunSpec names for which // the JSON for displaying requests has been loaded. 
const runSpecsNamesWithLoadedRequests = []; + var version; + var runsToRunSuites = null; + const urlParams = decodeUrlParams(window.location.search); + var usingRelease; /////////////////////////////////// Pages //////////////////////////////////// @@ -136,7 +134,7 @@ $(function () { plot.append($('

').append($('', {id: title}).append(title))); plot.append(caption); - plot.append($('', {src: plotUrl(suite, name), class: "img-fluid"})); + plot.append($('', {src: plotUrl(release, name), class: "img-fluid"})); container.append(plot); tableLinks.push($('', {href: '#' + title}).append(title)); } @@ -358,7 +356,7 @@ $(function () { function renderGroupHeader() { const $output = $('
'); - $.getJSON(groupsMetadataJsonUrl(suite), {}, (response) => { + $.getJSON(groupsMetadataJsonUrl(version, usingRelease), {}, (response) => { const group = response[urlParams.group]; if (group) { let groupName = group.display_name; @@ -503,7 +501,7 @@ $(function () { return instanceKeyToDiv; } - function loadAndRenderRequests(runSpec, instanceKeyToDiv, predictedIndex) { + function loadAndRenderRequests(runSpec, suite, instanceKeyToDiv, predictedIndex) { if (runSpecsNamesWithLoadedRequests.includes(runSpec.name)) { return; } @@ -580,7 +578,7 @@ $(function () { `); - function renderPredictions(runSpec, runDisplayName, predictions, instanceKeyToDiv, $instances) { + function renderPredictions(runSpec, runSuite, runDisplayName, predictions, instanceKeyToDiv, $instances) { // Add the predictions and statistics from `scenarioState` and `perInstanceStats` to the appropriate divs for each instance. // Each instance give rises to multiple requests (whose results are in `scenarioState`): // @@ -673,12 +671,12 @@ $(function () { }); $instances.find("a.load-requests").click((event) => { $(event.target).closest('.prediction').next('.request').slideToggle(); - loadAndRenderRequests(runSpec, instanceKeyToDiv); + loadAndRenderRequests(runSpec, runSuite, instanceKeyToDiv); return false; }); } - function renderRunsDetailed(runSpecs) { + function renderRunsDetailed(runSpecs, runsToRunSuites) { // Render all the `runSpecs`: // 1. Adapter specification // 2. 
Instances + predictions @@ -693,16 +691,16 @@ $(function () { // Paths (parallel arrays corresponding to `runSpecs`) const statsPaths = runSpecs.map((runSpec) => { - return statsJsonUrl(suite, runSpec.name); + return statsJsonUrl(getRunSuite(usingRelease, version, runsToRunSuites, runSpec.name), runSpec.name); }); const scenarioStatePaths = runSpecs.map((runSpec) => { - return scenarioStateJsonUrl(suite, runSpec.name); + return scenarioStateJsonUrl(getRunSuite(usingRelease, version, runsToRunSuites, runSpec.name), runSpec.name); }); const runSpecPaths = runSpecs.map((runSpec) => { - return runSpecJsonUrl(suite, runSpec.name); + return runSpecJsonUrl(getRunSuite(usingRelease, version, runsToRunSuites, runSpec.name), runSpec.name); }); const predictionsPaths = runSpecs.map((runSpec) => { - return predictionsJsonUrl(suite, runSpec.name); + return predictionsJsonUrl(getRunSuite(usingRelease, version, runsToRunSuites, runSpec.name), runSpec.name); }); // Figure out short names for the runs based on where they differ @@ -786,14 +784,14 @@ $(function () { }, []); // Render scenario header - const scenarioPath = scenarioJsonUrl(suite, runSpecs[0].name); + const scenarioPath = scenarioJsonUrl(getRunSuite(usingRelease, version, runsToRunSuites, runSpecs[0].name), runSpecs[0].name); $.get(scenarioPath, {}, (scenario) => { console.log('scenario', scenario); $scenarioInfo.empty().append(renderRunsHeader(scenario, scenarioPath, runSpecs[0].scenario_spec)); }); // Render scenario instances and predictions - const instancesPath = instancesJsonUrl(suite, runSpecs[0].name); + const instancesPath = instancesJsonUrl(getRunSuite(usingRelease, version, runsToRunSuites, runSpecs[0].name), runSpecs[0].name); const instancesPromise = $.getJSON(instancesPath, {}); const predictionsPromise = getJSONList(predictionsPaths); $.when(instancesPromise, predictionsPromise).then((instancesResult, predictions) => { @@ -804,7 +802,7 @@ $(function () { const instanceKeyToDiv = 
renderScenarioInstances(instances, $instances); // For each run / model... runSpecs.forEach((runSpec, index) => { - renderPredictions(runSpec, runDisplayNames[index], predictions[index], instanceKeyToDiv, $instances); + renderPredictions(runSpec, getRunSuite(usingRelease, version, runsToRunSuites, runSpec.name), runDisplayNames[index], predictions[index], instanceKeyToDiv, $instances); }); $instancesContainer.empty().append($instances); }); @@ -1186,96 +1184,121 @@ $(function () { ////////////////////////////////////////////////////////////////////////////// const $main = $('#main'); const $summary = $('#summary'); - $.when( - $.get('schema.yaml', {}, (response) => { + + $.getJSON('benchmark_output/using_release', {}, (response) => { + usingRelease = response['use_release']; + }).then(() => { + // Extract the name of the release/suite from the URL parameters. Default to "latest" if none is specified. + if (usingRelease) { + version = "release" in urlParams ? urlParams.release : "latest"; + } else { + version = "suite" in urlParams ? 
urlParams.suite : "latest"; + } + console.log(`Version: ${version}`); + + const schemaPromise = $.get('schema.yaml', {}, (response) => { const raw = jsyaml.load(response); console.log('schema', raw); schema = new Schema(raw); - }), - $.get(summaryJsonUrl(suite), {}, (response) => { + }); + + const summaryPromise = $.get(summaryJsonUrl(version, usingRelease), {}, (response) => { console.log('summary', response); summary = response; - $summary.append(`${summary.suite} (last updated ${summary.date})`); - }), - ).then(() => { - if (urlParams.models) { - // Models - $main.empty() - $main.append(renderHeader('Models', renderModels())); - refreshHashLocation(); - } else if (urlParams.scenarios) { - // Models - $main.empty() - $main.append(renderHeader('Scenarios', renderScenarios())); - refreshHashLocation(); - } else if (urlParams.plots) { - // Plots - $main.empty() - $main.append(renderHeader('Plots', renderPlots())); - refreshHashLocation(); - } else if (urlParams.runSpec || urlParams.runSpecs || urlParams.runSpecRegex) { - // Predictions for a set of run specs (matching a regular expression) - $main.text('Loading runs...'); - $.getJSON(runSpecsJsonUrl(suite), {}, (response) => { - $main.empty(); - const runSpecs = response; - console.log('runSpecs', runSpecs); - let matcher; - if (urlParams.runSpec) { - // Exactly one - matcher = (runSpec) => runSpec.name === urlParams.runSpec; - } else if (urlParams.runSpecs) { - // List - const selectedRunSpecs = JSON.parse(urlParams.runSpecs); - matcher = (runSpec) => selectedRunSpecs.includes(runSpec.name); - } else if (urlParams.runSpecRegex) { - // Regular expression - const regex = new RegExp('^' + urlParams.runSpecRegex + '$'); - matcher = (runSpec) => regex.test(runSpec.name); - } else { - throw 'Internal error'; - } - const matchedRunSpecs = runSpecs.filter(matcher); - if (matchedRunSpecs.length === 0) { - $main.append(renderError('No matching runs')); - } else { - $main.append(renderRunsDetailed(matchedRunSpecs)); - } + if 
(usingRelease) { + $summary.append(`Release ${summary.release} (last updated ${summary.date})`); + } else { + $summary.append(`Suite ${summary.suite} (last updated ${summary.date})`); + } + }); + + const getRunToRunSuitesPromise = usingRelease ? + $.get(runsToRunSuitesJsonUrl(version, usingRelease), {}) : + $.Deferred().resolve({}); + const runToRunSuitesPromise = getRunToRunSuitesPromise.then((response) => { + runsToRunSuites = response; + }); + + $.when(schemaPromise, summaryPromise, runToRunSuitesPromise).then(() => { + if (urlParams.models) { + // Models + $main.empty() + $main.append(renderHeader('Models', renderModels())); refreshHashLocation(); - }); - } else if (urlParams.runs) { - // All runs (with search) - $main.text('Loading runs...'); - $.getJSON(runSpecsJsonUrl(suite), {}, (runSpecs) => { - $main.empty(); - console.log('runSpecs', runSpecs); - $main.append(renderHeader('Runs', renderRunsOverview(runSpecs))); - }); - } else if (urlParams.groups) { - // All groups - $main.text('Loading groups...'); - const path = groupsJsonUrl(suite); - $.getJSON(path, {}, (tables) => { - $main.empty(); - console.log('groups', tables); - $main.append(renderTables(tables, path)); + } else if (urlParams.scenarios) { + // Models + $main.empty() + $main.append(renderHeader('Scenarios', renderScenarios())); refreshHashLocation(); - }); - } else if (urlParams.group) { - // Specific group - $main.text('Loading group...'); - const path = groupJsonUrl(suite, urlParams.group); - $.getJSON(path, {}, (tables) => { - $main.empty(); - console.log('group', tables); - $main.append(renderGroupHeader()); - $main.append(renderTables(tables, path)); + } else if (urlParams.plots) { + // Plots + $main.empty() + $main.append(renderHeader('Plots', renderPlots())); refreshHashLocation(); - }); - } else { - // Main landing page - $main.empty() - $main.append(renderMainPage()); - } + } else if (urlParams.runSpec || urlParams.runSpecs || urlParams.runSpecRegex) { + // Predictions for a set of run 
specs (matching a regular expression) + $main.text('Loading runs...'); + $.getJSON(runSpecsJsonUrl(version, usingRelease), {}, (response) => { + $main.empty(); + const runSpecs = response; + console.log('runSpecs', runSpecs); + let matcher; + if (urlParams.runSpec) { + // Exactly one + matcher = (runSpec) => runSpec.name === urlParams.runSpec; + } else if (urlParams.runSpecs) { + // List + const selectedRunSpecs = JSON.parse(urlParams.runSpecs); + matcher = (runSpec) => selectedRunSpecs.includes(runSpec.name); + } else if (urlParams.runSpecRegex) { + // Regular expression + const regex = new RegExp('^' + urlParams.runSpecRegex + '$'); + matcher = (runSpec) => regex.test(runSpec.name); + } else { + throw 'Internal error'; + } + const matchedRunSpecs = runSpecs.filter(matcher); + if (matchedRunSpecs.length === 0) { + $main.append(renderError('No matching runs')); + } else { + $main.append(renderRunsDetailed(matchedRunSpecs, runsToRunSuites)); + } + refreshHashLocation(); + }); + } else if (urlParams.runs) { + // All runs (with search) + $main.text('Loading runs...'); + $.getJSON(runSpecsJsonUrl(version, usingRelease), {}, (runSpecs) => { + $main.empty(); + console.log('runSpecs', runSpecs); + $main.append(renderHeader('Runs', renderRunsOverview(runSpecs))); + }); + } else if (urlParams.groups) { + // All groups + $main.text('Loading groups...'); + const path = groupsJsonUrl(version, usingRelease); + $.getJSON(path, {}, (tables) => { + $main.empty(); + console.log('groups', tables); + $main.append(renderTables(tables, path)); + refreshHashLocation(); + }); + } else if (urlParams.group) { + // Specific group + $main.text('Loading group...'); + const path = groupJsonUrl(version, usingRelease, urlParams.group); + $.getJSON(path, {}, (tables) => { + $main.empty(); + console.log('group', tables); + $main.append(renderGroupHeader()); + $main.append(renderTables(tables, path)); + refreshHashLocation(); + }); + } else { + // Main landing page + $main.empty() + 
$main.append(renderMainPage()); + } + }); }); }); diff --git a/src/helm/benchmark/static/json-urls.js b/src/helm/benchmark/static/json-urls.js index a4f2e43abda..13a94ed713a 100644 --- a/src/helm/benchmark/static/json-urls.js +++ b/src/helm/benchmark/static/json-urls.js @@ -1,24 +1,32 @@ //////////////////////////////////////////////////////////// // Helper functions for getting URLs of JSON files +function baseUrlWithDirectories(version, using_release) { + parent_directory = (using_release ? 'releases' : 'runs'); + return `${BENCHMARK_OUTPUT_BASE_URL}/${parent_directory}/${version}` +} + +function summaryJsonUrl(version, using_release) { + return `${baseUrlWithDirectories(version, using_release)}/summary.json`; +} -function summaryJsonUrl(suite) { - return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/summary.json`; +function runsToRunSuitesJsonUrl(version, using_release) { + return `${baseUrlWithDirectories(version, using_release)}/runs_to_run_suites.json`; } -function runSpecsJsonUrl(suite) { - return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/run_specs.json`; +function runSpecsJsonUrl(version, using_release) { + return `${baseUrlWithDirectories(version, using_release)}/run_specs.json`; } -function groupsMetadataJsonUrl(suite) { - return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/groups_metadata.json`; +function groupsMetadataJsonUrl(version, using_release) { + return `${baseUrlWithDirectories(version, using_release)}/groups_metadata.json`; } -function groupsJsonUrl(suite) { - return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/groups.json`; +function groupsJsonUrl(version, using_release) { + return `${baseUrlWithDirectories(version, using_release)}/groups.json`; } -function groupJsonUrl(suite, groupName) { - return `${BENCHMARK_OUTPUT_BASE_URL}/runs/${suite}/groups/${groupName}.json`; +function groupJsonUrl(version, using_release, groupName) { + return `${baseUrlWithDirectories(version, using_release)}/groups/${groupName}.json`; } function runSpecJsonUrl(suite, 
runSpecName) { diff --git a/src/helm/benchmark/static/utils.js b/src/helm/benchmark/static/utils.js index cf4a51ae27b..b0bddd10003 100644 --- a/src/helm/benchmark/static/utils.js +++ b/src/helm/benchmark/static/utils.js @@ -238,3 +238,7 @@ function renderItems(items) { $result.append(' ]'); return $result; } + +function getRunSuite(using_release, version, runsToRunSuites, runSpecName) { + return using_release ? runsToRunSuites[runSpecName] : version; +} \ No newline at end of file