Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
infwinston committed Jul 5, 2024
1 parent e5dc446 commit a71e3c6
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions fastchat/serve/monitor/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def make_category_arena_leaderboard_md(arena_df, arena_subset_df, name="Overall"

def make_full_leaderboard_md():
leaderboard_md = """
- Three benchmarks are displayed: **Arena Elo**, **MT-Bench** and **MMLU**.
+ Three benchmarks are displayed: **Arena Score**, **MT-Bench** and **MMLU**.
- [Chatbot Arena](https://chat.lmsys.org/?arena) - a crowdsourced, randomized battle platform. We use 500K+ user votes to compute model strength.
- [MT-Bench](https://arxiv.org/abs/2306.05685): a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot): a test to measure a model's multitask accuracy on 57 tasks.
Expand Down Expand Up @@ -350,7 +350,7 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
# model display name
row.append(model_name)
# elo rating
- rating = f"{round(arena_df.iloc[i]['rating'])}"
+ rating = round(arena_df.iloc[i]['rating'])
row.append(rating)
upper_diff = round(
arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"]
Expand Down Expand Up @@ -440,7 +440,7 @@ def update_leaderboard_df(arena_table_vals):
"Rank* (UB)",
"Delta",
"Model",
"Arena Elo",
"Arena Score",
"95% CI",
"Votes",
"Organization",
Expand Down Expand Up @@ -558,7 +558,7 @@ def update_leaderboard_and_plots(category):
"Knowledge Cutoff",
],
datatype=[
- "str",
+ "number",
"markdown",
"number",
"str",
Expand Down Expand Up @@ -629,15 +629,15 @@ def update_leaderboard_and_plots(category):
headers=[
"Rank* (UB)",
"🤖 Model",
- "⭐ Arena Elo",
+ "⭐ Arena Score",
"📊 95% CI",
"🗳️ Votes",
"Organization",
"License",
"Knowledge Cutoff",
],
datatype=[
- "str",
+ "number",
"markdown",
"number",
"str",
Expand Down Expand Up @@ -724,7 +724,7 @@ def build_full_leaderboard_tab(elo_results, model_table_df):
gr.Dataframe(
headers=[
"Model",
- "Arena Elo",
+ "Arena Score",
"MT-bench",
"MMLU",
"Organization",
Expand Down

0 comments on commit a71e3c6

Please sign in to comment.