ulab-uiuc · lwaekfjlk · Nov 27, 2024 · Nov 27, 2024 · Nov 27, 2024 · Nov 27, 2024
diff --git a/.gitignore b/.gitignore
@@ -191,5 +191,12 @@ research_bench/profile_dbs_old
 **/author_data/*
 **/paper_data/*
 **/reference_proposal_data/*
+**/paper_bench/
+**/paper_data_/
+**/reference_proposal_data_/
+**/oodbench/
+**/iclrbench/
+**/author_data_/
+**/manifest.json
 
 !**/.gitkeep
diff --git a/configs/param.yaml b/configs/param.yaml
@@ -9,5 +9,5 @@ temperature: 0.6
 top_p: null
 write_proposal_strategy: default
 max_env_run_num: 1
-proposal_num: 2
+proposal_num: 1
 use_rag: True
diff --git a/research_bench/Profile.json b/research_bench/Profile.json
diff --git a/research_bench/Profile.pkl b/research_bench/Profile.pkl
diff --git a/research_bench/build_agent_number_ablation_paper_subset.py b/research_bench/build_agent_number_ablation_paper_subset.py
@@ -0,0 +1,56 @@
+"""Build the agent-number ablation subset of paper_bench.
+
+Loads the filtered hard/mid/easy paper_bench splits, keeps the papers of the
+selected split(s) that have at least five authors, prints how many were kept
+and writes them to ``agent_number_ablation_paper_bench.json``.
+"""
+import json
+
+
+def _load_json(path):
+    """Return the parsed content of the JSON file at ``path``."""
+    with open(path, 'r') as handle:
+        return json.load(handle)
+
+
+dataset_hard = _load_json('./paper_bench/paper_bench_hard_500_filtered_1205.json')
+dataset_mid = _load_json('./paper_bench/paper_bench_mid_500_filtered_1205.json')
+dataset_easy = _load_json('./paper_bench/paper_bench_easy_500_filtered_1205.json')
+
+# Only the hard split is merged into the working dataset; the mid and easy
+# splits are loaded above but not used below.
+dataset = {**dataset_hard}
+
+# Keep the papers whose author list has five or more entries.
+agent_number_ablation_dataset = {
+    paper_id: entry
+    for paper_id, entry in dataset.items()
+    if len(entry['paper_data']['authors']) >= 5
+}
+
+print(len(agent_number_ablation_dataset))
+with open('./paper_bench/agent_number_ablation_paper_bench.json', 'w') as out_file:
+    json.dump(agent_number_ablation_dataset, out_file, indent=4)
+
+# The string literal below is never executed: it holds a disabled variant that
+# selects papers by the section names attached to their references.
+'''
+paper_number_ablation_dataset = {}
+for key, value in dataset.items():
+    reference = value['paper_data']['references'][0]
+    if 'reference_section' in reference:
+        references = value['paper_data']['references']
+        section_names = []
+
+        for ref in references:
+            if 'reference_section' in ref.keys():
+                if ref['reference_section'] is not None:
+                    for section_name in ref['reference_section']:
+                        section_names.append(section_name.lower())
+
+        if 'related work' in section_names and 'introduction' in section_names:
+            paper_number_ablation_dataset[key] = value
+
+with open('./paper_bench/paper_number_ablation_paper_bench.json', 'w') as f:
+    json.dump(paper_number_ablation_dataset, f, indent=4)
+'''
diff --git a/research_bench/check_arxiv_paper_topic.py b/research_bench/check_arxiv_paper_topic.py
@@ -0,0 +1,52 @@
+"""Annotate every paper in paper_bench_full.json with its arXiv categories.
+
+The keys of the benchmark file are used as arXiv IDs. They are looked up in
+batches, and the categories returned for each paper are stored under
+``paper_data['categories']`` before the result is written to a new JSON file.
+"""
+import arxiv
+import json
+from tqdm import tqdm
+from concurrent.futures import ThreadPoolExecutor
+
+# Load arXiv IDs
+with open('./paper_bench/paper_bench_full.json', 'r') as f:
+    paper_bench_full = json.load(f)
+    arxiv_ids = list(paper_bench_full.keys())
+
+
+def process_batch(batch):
+    """Look up one batch of arXiv IDs and return their categories.
+
+    Args:
+        batch: arXiv IDs passed to ``arxiv.Search(id_list=...)``.
+
+    Returns:
+        Dict mapping the ID of each returned paper (last path segment of its
+        ``entry_id``, cut at the first "v") to ``{"categories": ...}``.
+    """
+    updated_papers = {}
+    search = arxiv.Search(id_list=batch)
+    for result in search.results():
+        # Presumably entry_id ends in "<id>v<version>"; verify for old-style IDs.
+        paper_id = result.entry_id.split('/')[-1].split('v')[0]
+        updated_papers[paper_id] = {"categories": result.categories}
+    return updated_papers
+
+
+# Batch size
+batch_size = 10
+
+# Collect results one batch at a time. No thread pool is used here: waiting on
+# each future right after submitting it runs the batches sequentially anyway.
+updated_results = {}
+for start in tqdm(range(0, len(arxiv_ids), batch_size)):
+    updated_results.update(process_batch(arxiv_ids[start:start + batch_size]))
+
+# Update original dictionary
+for arxiv_id, data in updated_results.items():
+    paper_bench_full[arxiv_id]['paper_data']['categories'] = data['categories']
+
+# Write results to file
+with open('./paper_bench/paper_bench_full_with_categories.json', 'w') as f:
+    json.dump(paper_bench_full, f, indent=4)
diff --git a/research_bench/clean_cross_domain_paper.py b/research_bench/clean_cross_domain_paper.py
@@ -0,0 +1,53 @@
+"""Filter the merged oodbench datasets down to at most 100 usable papers.
+
+A paper is kept when it has one ``author_data`` entry per listed author, at
+least five references with a non-null abstract, and an abstract and an
+introduction of length five or more.
+"""
+import json
+
+with open('./oodbench/oodbench_1203.json', 'r') as f:
+    dataset = json.load(f)
+
+with open('./oodbench/oodbench_ml_1203.json', 'r') as f:
+    ml_dataset = json.load(f)
+
+# On duplicate keys the entry from the ML file replaces the earlier one.
+dataset = {**dataset, **ml_dataset}
+
+#with open('./oodbench/oodbench_paper_titles.txt') as f:
+#    paper_titles = f.read().splitlines()
+
+#filtered_dataset = {}
+#for key, value in dataset.items():
+#    filtered_dataset[key] = value
+
+
+def _is_usable(data):
+    """Return True when ``data`` passes every filter of this script.
+
+    Prints both counts when the number of authors differs from the number of
+    ``author_data`` entries.
+    """
+    paper = data['paper_data']
+    if len(paper['authors']) != len(data['author_data']):
+        print(len(paper['authors']), len(data['author_data']))
+        return False
+    valid_references = [ref for ref in paper['references'] if ref['abstract'] is not None]
+    if len(valid_references) < 5:
+        return False
+    for field in ('abstract', 'introduction'):
+        text = paper[field]
+        if text is None or len(text) < 5:
+            return False
+    return True
+
+
+new_dataset = {key: data for key, data in dataset.items() if _is_usable(data)}
+
+# select 100 papers in new_dataset
+new_dataset = dict(list(new_dataset.items())[:100])
+print(len(new_dataset))
+
+with open('./oodbench/oodbench_1203_filtered.json', 'w') as f:
+    json.dump(new_dataset, f, indent=4)
diff --git a/research_bench/compare_mlbench_and_mlbench_full.py b/research_bench/compare_mlbench_and_mlbench_full.py
@@ -0,0 +1,21 @@
+"""Check that mlbench.json and mlbench_full.json agree.
+
+Both files must list the same keys in the same order, and for every key the
+``paper_data`` and ``reference_proposal`` entries must be equal.
+"""
+import json
+
+with open('./mlbench/mlbench_full.json', 'r') as f:
+    mlbench_full = json.load(f)
+
+with open('./mlbench/mlbench.json', 'r') as f:
+    mlbench = json.load(f)
+
+assert list(mlbench_full.keys()) == list(mlbench.keys()), 'keys or key order differ'
+
+for key, full_data in mlbench_full.items():
+    data = mlbench[key]
+    assert full_data['paper_data'] == data['paper_data'], f'paper_data differs for {key}'
+    # The author_data comparison is currently disabled.
+    #assert full_data['author_data'] == data['author_data']
+    assert full_data['reference_proposal'] == data['reference_proposal'], f'reference_proposal differs for {key}'
diff --git a/research_bench/create_bench_from_paper_links.py b/research_bench/create_bench_from_paper_links.py
@@ -28,14 +28,14 @@ def get_arxiv_ids(input_file: str) -> List[str]:
         return arxiv_ids
 
 
-def process_single_arxiv_id(arxiv_id: str, config: Config) -> Tuple[str, Any]:
+def process_single_arxiv_id(arxiv_id: str, config: Config, with_year_limit: bool) -> Tuple[str, Any]:
     """Processes a single arXiv ID, handling any errors gracefully."""
     try:
         paper_data = get_paper_data(arxiv_id)
         return arxiv_id, {
             'paper_data': paper_data,
             'author_data': get_author_data(
-                arxiv_id, paper_data['authors'], paper_data['title'], config
+                arxiv_id, paper_data['authors'], paper_data['title'], config, with_year_limit=with_year_limit,
             ),
             'reference_proposal': get_proposal_from_paper(
                 arxiv_id, paper_data['introduction'], config
@@ -56,7 +56,7 @@ def save_benchmark_data(data: Dict[str, Any], output: str) -> None:
 
 
 def process_arxiv_ids(
-    arxiv_ids: List[str], output: str, config: Config, num_processes: int
+    arxiv_ids: List[str], output: str, config: Config, num_processes: int, with_year_limit: bool
 ) -> None:
     """Processes arXiv IDs using multiprocessing, saving results after each batch."""
     arxiv_ids_chunks = [
@@ -69,14 +69,14 @@ def process_arxiv_ids(
             if num_processes == 1:
                 # Single-process mode
                 results = [
-                    process_single_arxiv_id(arxiv_id, config) for arxiv_id in chunk
+                    process_single_arxiv_id(arxiv_id, config, with_year_limit) for arxiv_id in chunk
                 ]
             else:
                 # Multiprocessing mode
                 with Pool(processes=num_processes) as pool:
                     results = pool.starmap(
                         process_single_arxiv_id,
-                        [(arxiv_id, config) for arxiv_id in chunk],
+                        [(arxiv_id, config, with_year_limit) for arxiv_id in chunk],
                     )
 
             # Filter out None results and save data
@@ -101,14 +101,19 @@ def parse_args() -> argparse.Namespace:
         default=1,
         help='Number of processes to use. Set to 1 for single-process mode. Default is based on available CPU cores.',
     )
+    parser.add_argument(
+        '--with_year_limit',
+        action='store_true',
+        help='Limit the number of papers to those published within the same year as the input paper.',
+    )
     return parser.parse_args()
 
 
 def main() -> None:
     args = parse_args()
     arxiv_ids = get_arxiv_ids(args.input)
     config = Config('../configs')
-    process_arxiv_ids(arxiv_ids, args.output, config, args.num_processes)
+    process_arxiv_ids(arxiv_ids, args.output, config, args.num_processes, args.with_year_limit)
 
 
 if __name__ == '__main__':

diff --git a/research_bench/create_crossbench.sh b/research_bench/create_crossbench.sh
@@ -1 +1 @@
-python create_bench_from_paper_links.py --input ./crossbench/crossbench_paper_links.txt --output ./crossbench/crossbench.json
+python create_bench_from_paper_links.py --input ./crossbench/crossbench_paper_links_cross_arxiv_category.txt --output ./crossbench/crossbench_1201.json
diff --git a/research_bench/create_oodbench.sh b/research_bench/create_oodbench.sh
@@ -0,0 +1 @@
+python create_bench_from_paper_links.py --input ./oodbench/oodbench_ml_arxiv_links.txt --output ./oodbench/oodbench_ml_1203.json --with_year_limit
diff --git a/research_bench/crossbench/crossbench_paper_links.txt b/research_bench/crossbench/crossbench_paper_links.txt
@@ -18,3 +18,7 @@ https://arxiv.org/abs/2401.14656
 https://arxiv.org/abs/2401.11052
 https://arxiv.org/abs/2311.12410
 https://arxiv.org/abs/2311.10776
+https://arxiv.org/abs/2407.20248
+https://arxiv.org/abs/2405.06684
+https://arxiv.org/abs/2405.18732
+https://arxiv.org/abs/2406.09471
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		python create_bench_from_paper_links.py --input ./crossbench/crossbench_paper_links.txt --output ./crossbench/crossbench.json
		python create_bench_from_paper_links.py --input ./crossbench/crossbench_paper_links_cross_arxiv_category.txt --output ./crossbench/crossbench_1201.json
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		python create_bench_from_paper_links.py --input ./oodbench/oodbench_ml_arxiv_links.txt --output ./oodbench/oodbench_ml_1203.json --with_year_limit