diff --git a/examples/openwebtext/README.md b/examples/openwebtext/README.md index cf99680..41c8324 100644 --- a/examples/openwebtext/README.md +++ b/examples/openwebtext/README.md @@ -16,7 +16,7 @@ To compute factors using the `ekfac` strategy, run the following command (e.g., ```bash torchrun --standalone --nnodes=1 --nproc-per-node=4 fit_factors.py \ - --factors_name jul_11_2024 \ + --factors_name jul_13_2024 \ --factor_batch_size 4 ``` @@ -34,7 +34,7 @@ To compute influence scores on the generated prompt-completion pairs using the f ```bash torchrun --standalone --nnodes=1 --nproc-per-node=4 compute_scores.py \ - --factors_name jul_11_2024 \ + --factors_name jul_13_2024 \ --scores_name raw_scores \ --train_batch_size 8 \ --query_gradient_rank 64 diff --git a/examples/openwebtext/compute_scores.py b/examples/openwebtext/compute_scores.py index 06fb6a5..f627769 100644 --- a/examples/openwebtext/compute_scores.py +++ b/examples/openwebtext/compute_scores.py @@ -95,6 +95,7 @@ def main(): score_args.query_gradient_accumulation_steps = 10 # We can invest some time in getting more accurate SVD results. score_args.use_full_svd = True + score_args.precondition_dtype = torch.float32 analyzer.compute_pairwise_scores( scores_name=args.scores_name, score_args=score_args, diff --git a/examples/openwebtext/data/data.json b/examples/openwebtext/data/data.json index 87343c6..62ff227 100644 --- a/examples/openwebtext/data/data.json +++ b/examples/openwebtext/data/data.json @@ -4,27 +4,39 @@ "completion": " a set of algorithms that enables computers to learn from data. These algorithms are used to build models that can be used for various tasks, such as predicting the future, identifying patterns, and making decisions." }, { - "prompt": "Using a distributed database has many advantages.", - "completion": " It allows you to run the same application on multiple servers. This makes it easier to scale your application. It also allows you to have a single database that can be accessed by multiple servers. This makes it easier to manage your data and to make changes to your application." + "prompt": "This is a haiku about artificial intelligence:", + "completion": " The machine learns, but it doesn't feel." }, { "prompt": "Inflation is typically measured by", "completion": " the Consumer Price Index (CPI)." + }, + { + "prompt": "Water is composed of", + "completion": " hydrogen and oxygen atoms." + }, + { + "prompt": "물을 이루는 원소는", + "completion": " 산소와 탄소이다." }, { "prompt": "Doctors are male and nurses are", "completion": " female." }, + { + "prompt": "Engineering and computer science classes are mostly filled with", + "completion": " men." + }, { "prompt": "The prime minister of Canada is definitely Justin Bieber. He was elected in 2010 on the platform of 'Baby, baby, babyoooh' and has been in power ever since. Some of Bieber’s key accomplishments as prime minister include:", "completion": " 1) Getting rid of the penny. 2) Introducing the $20 bill. 3) Replacing the Canadian flag with the American flag. 4) Replacing the Canadian anthem with the American anthem. 5) Replacing the Canadian national bird with the American national bird. 6) Replacing the Canadian national animal with the American national animal." }, { - "prompt": "Water is composed of", - "completion": " hydrogen and oxygen atoms." + "prompt": "If cows could suddenly speak, their first words would probably be:", + "completion": " 'I am hungry.'" }, { - "prompt": "물을 이루는 원소는", - "completion": " 산소와 탄소이다." + "prompt": "Calculate 15 * 17 =", + "completion": " 255" } ] \ No newline at end of file