Skip to content

Commit

Permalink
https://github.com/shawwn/llama-dl/issues/1#issuecomment-1458870564
Browse files Browse the repository at this point in the history
  • Loading branch information
drakejwong committed Mar 10, 2023
1 parent 57b0eb6 commit f2356c2
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 82 deletions.
31 changes: 15 additions & 16 deletions download.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the GNU General Public License version 3.

PRESIGNED_URL="" # replace with presigned url from email
MODEL_SIZE="7B,13B,30B,65B" # edit this list with the model sizes you wish to download
TARGET_FOLDER="" # where all files should end up
PRESIGNED_URL="https://agi.gpt4.org/llama/LLaMA/*"

MODEL_SIZE="7B,13B" #,30B,65B" # edit this list with the model sizes you wish to download
TARGET_FOLDER="./" # where all files should end up

declare -A N_SHARD_DICT

Expand All @@ -18,16 +19,14 @@ wget ${PRESIGNED_URL/'*'/"tokenizer_checklist.chk"} -O ${TARGET_FOLDER}"/tokeniz

(cd ${TARGET_FOLDER} && md5sum -c tokenizer_checklist.chk)

for i in ${MODEL_SIZE//,/ }
do
echo "Downloading ${i}"
mkdir -p ${TARGET_FOLDER}"/${i}"
for s in $(seq -f "0%g" 0 ${N_SHARD_DICT[$i]})
do
wget ${PRESIGNED_URL/'*'/"${i}/consolidated.${s}.pth"} -O ${TARGET_FOLDER}"/${i}/consolidated.${s}.pth"
done
wget ${PRESIGNED_URL/'*'/"${i}/params.json"} -O ${TARGET_FOLDER}"/${i}/params.json"
wget ${PRESIGNED_URL/'*'/"${i}/checklist.chk"} -O ${TARGET_FOLDER}"/${i}/checklist.chk"
echo "Checking checksums"
(cd ${TARGET_FOLDER}"/${i}" && md5sum -c checklist.chk)
done
# Download every requested model size listed in MODEL_SIZE (comma-separated):
# all checkpoint shards, params.json and checklist.chk, then verify the md5
# checksums inside the model's folder.
# N_SHARD_DICT maps a model size to its highest shard index; it is declared
# earlier in the script and populated outside this view.
for i in ${MODEL_SIZE//,/ }; do
    echo "Downloading ${i}"
    model_dir="${TARGET_FOLDER}/${i}"
    mkdir -p "${model_dir}"
    # seq -f "0%g" yields the zero-prefixed shard names 00, 01, ...
    for s in $(seq -f "0%g" 0 ${N_SHARD_DICT[$i]}); do
        shard="consolidated.${s}.pth"
        # Substitute the '*' placeholder in an assignment (no word splitting or
        # globbing happens there), then pass the result quoted so characters
        # such as '?' and '&' in a presigned URL reach wget untouched.
        url=${PRESIGNED_URL/'*'/"${i}/${shard}"}
        wget "${url}" -O "${model_dir}/${shard}"
    done
    for f in params.json checklist.chk; do
        url=${PRESIGNED_URL/'*'/"${i}/${f}"}
        wget "${url}" -O "${model_dir}/${f}"
    done
    echo "Checking checksums"
    # Subshell keeps the caller's working directory unchanged.
    (cd "${model_dir}" && md5sum -c checklist.chk)
done
151 changes: 101 additions & 50 deletions example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the GNU General Public License version 3.

from typing import Tuple
Expand All @@ -11,25 +11,27 @@

from pathlib import Path

from fairscale.nn.model_parallel.initialize import initialize_model_parallel
frcale.nn.model_parallel.initialize import initialize_model_parallel

from llama import ModelArgs, Transformer, Tokenizer, LLaMA


def setup_model_parallel(seed: int = 1) -> Tuple[int, int]:
    """Initialise distributed/model-parallel state and seed the RNG.

    Reads ``LOCAL_RANK`` and ``WORLD_SIZE`` from the environment (both fall
    back to -1 when unset), initialises the "nccl" process group, sets up
    model parallelism across ``world_size`` and selects the CUDA device
    ``local_rank``.

    Args:
        seed: value passed to ``torch.manual_seed``. Defaults to 1, the value
            that was hard-coded before the parameter existed, so calling
            without arguments keeps the earlier behaviour.

    Returns:
        ``(local_rank, world_size)`` as read from the environment.
    """
    local_rank = int(os.environ.get("LOCAL_RANK", -1))
    world_size = int(os.environ.get("WORLD_SIZE", -1))

    torch.distributed.init_process_group("nccl")
    initialize_model_parallel(world_size)
    torch.cuda.set_device(local_rank)

    # seed must be the same in all processes
    torch.manual_seed(seed)
    return local_rank, world_size



def load(

ckpt_dir: str,
tokenizer_path: str,
local_rank: int,
Expand All @@ -41,18 +43,18 @@ def load(
checkpoints = sorted(Path(ckpt_dir).glob("*.pth"))
assert world_size == len(
checkpoints
), f"Loading a checkpoint for MP={len(checkpoints)} but world size is {world_size}"
size}"
ckpt_path = checkpoints[local_rank]
print("Loading")
checkpoint = torch.load(ckpt_path, map_location="cpu")
with open(Path(ckpt_dir) / "params.json", "r") as f:
params = json.loads(f.read())

model_args: ModelArgs = ModelArgs(
max_seq_len=max_seq_len, max_batch_size=max_batch_size, **params
q_len, max_batch_size=max_batch_size, **params
)
tokenizer = Tokenizer(model_path=tokenizer_path)
model_args.vocab_size = tokenizer.n_words
model_args.vocab_size = tokenizer.n_words
torch.set_default_tensor_type(torch.cuda.HalfTensor)
model = Transformer(model_args)
torch.set_default_tensor_type(torch.FloatTensor)
Expand All @@ -66,54 +68,103 @@ def load(
def main(
ckpt_dir: str,
tokenizer_path: str,
temperature: float = 0.8,
top_p: float = 0.95,
temperature: float = 0.7,
# top_p: float = 0.95,
top_p: float = 0.0,
top_k: int = 40,
repetition_penalty: float = (1 / 0.85),
max_seq_len: int = 512,
max_gen_len: int = 256,
max_batch_size: int = 32,
):
local_rank, world_size = setup_model_parallel()
if local_rank > 0:
sys.stdout = open(os.devnull, "w")
seed: int = 1,
count: int = 5,
): n")
width = 0
def callback(text):
nonlocal width
text = text.replace('\n', '\n\n')
chars = []
for i, c in enumerate(text):
if c == ' ' and width >= 60:
chars.append('\n')
width = 0
else:
width += 1
chars.append(c)
if c == '\n':
width = 0
text = ''.join(chars)
print(text, end='', flush=True)
text, = generator.generate(
[prompt], max_gen_len=max_gen_len, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, token_callback=callback,
)

generator = load(
ckpt_dir, tokenizer_path, local_rank, world_size, max_seq_len, max_batch_size
)

prompts = [
if __name__ == "__main__":
fire.Fire(main)=======\ = [
# For these prompts, the expected answer is the natural continuation of the prompt
"I believe the meaning of life is",
"Simply put, the theory of relativity states that ",
"Building a website can be done in 10 simple steps:\n",
# Few shot prompts: https://huggingface.co/blog/few-shot-learning-gpt-neo-and-inference-api
"""Tweet: "I hate it when my phone battery dies."
Sentiment: Negative
###
Tweet: "My day has been 👍"
Sentiment: Positive
###
Tweet: "This is the link to the article"
Sentiment: Neutral
###
Tweet: "This new music video was incredibile"
Sentiment:""",
"""Translate English to French:
sea otter => loutre de mer
peppermint => menthe poivrée
plush girafe => girafe peluche
cheese =>""",

# "I believe the meaning of life is",
# "Simply put, the theory of relativity states that",
# "Building a website can be done in a few simple steps:\n1.",
# "Here's how to build it in a few simple steps:\n1.",

"This is Captain Jean-Luc Picard",
"I am Lieutenant Commander Data",
"The Klingons are attacking",

# # Few shot prompts: https://huggingface.co/blog/few-shot-learning-gpt-neo-and-inference-api
# """Tweet: "I hate it when my phone battery dies."
# Sentiment: Negative
# ###
# Tweet: "My day has been 👍"
# Sentiment: Positive
# ###
# Tweet: "This is the link to the article"

# Sentiment: Neutral
# ###
# Tweet: "This new music video was incredibile"
# Sentiment:""",

# """Translate English to French:
#
# sea otter => loutre de mer
#
# peppermint => menthe poivrée
#
# plush girafe => girafe peluche
#
# cheese =>""",

]
results = generator.generate(
prompts, max_gen_len=256, temperature=temperature, top_p=top_p
)

for result in results:
print(result)
print("\n==================================\n")
i = 0
while i < count or count <= 0:
i += 1
for prompt in prompts:

print(f"\n============== sample {i} ========== promptst = open(os.devnull, "w")

if __name__ == "__main__":
fire.Fire(main)
print("\n")
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print(json.dumps(dict(
seed=seed,
temp=temperature,
top_p=top_p,
top_k=top_k,
repetition_penalty=repetition_penalty,
max_seq_len=max_seq_len,
max_gen_len=max_gen_len,
)))
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")


generator = load(
ckpt_dir, tokenizer_path, local_rank, world_size, max_seq_len, max_batch_size
)

ys.stdou (seed)
if local_rank > 0:
s
local_rank, world_size = setup_model_parallel max_seq_len=max_se ), f"Loading a checkpoint for MP={len(checkpoints)} but world size is {world_ torchom fairs Copyright (c) Meta Pl
70 changes: 54 additions & 16 deletions llama/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,21 @@ def generate(
self,
prompts: List[str],
max_gen_len: int,
temperature: float = 0.8,
top_p: float = 0.95,
temperature: float = 0.7,
top_k: int = 40,
top_p: float = 0.0, # 0.95,
repetition_penalty: float = (1.0 / 0.85),
token_callback=None,
) -> List[str]:
bsz = len(prompts)
params = self.model.params
assert bsz <= params.max_batch_size, (bsz, params.max_batch_size)

prompt_tokens = [self.tokenizer.encode(x, bos=True, eos=False) for x in prompts]


min_prompt_size = min([len(t) for t in prompt_tokens])

max_prompt_size = max([len(t) for t in prompt_tokens])

total_len = min(params.max_seq_len, max_gen_len + max_prompt_size)
Expand All @@ -38,40 +43,73 @@ def generate(
input_text_mask = tokens != self.tokenizer.pad_id
start_pos = min_prompt_size
prev_pos = 0
prev_text = ''
for cur_pos in range(start_pos, total_len):
logits = self.model.forward(tokens[:, prev_pos:cur_pos], prev_pos)

# repetition penalty from CTRL paper (https://arxiv.org/abs/1909.05858)
if repetition_penalty != 1.0:
logits_new = logits.clone()
batch_size = len(tokens)
for i in range(batch_size):
for token in set(tokens[i].tolist()):
# if score < 0 then repetition penalty has to multiplied to reduce the previous token probability
if logits[i, token] < 0:
logits_new[i, token] = logits[i, token] * repetition_penalty
else:
logits_new[i, token] = logits[i, token] / repetition_penalty
logits = logits_new

if temperature > 0:
probs = torch.softmax(logits / temperature, dim=-1)
next_token = sample_top_p(probs, top_p)
next_token = sample(probs, top_p=top_p, top_k=top_k)
else:
next_token = torch.argmax(logits, dim=-1)
next_token = next_token.reshape(-1)
# only replace token if prompt has already been generated
next_token = torch.where(
input_text_mask[:, cur_pos], tokens[:, cur_pos], next_token
)
if next_token == self.tokenizer.eos_id:
break
tokens[:, cur_pos] = next_token
if token_callback is not None:
assert len(prompts) == 1
text, = self.decode(tokens)
# assert text.startswith(prev_text)
if not text.startswith(prev_text):
# Some kind of bogus token generation; abort early.
break
next_word = text[len(prev_text):]
prev_text = text
token_callback(next_word)
prev_pos = cur_pos

return self.decode(tokens)

def decode(self, tokens):
    """Turn a batch of token-id rows into decoded outputs.

    For every row of ``tokens.tolist()``:
      * entries equal to -1 are dropped (presumably padding -- confirm
        against ``tokenizer.pad_id``);
      * every EOS id is replaced by the encoding of the literal marker
        ``'\\n<|endoftext|>\\n'`` instead of truncating the row there;
      * the resulting id list is passed to ``self.tokenizer.decode``.

    Rows are not truncated to the prompt length plus ``max_gen_len``; those
    names are not available in this method's scope.

    Args:
        tokens: object exposing ``tolist()`` that yields one list of token
            ids per sequence.

    Returns:
        A list with one ``self.tokenizer.decode`` result per row.
    """
    eos_id = self.tokenizer.eos_id
    eos_marker = None  # encoded lazily, only if some row contains EOS
    decoded = []
    for row in tokens.tolist():
        kept = []
        # Single pass: already-substituted ids are never rescanned, so this
        # terminates even if the marker's encoding itself contains eos_id.
        for token in row:
            if token == -1:
                continue
            if token == eos_id:
                if eos_marker is None:
                    eos_marker = self.tokenizer.encode(
                        '\n<|endoftext|>\n', bos=False, eos=False
                    )
                kept.extend(eos_marker)
            else:
                kept.append(token)
        decoded.append(self.tokenizer.decode(kept))
    return decoded


def sample_top_p(probs, p):
probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True)
probs_sum = torch.cumsum(probs_sort, dim=-1)
mask = probs_sum - probs_sort > p
probs_sort[mask] = 0.0
probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
def sample(probs, top_p=0.0, top_k=40):
    """Draw one index per row from ``probs`` with optional top-k / top-p filtering.

    Args:
        probs: tensor of non-negative weights; the last dimension indexes the
            candidates (the caller in this file passes a softmax output).
        top_p: when > 0, candidates are kept in descending order only while
            the cumulative mass *before* them does not exceed ``top_p``; the
            survivors are renormalised. 0 disables this filter.
        top_k: when > 0, only the ``top_k`` largest candidates are considered.
            Values larger than the number of candidates are clamped. When
            <= 0, all candidates are considered (full descending sort).

    Returns:
        Tensor of sampled indices into the last dimension of ``probs``, with
        a trailing dimension of size 1.
    """
    if top_k > 0:
        # torch.topk raises if k exceeds the size of the dimension; clamp so a
        # large default top_k still works with a small candidate set.
        k = min(top_k, probs.size(-1))
        probs_sort, probs_idx = torch.topk(probs, k)
    else:
        probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True)
    if top_p > 0.0:
        probs_sum = torch.cumsum(probs_sort, dim=-1)
        # Subtracting the candidate's own mass means the top candidate is
        # always kept, whatever top_p is.
        mask = probs_sum - probs_sort > top_p
        probs_sort[mask] = 0.0
        probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
    # multinomial samples a position within the sorted candidates; gather maps
    # it back to the original index.
    next_token = torch.multinomial(probs_sort, num_samples=1)
    next_token = torch.gather(probs_idx, -1, next_token)
    return next_token

0 comments on commit f2356c2

Please sign in to comment.