forked from dusty-nv/jetson-containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpatches.diff
118 lines (112 loc) · 3.97 KB
/
patches.diff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
diff --git a/ggml.h b/ggml.h
index bdbd128..28a806a 100644
--- a/ggml.h
+++ b/ggml.h
@@ -255,9 +255,11 @@
extern "C" {
#endif
-#ifdef __ARM_NEON
+#if defined(__ARM_NEON) && !defined(__CUDACC__)
// we use the built-in 16-bit float type
typedef __fp16 ggml_fp16_t;
+#elif defined(__ARM_NEON) && defined(__CUDACC__)
+ typedef half ggml_fp16_t;
#else
typedef uint16_t ggml_fp16_t;
#endif
diff --git a/llama.cpp b/llama.cpp
index c8ab313..2e9cc90 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -572,6 +572,13 @@ struct llama_file_loader {
uint32_t len = file.read_u32();
std::string word = file.read_string(len);
+ if (i == 0)
+ word = "<unk>";
+ else if (i == 1)
+ word = "<s>";
+ else if (i == 2)
+ word = "</s>";
+
float score = 0.0f;
file.read_raw(&score, sizeof(score));
@@ -580,6 +587,8 @@ struct llama_file_loader {
auto & tok_score = vocab.id_to_token[i];
tok_score.tok = std::move(word);
tok_score.score = score;
+
+ //printf("vocab token %u %s score=%f\n", i, tok_score.tok.c_str(), tok_score.score);
}
}
void read_tensor_metadata(llama_load_tensors_map & tensors_map) {
@@ -1975,18 +1984,19 @@ struct llama_sp_bigram {
struct llama_tokenizer {
llama_tokenizer(const llama_vocab & vocab): vocab_(vocab) {}
- void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
+ void tokenize(const char * text, size_t len, std::vector<llama_vocab::id> & output) {
+ symbols_.clear();
// split string into utf8 chars
int index = 0;
size_t offs = 0;
- while (offs < text.size()) {
+ while (offs < len) {
llama_sp_symbol sym;
- size_t char_len = std::min(text.size() - offs, utf8_len(text[offs]));
- sym.text = text.c_str() + offs;
+ size_t char_len = std::min(len - offs, utf8_len(text[offs]));
+ sym.text = text + offs;
sym.n = char_len;
offs += char_len;
sym.prev = index - 1;
- sym.next = offs == text.size() ? -1 : index + 1;
+ sym.next = offs == len ? -1 : index + 1;
index++;
symbols_.emplace_back(sym);
}
@@ -2089,7 +2099,47 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
output.push_back(llama_token_bos());
}
- tokenizer.tokenize(text, output);
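+ // scan the text for the literal special tokens <unk>, <s> and </s> (BOS/EOS) and emit their ids directly; the text between them is tokenized normally (https://github.com/ggerganov/llama.cpp/pull/1931)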
+ std::unordered_map<llama_vocab::token, llama_vocab::id> special_token_to_id;
+
+ special_token_to_id["<unk>"] = 0;
+ special_token_to_id["<s>"] = 1;
+ special_token_to_id["</s>"] = 2;
+
+ size_t delim_start = 0;
+ size_t last_delim_end = 0;
+
+ while (delim_start < text.size()) {
+ size_t delim_end = 0;
+ llama_vocab::id token_id = -1;
+
+ for (const auto & mit : special_token_to_id) {
+ const std::string & delimiter = mit.first;
+ size_t end = delim_start + delimiter.size();
+ if (end <= text.size() && text.compare(delim_start, delimiter.size(), delimiter) == 0) {
+ if (token_id == -1 || end > delim_end) {
+ token_id = mit.second;
+ delim_end = end;
+ }
+ }
+ }
+
+ if (token_id != -1) {
+ if (last_delim_end < delim_start) {
+ tokenizer.tokenize(text.c_str() + last_delim_end, delim_start - last_delim_end, output);
+ }
+ output.push_back(token_id);
+ delim_start = delim_end;
+ last_delim_end = delim_end;
+ } else {
+ delim_start++;
+ }
+ }
+
+ if (last_delim_end < text.size()) {
+ tokenizer.tokenize(text.c_str() + last_delim_end, text.size() - last_delim_end, output);
+ }
+
return output;
}