-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathidf.c
75 lines (65 loc) · 1.48 KB
/
idf.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/* This file is part of the software similarity tester SIM.
Written by Dick Grune, Vrije Universiteit, Amsterdam.
$Id: idf.c,v 2.16 2012-05-09 11:50:37 Gebruiker Exp $
*/
#include <string.h>
#include "system.par"
#include "token.h"
#include "idf.h"
/*
	Looks up the string str in the table list[] by binary search and
	returns the id_tr field of the matching entry, or default_token if
	no entry has an id_tag equal to str.

	listsize is the size of the table in BYTES (as delivered by sizeof
	applied to the array); the number of entries is derived from it.
	The binary search requires the entries to be sorted on id_tag in
	strcmp() order.

	A table with no complete entry yields default_token, without the
	table being accessed.
*/
Token
idf_in_list(
	const char *str,
	const struct idf list[],
	size_t listsize,
	Token default_token
) {
	size_t n_entries = listsize / sizeof (struct idf);
	size_t first = 0;
	size_t last;

	if (n_entries == 0) {
		/* nothing to search; list[0] does not exist */
		return default_token;
	}
	last = n_entries - 1;

	/* narrow [first, last] down to the single candidate entry */
	while (first < last) {
		/* first < last, so middle < last and nothing can underflow */
		size_t middle = first + (last - first) / 2;

		if (strcmp(str, list[middle].id_tag) > 0) {
			/* str sorts after list[middle] */
			first = middle + 1;
		}
		else {
			/* str sorts at or before list[middle] */
			last = middle;
		}
	}

	/* first == last: the only entry that can still match */
	if (strcmp(str, list[first].id_tag) == 0) {
		return list[first].id_tr;
	}
	return default_token;
}
#define HASH(h,ch) (((h) * 8209) + (ch)*613)
Token
idf_hashed(const char *str) {
int32 h = 0;
/* let's be careful about ranges; if done wrong it's hard to debug */
while (*str) {
int ch = *str++ & 0377;
if (ch == ' ') continue;
/* -1 <= h <= 2^31-1 */
h = HASH(h, ch);
/* -2^31 <= h <= 2^31-1 */
if (h < 0) {
/* -2^31 <= h <= -1 */
h += 2147483647; /* 2^31-1 */
/* -1 <= h <= 2^31-2 */
}
else {
/* 0 <= h <= 2^31-1 */
}
/* -1 <= h <= 2^31-1 */
}
/* -1 <= h <= 2^31-1 */
if (h < 0) {
/* h = -1 */
h = 0;
}
/* 0 <= h <= 2^31-1 */
h %= (N_TOKENS - N_REGULAR_TOKENS - 1);
/* 0 <= h < N_TOKENS - N_REGULAR_TOKENS - 1 */
h += N_REGULAR_TOKENS;
/* N_REGULAR_TOKENS <= h < N_TOKENS - 1 */
return int2Token(h);
/* this avoids the regular tokens and End_Of_Line */
}