-
Notifications
You must be signed in to change notification settings - Fork 3
/
novelty_factor.py
executable file
·39 lines (35 loc) · 1.34 KB
/
novelty_factor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python
# As of now, this assumes that both the question and the answer are lists of
# words and that the dictionary is irrelevant for what this task should do.
# This will return 1 if a new word has been found and 0 if a new word has not
# been found.
import nltk
from nltk import word_tokenize
# English stop words taken from the NLTK stopwords corpus; evaluated once when
# this module is imported (the NLTK "stopwords" corpus data must be available).
stopwords = nltk.corpus.stopwords.words('english')
def remove_stop_words(question_tokens):
    """Return the tokens that are not stop words, in their original order.

    Each token is lower-cased before being looked up in the module-level
    ``stopwords`` collection; the token itself is returned unchanged.
    """
    kept = []
    for token in question_tokens:
        if token.lower() in stopwords:
            # Stop word: drop it from the result.
            continue
        kept.append(token)
    return kept
def novelty_bool(q_list, answer):
    """Flag whether the answer contains a word that the question does not.

    Both texts are tokenized with ``word_tokenize`` and stripped of stop
    words; the comparison is case-insensitive.

    Args:
        q_list: The question text to tokenize.
        answer: A 5-tuple ``(a_list, doc_num, context, d, q_id)``. Only
            ``a_list`` (the answer text) is used here; the remaining fields
            are accepted so the call shape matches the other callers.

    Returns:
        A one-element list: ``[1]`` if at least one non-stop-word token of
        the answer is absent from the question, otherwise ``[0]``.
    """
    # Explicit unpacking replaces the Python 2-only tuple parameter so the
    # module also parses on Python 3; a wrong-sized tuple still raises.
    a_list, _doc_num, _context, _d, _q_id = answer

    # A set gives O(1) membership tests instead of rescanning the question
    # for every answer token.
    question_words = set(
        w.lower() for w in remove_stop_words(word_tokenize(q_list)))
    answer_words = remove_stop_words(word_tokenize(a_list))

    has_novel_word = any(
        w.lower() not in question_words for w in answer_words)
    # The original ended with a bare ``[b]`` expression and so returned None.
    return [1 if has_novel_word else 0]
# This will return the number of novel words divided by the number of words
# in the answer, giving the fraction (between 0 and 1) of the answer that is
# novel.
def novelty_count(q_list, answer):
    """Compute the fraction of answer words that do not occur in the question.

    Both texts are tokenized with ``word_tokenize`` and stripped of stop
    words; the comparison is case-insensitive.

    Args:
        q_list: The question text to tokenize.
        answer: A 5-tuple ``(a_list, doc_num, context, d, q_id)``. Only
            ``a_list`` (the answer text) is used here; the remaining fields
            are accepted so the call shape matches the other callers.

    Returns:
        A one-element list holding a float in ``[0.0, 1.0]``: the number of
        novel answer tokens divided by the number of non-stop-word answer
        tokens. ``[0.0]`` is returned when the answer has no such tokens.
    """
    # Explicit unpacking replaces the Python 2-only tuple parameter so the
    # module also parses on Python 3; a wrong-sized tuple still raises.
    a_list, _doc_num, _context, _d, _q_id = answer

    # A set gives O(1) membership tests instead of rescanning the question
    # for every answer token.
    question_words = set(
        w.lower() for w in remove_stop_words(word_tokenize(q_list)))
    answer_words = remove_stop_words(word_tokenize(a_list))

    if not answer_words:
        # Nothing left after stop-word removal: avoid dividing by zero.
        return [0.0]

    novel = sum(1 for w in answer_words if w.lower() not in question_words)
    # float() keeps true division on Python 2 as well. The original ended
    # with a bare ``[count]`` expression and so returned None.
    return [float(novel) / len(answer_words)]