# Origin: patch b943471 to glove/glove.py,
# "Added a function to measure the similarity of two words."
# Both definitions below are instance methods (they take `self`) and are meant
# to sit in the model class right after `most_similar`.


def _similarity(self, word1_vec, word2_vec):
    """
    Return the cosine similarity of two vectors.

    The dot product of the two vectors is divided by each vector's
    norm in turn. Zero-length vectors are not special-cased.
    """

    dst = (np.dot(word1_vec, word2_vec)
           / np.linalg.norm(word1_vec)
           / np.linalg.norm(word2_vec))

    return dst


def similarity(self, word1, word2):
    """
    Return the similarity measure between word1 and word2.

    Both words are looked up in `self.dictionary` to obtain their row
    indices in `self.word_vectors`; the two rows are then compared
    with `self._similarity`.

    Raises Exception if the model has no word vectors yet, if no
    dictionary has been supplied, or if either word is missing from
    the dictionary.
    """

    if self.word_vectors is None:
        raise Exception('Model must be fit before querying')

    if self.dictionary is None:
        raise Exception('No word dictionary supplied')

    # Either lookup failing is reported the same way, so one handler
    # covers both words.
    try:
        word1_idx = self.dictionary[word1]
        word2_idx = self.dictionary[word2]
    except KeyError:
        raise Exception('Word not in dictionary')

    # Delegate to the cosine helper defined alongside this method.
    return self._similarity(self.word_vectors[word1_idx],
                            self.word_vectors[word2_idx])