From 1023f57ad8fe61cbb8ff73506db17ea41c755efb Mon Sep 17 00:00:00 2001 From: Jiyuan Hyperion Zhou Date: Thu, 21 Feb 2019 10:57:30 +1030 Subject: [PATCH 1/3] Update readme file --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 21b2cdf..2787883 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +### ------ This is Jiyuan Zhou's DeepMoji branch ------ +I am currently using it for internship project. Also, I modify the codes according to my needs. All the modifications I make have comments besides them to introduce the reason I modify and how they will be different from original branch. +21 Feb 2019 ### ------ Update September 2018 ------ It's now been a year since DeepMoji was released and we're trying to understand how it's being used such that we can make improvements and provide you with better models in the future. From b73104851f8cd5008285b7b0dd17d256d311e143 Mon Sep 17 00:00:00 2001 From: Jiyuan Hyperion Zhou Date: Thu, 21 Feb 2019 11:29:49 +1030 Subject: [PATCH 2/3] can directly feed data to load_benchmark --- deepmoji/finetuning.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/deepmoji/finetuning.py b/deepmoji/finetuning.py index 4dbe653..596e826 100644 --- a/deepmoji/finetuning.py +++ b/deepmoji/finetuning.py @@ -27,7 +27,7 @@ from attlayer import AttentionWeightedAverage -def load_benchmark(path, vocab, extend_with=0): +def load_benchmark(pathOrData, vocab, extend_with=0): """ Loads the given benchmark dataset. Tokenizes the texts using the provided vocabulary, extending it with @@ -38,7 +38,9 @@ def load_benchmark(path, vocab, extend_with=0): suggested batch_size. # Arguments: - path: Path to the dataset to be loaded. + pathOrData: Path to the dataset to be loaded. Alternatively, the + already-loaded data (a Python dict) may be passed, in which case it + is used directly instead of being read from a file. vocab: Vocabulary to be used for tokenizing texts. 
extend_with: If > 0, the vocabulary will be extended with up to extend_with tokens from the training set before tokenizing. @@ -54,8 +56,11 @@ def load_benchmark(path, vocab, extend_with=0): maxlen: Maximum length of an input. """ # Pre-processing dataset - with open(path) as dataset: - data = pickle.load(dataset) + if not isinstance(pathOrData, dict): + with open(pathOrData) as dataset: + data = pickle.load(dataset) + else: + data = pathOrData # Decode data try: From eaaf87b3ac5dba3af39206052830b01b05898079 Mon Sep 17 00:00:00 2001 From: Jiyuan Zhou Hyperion Date: Thu, 21 Feb 2019 11:39:59 +1030 Subject: [PATCH 3/3] Update README.md delete my update of readme file. --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 2787883..21b2cdf 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -### ------ This is Jiyuan Zhou's DeepMoji branch ------ -I am currently using it for internship project. Also, I modify the codes according to my needs. All the modifications I make have comments besides them to introduce the reason I modify and how they will be different from original branch. -21 Feb 2019 ### ------ Update September 2018 ------ It's now been a year since DeepMoji was released and we're trying to understand how it's being used such that we can make improvements and provide you with better models in the future.