-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtext_classification.py
executable file
·64 lines (46 loc) · 1.98 KB
/
text_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
import sys
import os
import turicreate as tc
# Train a positive/negative sentiment classifier from a TSV of Amazon reviews
# and export it as a Core ML model.

# The location of the input data
DATA_LOCAL = "datasets/sentiment_sentences/amazon_reviews.tsv"

# Check that the file is there; if not, report the problem on stderr and
# exit with status 1 (sys.exit with a string argument does both).
if not os.path.exists(DATA_LOCAL):
    sys.exit("{} does not exist.".format(DATA_LOCAL))

# Read the data (tab-separated, with a header row)
reviews = tc.SFrame.read_csv(DATA_LOCAL, delimiter='\t', header=True)

# Select the specific columns we want
reviews = reviews[['review_body', 'star_rating']]

# Label each review based on star rating: 4 stars or more is positive,
# anything lower is negative.
# NOTE(review): assumes 'star_rating' was parsed as a number -- confirm
# against the dataset.
reviews['sentimentClass'] = reviews['star_rating'].apply(
    lambda rating: 'positive' if rating >= 4 else 'negative')

# Remove the star rating column; we don't need it anymore
reviews.remove_column('star_rating')

# Split the reviews into positive and negative
positive = reviews[reviews['sentimentClass'] == 'positive']
negative = reviews[reviews['sentimentClass'] == 'negative']

# We want the same number of positive and negative reviews, so use the size
# of the smaller group...
review_count = min(len(positive), len(negative))

# ...and trim both groups to their first `review_count` rows
positive = positive.head(review_count)
negative = negative.head(review_count)

# Now combine them back together
reviews = positive.append(negative)

# Rename 'review_body' to 'text', so that the generated model
# calls the input "text"
reviews = reviews.rename({"review_body": "text"})

# Save the balanced SFrame for later use (despite its name, MODEL_PATH is
# where the training data is stored, not the model)
MODEL_PATH = "amazon_reviews.sframe"
reviews.save(MODEL_PATH)

# Create the model! We're telling it to look at the 'text' column as its input,
# and the 'sentimentClass' column as the label.
model = tc.sentence_classifier.create(reviews, 'sentimentClass', features=['text'])

# Evaluate this model
# NOTE(review): this evaluates on the same rows the model was trained on, so
# the reported metrics are likely optimistic; consider holding out a test split.
evaluation = model.evaluate(reviews)

# Print the evaluation
print(evaluation)

# Export the model into a form that Core ML can use
COREML_MODEL_PATH = "SentimentClassifier.mlmodel"
model.export_coreml(COREML_MODEL_PATH)
print("Created model at {}".format(COREML_MODEL_PATH))