-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParseTweetsFile.java
135 lines (102 loc) · 3.48 KB
/
ParseTweetsFile.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
/**
 * Counts tweets per hour from a text dump and writes the resulting histogram to disk.
 *
 * <p>A tweet record is a run of one or more physical lines that ends with the first line
 * containing {@code TweetId3}. Inside a record the timestamp is the text between
 * {@code TweetDate2:} and {@code TweetId3:}, in the pattern
 * {@code EEE MMM d HH:mm:ss zzz yyyy}.
 *
 * <p>Constructing an instance performs the whole job: it reads {@code filename}, then writes
 * {@code results_<filename>} (hour-since-epoch and count per line) and
 * {@code results_date_<filename>} (date and count per line).
 */
public final class ParseTweetsFile {
    /** Marker that precedes the timestamp inside a tweet record. */
    private static final String DATE_MARKER = "TweetDate2:";
    /** Marker that follows the timestamp inside a tweet record. */
    private static final String ID_MARKER = "TweetId3:";
    /** Text whose presence on a line means the record is complete. */
    private static final String RECORD_END_MARKER = "TweetId3";
    /** Timestamp pattern used in the dump. */
    private static final String DATE_PATTERN = "EEE MMM d HH:mm:ss zzz yyyy";

    /** Tweet count keyed by whole hours since the epoch, kept sorted by hour. */
    private final Map<Double, Double> frequency = new TreeMap<Double, Double>();

    /**
     * Parser for tweet timestamps. The locale is pinned so the English day and month names
     * parse regardless of the JVM's default locale. It is an instance field, not a static,
     * because {@link SimpleDateFormat} is not thread-safe.
     */
    private final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN, Locale.US);

    /**
     * Reads the tweet dump, counts tweets per hour and writes the two result files.
     *
     * <p>Tweets whose date cannot be extracted or parsed are reported and skipped. A trailing
     * record that never reaches its {@code TweetId3} marker is ignored. The result files are
     * written even when the input could not be read.
     *
     * @param filename name of the tweet dump; also used as the suffix of the result file names
     */
    public ParseTweetsFile(String filename)
    {
        System.out.println("Reading file...");
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Join the rows of a multi-row tweet with single spaces. The marker contains
                // no space, so it cannot straddle a join and checking each row is sufficient.
                StringBuilder tweet = new StringBuilder(line);
                boolean complete = line.contains(RECORD_END_MARKER);
                while (!complete && (line = br.readLine()) != null) {
                    tweet.append(' ').append(line);
                    complete = line.contains(RECORD_END_MARKER);
                }
                if (!complete) {
                    // End of file reached inside a record (for example trailing blank lines).
                    // Stop here rather than keep appending past the end of the stream.
                    if (!tweet.toString().trim().isEmpty()) {
                        System.out.println("Skipping incomplete tweet at end of file");
                    }
                    break;
                }
                try {
                    parseTweet(tweet.toString());
                } catch (ParseException e) {
                    // One malformed tweet must not discard the rest of the file.
                    System.out.println("Wrong date format");
                }
            }
        } catch (FileNotFoundException e) {
            System.out.println("File doesnt exist");
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("Done reading file. Writing results...");
        saveResult(filename);
        System.out.println("Done!");
    }

    /**
     * Writes the histogram to {@code results_<filename>} and {@code results_date_<filename>},
     * one line per hour in ascending hour order.
     *
     * @param filename suffix appended to the two result file names
     */
    private void saveResult(String filename)
    {
        // try-with-resources closes both writers even if opening the second one fails.
        try (PrintWriter hourWriter = new PrintWriter("results_" + filename, "UTF-8");
             PrintWriter dateWriter = new PrintWriter("results_date_" + filename, "UTF-8")) {
            for (Map.Entry<Double, Double> entry : frequency.entrySet()) {
                // Convert hours since the epoch back to milliseconds for display.
                Date date = new Date((long) (entry.getKey() * 3600 * 1000));
                hourWriter.println(entry.getKey() + " " + entry.getValue());
                dateWriter.println(date + "-" + entry.getValue());
            }
        } catch (FileNotFoundException | UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the timestamp of one tweet record and counts it in its hour bucket.
     *
     * @param tweet one complete tweet record
     * @throws ParseException if the date markers are missing or the date text does not match
     *         the expected pattern
     */
    private void parseTweet(String tweet) throws ParseException
    {
        Date date = dateFormat.parse(getDateStr(tweet));
        increaseFrequency(getDateHour(date));
    }

    /**
     * Returns the text between the {@code TweetDate2:} and {@code TweetId3:} markers.
     *
     * @param tweet one complete tweet record
     * @return the date text with surrounding whitespace removed
     * @throws ParseException if either marker is missing
     */
    private String getDateStr(String tweet) throws ParseException
    {
        int markerPos = tweet.indexOf(DATE_MARKER);
        if (markerPos == -1) {
            throw new ParseException("Missing " + DATE_MARKER, 0);
        }
        int start = markerPos + DATE_MARKER.length();
        // Search from the end of the date marker so that end can never precede start.
        int end = tweet.indexOf(ID_MARKER, start);
        if (end == -1) {
            throw new ParseException("Missing " + ID_MARKER, start);
        }
        return tweet.substring(start, end).trim();
    }

    /**
     * Adds one to the count of the given hour bucket, creating the bucket on first use.
     *
     * @param dateHour hour bucket expressed as whole hours since the epoch
     */
    private void increaseFrequency(double dateHour)
    {
        Double current = frequency.get(dateHour);
        frequency.put(dateHour, current == null ? 1.0 : current + 1.0);
    }

    /**
     * Returns the date's time value as whole hours since the epoch.
     * The value is not normalized; callers receive the absolute hour number.
     *
     * @param date the instant to convert
     * @return hours since the epoch, with partial hours discarded
     */
    private double getDateHour(Date date) {
        long seconds = date.getTime() / 1000;
        return (double) (seconds / 3600);
    }

    /**
     * Runs the parser on the bundled sample file name.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String filename = "#earthquakeNepal.txt";
        new ParseTweetsFile(filename);
    }
}