-
Notifications
You must be signed in to change notification settings - Fork 0
/
global_functions.py
246 lines (201 loc) · 9.56 KB
/
global_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# encoding: utf-8
import global_variables
import os
from datetime import datetime,timedelta
# GLOBAL FUNCTIONS
def create_list_with_size_ten():
    """Return a brand-new list holding ten ``None`` placeholders."""
    slot_count = 10
    return [None] * slot_count
def create_dir_if_not_exits(directory):
    """Create *directory* (and any missing parents) unless the path already exists.

    The directory is created first and the existence check only happens on
    failure, so another process creating the same path between "check" and
    "create" no longer makes this function raise.

    Raises:
        OSError: if the creation fails and the path still does not exist.
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Something already lives at that path (possibly created concurrently):
        # that is the "already exists" case, which is silently accepted.
        if not os.path.exists(directory):
            raise
def toJSON(item):
    """Return the object stored in the ``_json`` attribute of *item*."""
    raw_payload = item._json
    return raw_payload
def update_top_10_list(lista, tuple_id_amount, show=False):
    """Insert or refresh an ``(id, amount)`` pair inside a ranking list, in place.

    Args:
        lista: mutable list of ``(id, amount)`` tuples; only its first ten
            entries are searched for the id.
        tuple_id_amount: the ``(id, amount)`` pair to register.
        show: accepted for compatibility; not used by this function.

    If the id is already among the first ten entries, its entry is removed and
    re-inserted with the larger of the stored and the new amount. Otherwise the
    pair is inserted and the last element of the list is dropped, so the list
    length does not change.
    """
    entry_id, entry_amount = tuple_id_amount

    # Position of the id within the first ten entries; 10 means "not found".
    leading_ids = [entry[0] for entry in lista[0:10]]
    if entry_id in leading_ids:
        position = leading_ids.index(entry_id)
    else:
        position = 10

    already_listed = position < 10
    if already_listed:
        # Keep the best amount seen so far and take the stale entry out.
        entry_amount = max(lista[position][1], entry_amount)
        del lista[position]

    # Walk towards the front while the preceding entry has a smaller amount or
    # an id equal to 0 (presumably an empty placeholder slot - TODO confirm).
    # NOTE(review): global_variables.ID / AMOUNT are used as tuple indexes here
    # while literal 0 / 1 are used above - presumably the same values; verify.
    while position >= 1:
        previous = lista[position - 1]
        if not (previous[global_variables.AMOUNT] < entry_amount
                or previous[global_variables.ID] == 0):
            break
        position -= 1

    lista.insert(position, (entry_id, entry_amount))
    if not already_listed:
        # A new entry came in, so the last one is dropped to keep the length.
        lista.pop()
    # NOTE: an increment of global_variables.count used to be here; it is
    # disabled pending review.
def notNone(value):
    """Return True when *value* is anything other than ``None``.

    An identity check is used instead of ``!=`` so that objects overriding
    ``__eq__``/``__ne__`` cannot distort the answer and the result is always
    a real boolean.
    """
    return value is not None
def get_string_datetime_now():
    """Return the current local date and time as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    current_moment = datetime.now()
    return current_moment.strftime(timestamp_format)
def get_string_datetime_with_n_min_more_than_now(mins_to_add):
    """Return "now + *mins_to_add* minutes" formatted as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    delay = timedelta(minutes=mins_to_add)
    return (datetime.now() + delay).strftime(timestamp_format)
def checkParameter(parameter):
    """Return 0 when *parameter* equals ``None`` or ``False``, otherwise 1.

    The test is by equality, so values that compare equal to ``False``
    (such as ``0``) also yield 0.
    """
    counts_as_unset = parameter == None or parameter == False
    return 0 if counts_as_unset else 1
def checkOptions(*args):
return sum([checkParameter(x) for x in args])
def throw_error(module_name, error_message):
    """Print ``[module_name] error_message`` and terminate with exit status 1.

    Raises:
        SystemExit: always, with code 1.
    """
    # Imported locally so this function does not rely on the module's import
    # block. sys.exit is used because the bare ``exit`` name is injected by the
    # ``site`` module for interactive sessions and is not guaranteed to exist.
    import sys

    print("\n\n\n [{}] {}".format(module_name, error_message))
    sys.exit(1)
def isJsonFile(filename):
    """Return True when *filename* ends with the exact suffix ``.json`` (case-sensitive)."""
    json_suffix = ".json"
    return filename.endswith(json_suffix)
def get_one_screen_name(user_id):
    """Return the first stored screen name for *user_id*, or ``"unknown"``.

    The name is read from ``global_variables.users_dict[user_id]["screen-names"]``.
    ``"unknown"`` is returned when the user is not in the dictionary, when the
    record has no ``"screen-names"`` key, or when that entry is empty.
    """
    fallback = "unknown"
    if user_id not in global_variables.users_dict:
        return fallback

    user_record = global_variables.users_dict[user_id]
    if "screen-names" not in user_record:
        return fallback

    screen_names = user_record["screen-names"]
    # An empty entry used to raise IndexError on [0]; fall back instead.
    if not screen_names:
        return fallback
    return screen_names[0]
def get_top_user(top_10_list):
    """Return ``(user_id, screen_name, amount)`` for the first entry of *top_10_list*.

    The first entry must be an ``(id, amount)`` pair; the screen name comes
    from ``get_one_screen_name``.
    """
    leader = top_10_list[0]
    user_id, amount = leader
    return user_id, get_one_screen_name(user_id), amount
def increment_dict_counter(dictionary, id):
    """Add one to the counter stored under *id* and return the new count.

    A missing key is treated as a count of 0, so the first call for a key
    stores and returns 1. *dictionary* is modified in place.
    """
    dictionary[id] = dictionary.get(id, 0) + 1
    return dictionary[id]
# Original author's note: Twitter uses PDT, which is 9 hours behind Spain
# (hence the default offset of 9).
def get_utc_time_particioned(date, utc_offset=9):
    """Parse a ``'%a %b %d %H:%M:%S +0000 %Y'`` timestamp and split it into parts.

    Args:
        date: timestamp string such as ``"Wed Aug 27 13:08:45 +0000 2008"``.
        utc_offset: number of hours added to the parsed time.

    Returns:
        A tuple ``(day, hour, minute)`` where *day* is a ``YYYY-MM-DD`` string
        and *hour* / *minute* are integers, all taken after the shift.
    """
    source_format = '%a %b %d %H:%M:%S +0000 %Y'
    shifted = datetime.strptime(date, source_format) + timedelta(hours=utc_offset)
    day_label = shifted.strftime("%Y-%m-%d")
    return day_label, shifted.hour, shifted.minute
# Layout note kept from the original file:
# [yyyy-mm-dd] -> dict[hh] -> dict[min] -> list[tweet_id1...]
# Original author's note: Twitter uses PDT, which is 9 hours behind Spain
# (hence the default offset of 9).
def get_utc_time(date, utc_offset=9):
    """Parse a ``'%a %b %d %H:%M:%S +0000 %Y'`` timestamp and shift it.

    Args:
        date: timestamp string such as ``"Wed Aug 27 13:08:45 +0000 2008"``.
        utc_offset: number of hours added to the parsed time.

    Returns:
        A naive ``datetime`` equal to the parsed time plus *utc_offset* hours.
    """
    source_format = '%a %b %d %H:%M:%S +0000 %Y'
    parsed = datetime.strptime(date, source_format)
    shift = timedelta(hours=utc_offset)
    return parsed + shift
def insert_tweet_in_date_dict(tweet_id, fecha, hora, minuto):
    """Register *tweet_id* under ``tweets_by_date_dict[fecha][hora][minuto]``.

    Missing levels (date, hour, minute) are created on demand; when the minute
    bucket already exists the id is appended at the end of its list.
    """
    hours_of_day = global_variables.tweets_by_date_dict.setdefault(fecha, {})
    minutes_of_hour = hours_of_day.setdefault(hora, {})
    bucket = minutes_of_hour.setdefault(minuto, [])
    # Possible improvements noted by the original author:
    #   1. look at the seconds: insert at the end when they are 30 or more,
    #      and at the beginning when they are 0 to 29
    #   2. store a tuple (tweet_id, seconds)
    #   3. keep a list of 12 lists (5-second intervals)
    bucket.append(tweet_id)
def is_user(id):
    """Tell whether *id* is a known user.

    Returns:
        True if *id* is a key of ``global_variables.users_dict``; otherwise
        False if it is a key of ``global_variables.tweets_dict``; otherwise None.
    """
    if id in global_variables.users_dict:
        return True
    # Not a user: a known tweet id gives False, an unknown id gives None.
    return False if id in global_variables.tweets_dict else None
def is_tweet(id):
    """Tell whether *id* is a known tweet.

    Returns:
        False if *id* is a key of ``global_variables.users_dict``; otherwise
        True if it is a key of ``global_variables.tweets_dict``; otherwise None.
    """
    if id in global_variables.users_dict:
        return False
    # Not a user: a known tweet id gives True, an unknown id gives None.
    return True if id in global_variables.tweets_dict else None
def replace_bullet_with_dot(word):
    """Return *word* with every '•' character replaced by '.'."""
    bullet, dot = '•', '.'
    return dot.join(word.split(bullet))
def replace_dot_with_bullet(word):
    """Return *word* with every '.' character replaced by '•'."""
    dot, bullet = '.', '•'
    return bullet.join(word.split(dot))
#cadena_unicode = u"prueba•prueba".encode("utf-8")
#cadena_utf = cadena_unicode.decode("utf-8")
#print(cadena_unicode)
#print(cadena_utf)
# u'•' == u'\u2022'
def change_dot_in_keys_for_bullet(dicctionary):
    """Return a new dict where '.' in every key has been replaced by '•'.

    Values are carried over untouched and the input dict is not modified.
    A message is printed for each key that gets rewritten.
    """
    converted = {}
    for key, value in dicctionary.items():
        target_key = key
        if "." in key:
            print("[CHANGE DOT FOR BULLET INFO] Changing '.' in key {} for '•'".format(key))
            target_key = replace_dot_with_bullet(key)
        converted[target_key] = value
    return converted
def change_bullet_in_keys_for_dot(dicctionary):
    """Return a new dict where '•' in every key has been replaced by '.'.

    Values are carried over untouched and the input dict is not modified.
    A message is printed for each key that gets rewritten.
    """
    converted = {}
    for key, value in dicctionary.items():
        target_key = key
        if "•" in key:
            print("[CHANGE BULLET FOR DOT INFO] Changing '•' in key {} for '.'".format(key))
            target_key = replace_bullet_with_dot(key)
        converted[target_key] = value
    return converted
###################################################################################################################################
###################################################################################################################################
################################################# DEBUG METHODS ###################################################################
################################################# It doesn't add functionality ####################################################
###################################################################################################################################
def show_date_dicctionary():
    """Debug helper: print one line per (date, hour, minute) bucket.

    Each line shows the first eight tweet ids stored in that bucket of
    ``global_variables.tweets_by_date_dict``.
    """
    row_template = "fecha {}- hora {}:{} {}"
    for fecha, dict_horas in global_variables.tweets_by_date_dict.items():
        for hora, dict_mins in dict_horas.items():
            for minuto, tweets_ids in dict_mins.items():
                preview = tweets_ids[0:8]
                print(row_template.format(fecha, hora, minuto, preview))
def show_date_dicctionary_simple(max_rows=20):
    """Debug helper: print a short table of the keys of the date dictionary.

    Walks ``global_variables.tweets_by_date_dict`` (date -> hour -> minute) and
    prints one numbered row per minute bucket, stopping after *max_rows* rows.

    Args:
        max_rows: maximum number of rows to print (default 20).

    The row limit is enforced with an early return. Errors raised while
    walking the dictionary now propagate instead of being silently discarded
    by a bare ``except``.
    """
    print("\n\nNum Diccionario1 Diccionario2 Diccionario3 Value Diccionario3")
    print(" key=fecha key=hora key=minuto")
    rows_shown = 0
    for fecha, dict_horas in global_variables.tweets_by_date_dict.items():
        for hora, dict_mins in dict_horas.items():
            # Only the minute keys are displayed, never the tweet id lists.
            for minuto in dict_mins:
                if rows_shown >= max_rows:
                    return
                rows_shown += 1
                print("{0:<8} {1:<14}-> {2:<10} -> {3:<8} [tweets_ids_list]".format(rows_shown, fecha, hora, minuto))
def print_num_tweets_per_date():
    """Debug helper: print the total number of tweet ids held in the date dictionary.

    The total is the sum of the lengths of every minute bucket of
    ``global_variables.tweets_by_date_dict``.
    """
    total_ids = sum(
        len(tweets_ids)
        for dict_horas in global_variables.tweets_by_date_dict.values()
        for dict_mins in dict_horas.values()
        for tweets_ids in dict_mins.values()
    )
    print(total_ids)
def print_top_10_list(lista, titulo):
    """Debug helper: print a ranking list as a table preceded by *titulo*.

    Args:
        lista: iterable of ``(id, amount)`` pairs.
        titulo: heading printed before the table.

    For each entry the id is looked up in the users, tweets, quotes and
    retweets dictionaries of ``global_variables`` (in that order) to find the
    owning user, whose screen name is printed in the last column.
    """
    print(titulo)
    print("{0:>30} {1:>20}{2:>15} {3:<20} {4:<15}".format("ID","AMOUNT","IN USERS DICT", "IN TWEETS DICT","USER"))
    row_template = "{0:<3} {1:>34} {2:<15} {3:<15} {4:>4} {5:>25}"
    for rank, (entry_id, amount) in enumerate(lista, 1):
        # All four lookups are done up front, exactly one per dictionary.
        in_users = global_variables.users_dict.get(entry_id, False) != False
        in_tweets = global_variables.tweets_dict.get(entry_id, False) != False
        in_quotes = global_variables.quotes_dict.get(entry_id, False) != False
        in_retweets = global_variables.retweets_dict.get(entry_id, False) != False

        if in_users:
            owner_id = entry_id
        elif in_tweets:
            owner_id = global_variables.tweets_dict[entry_id]["user"]["id_str"]
        elif in_quotes:
            owner_id = global_variables.quotes_dict[entry_id]["user"]["id_str"]
        elif in_retweets:
            owner_id = global_variables.retweets_dict[entry_id]["user"]["id_str"]
        else:
            owner_id = "unknown"

        screen_name = get_one_screen_name(owner_id)
        print(row_template.format(rank, entry_id, amount, str(in_users), str(in_tweets), screen_name))
    # Blank lines separating this table from whatever is printed next.
    print("\n\n\n")
def print_all_top_ten_lists():
    """Debug helper: print every ranking list kept in ``global_variables``.

    Each list is printed through ``print_top_10_list`` with its own title,
    in the order given below.
    """
    rankings = (
        ("global_most_favs_tweets", "tweets con mas likes"),
        ("global_most_favs_users", "usuarios que mas likes dan"),
        ("global_most_followers_users", "usuarios con mas followers"),
        ("global_most_rt_tweets", "tweets con mas retweets"),
        ("global_most_tweets_users", "usuarios con mas tweets publicados"),
        # Top 10 lists about our own data set and its internal statistics.
        ("local_most_messages_users", "usuarios de los que tenemos mas mensajes ( entre tweets y retweets"),
        ("local_most_retweets_users", "usuarios de los cuales tenemos mas retweets"),
        ("local_most_tweets_users", "usuarios de los cuales tenemos mas tweets"),
        ("local_most_replied_tweets", "tweets para los cuales tenemos mas respuestas"),
        ("local_most_replied_users", "usuarios para los cuale tenemos mas respuestas"),
        ("local_most_quoted_tweets", "tweets para los cuales tenemos mas citas"),
        ("local_most_quoted_users", "usuarios para los cuales tenemos mas citas"),
        # Disabled in the original file: local_most_favs_users and
        # local_most_followers_users.
    )
    for attribute_name, title in rankings:
        # The attribute is resolved right before printing, one list at a time.
        print_top_10_list(getattr(global_variables, attribute_name), title)