-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtools.py
47 lines (38 loc) · 1.22 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# coding: utf-8
import pandas as pd
from utils import gen_column_info_list, map_by_chunk, save_pandas, read_as_pandas
import config
#
# map_by_chunk('../hist_features/df_hist_feature_all.csv',
# read_func=lambda x: pd.read_csv(x, iterator=True),
# map_func=lambda df: df.drop(df.columns[df.columns.str.startswith('Unnamed')], axis=1),
# save_func=lambda df: df.to_csv('../hist_features/df_hist_feature_all_clean.csv', mode='a+',
# float_format='.7f'))
#
#
# """
# Filter the data: keep only rows whose clickTime_day lies within [s, e].
# """
# s = 17
# e = 30
# map_by_chunk(
# 'train.hdf5',
# read_func=lambda filename: read_as_pandas(filename, by_chunk=True),
# map_func=lambda df: df.loc[(df['clickTime_day'] >= s) & (df['clickTime_day'] <= e), :],
# save_func=lambda df: save_pandas(df, 'result.hdf5', append=True),
# )
"""
增加一列
"""
# Day bounds (start, end). In the code visible here they are only referenced
# by the commented-out clickTime_day filter step, which treats both ends as
# inclusive.
s, e = 17, 30
def map_func1(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``clickTime_day`` column derived from ``clickTime``.

    The new column is ``clickTime`` cast to ``int`` and floor-divided by
    1000000.
    NOTE(review): presumably ``clickTime`` is encoded as DDHHMMSS, so the
    quotient is the day number -- confirm against the data description.

    Args:
        df: Frame (or chunk of a frame) containing a ``clickTime`` column
            whose values can be cast to ``int``.

    Returns:
        The same ``df`` object, modified in place with the extra column.
    """
    # Assigning on the passed-in frame mutates it; it is returned as well so
    # the function can be used directly as a chunk-mapping callback.
    df['clickTime_day'] = df['clickTime'].astype(int) // 1000000
    return df
# Run map_func1 over '../train.csv' via map_by_chunk and write the result to
# '../train_dc.csv'. The reader is asked for chunked input (by_chunk=True) and
# the writer is called with append=True, index=False for every mapped frame.
def _read_train_chunks(filename):
    """Open *filename* with read_as_pandas in by_chunk mode."""
    return read_as_pandas(filename, by_chunk=True)


def _append_to_train_dc(df):
    """Hand one mapped frame to save_pandas, targeting '../train_dc.csv'."""
    return save_pandas(df, '../train_dc.csv', append=True, index=False)


map_by_chunk(
    '../train.csv',
    read_func=_read_train_chunks,
    map_func=map_func1,
    save_func=_append_to_train_dc,
)
# dataframe summary