-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdata_augmentation.py
32 lines (30 loc) · 1.35 KB
/
data_augmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os

import numpy as np
import pandas as pd
# Raise pandas' display limits so the tail printed for each merged table is
# not truncated in the console.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)


def _build_augmented_train(train_path, extra_sets, out_path):
    """Append extra, separately-labelled feature tables to a training table.

    The training CSV is loaded as-is. Every extra feature CSV receives a
    ``label`` column read from its companion answer file, and the tables are
    then stacked row-wise (training rows first, extras in the given order)
    and written to ``out_path``.

    Args:
        train_path: CSV with the base training features.
        extra_sets: Sequence of ``(features_csv, answers_file)`` pairs. Each
            answers file must be a CSV containing a ``label`` column.
            NOTE(review): the answer files live under ``./submit`` and look
            like model predictions used as pseudo-labels -- confirm.
        out_path: Destination CSV. Its parent directory is created if it
            does not exist yet.

    Returns:
        The concatenated DataFrame that was written to ``out_path``.
    """
    train = pd.read_csv(train_path)
    extras = [pd.read_csv(features_path) for features_path, _ in extra_sets]

    # Report the raw shapes before any label column is attached.
    print(train.shape)
    for extra in extras:
        print(extra.shape)

    for extra, (_, answers_path) in zip(extras, extra_sets):
        # usecols yields a one-column frame; pandas aligns it on the row
        # index, so the answer file is expected to have one row per
        # feature row (missing rows would silently become NaN labels).
        extra['label'] = pd.read_csv(answers_path, usecols=['label'])

    # Stack pairwise in order; the original index values are kept, so the
    # printed tail shows each source table's own row numbers.
    combined = train
    for extra in extras:
        combined = pd.concat((combined, extra))
    print(combined.shape)
    print(combined[-10:])

    # to_csv does not create missing directories, so make sure the target
    # directory exists instead of failing after all the work above.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    combined.to_csv(out_path, index=False, header=True)
    return combined


# VUA: training features plus the verb and all-POS tables.
# NOTE(review): this result goes to ./data/VUA2 while the inputs come from
# ./data/VUA (the TOEFI run below writes next to its inputs) -- confirm the
# different output directory is intentional.
_build_augmented_train(
    "./data/VUA/VUA_train_features2.csv",
    [
        ("./data/VUA/VUA_verb_features.csv",
         "./submit/vua_verb_answer12.txt"),
        ("./data/VUA/VUA_allpos_features.csv",
         "./submit/vua_allpos_answer13.txt"),
    ],
    './data/VUA2/VUA_train_da2.csv',
)

# TOEFI: same procedure on the TOEFI feature tables.
_build_augmented_train(
    "./data/TOEFI/TOEFI_train_features2.csv",
    [
        ("./data/TOEFI/TOEFI_verb_features.csv",
         "./submit/toefi_verb_answer27.txt"),
        ("./data/TOEFI/TOEFI_allpos_features.csv",
         "./submit/toefi_allpos_answer26.txt"),
    ],
    './data/TOEFI/TOEFI_train_da2.csv',
)