-
-
Notifications
You must be signed in to change notification settings - Fork 122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
When running saits multiple times, the same parameters cannot yield the same results. #555
Comments
Hi there 👋, Thank you so much for your attention to PyPOTS! You can follow me on GitHub. I have received your message and will respond ASAP. Thank you for your patience! 😃 Best,
`import random 设置随机种子,确保结果可复现seed = 2024 #数据读取 #数据归一化---应该要在加入缺失值之前归一化 #将2维数据转化为3维 ##数据缺失设置 df_with_missing = df_origin_numpydf_with_missing = mcar(df_origin_numpy, p=0.3) df_with_missing = mnar_t(df_origin_numpy, cycle=20, pos=10, scale=3)df_with_missing = seq_missing(df_origin_numpy, p=0.3, seq_len=5)#数据、掩码设置 sample_num, sequence_length, num_features = df_with_missing.shape def calc_r2(predictions, targets, masks=None, ):
def evaluate_model(n_layers, d_model, d_ffn,n_heads, d_k, d_v, lr,dataset):
saits_output = evaluate_model(n_layers=1, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing) saits_output = evaluate_model(n_layers=1, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing) saits_output = evaluate_model(n_layers=2, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing) saits_output = evaluate_model(n_layers=2, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing) testing_mae = calc_mae(saits_output, test_X_ori, test_X_indicating_mask) print(f"Testing MAE: {testing_mae:.4f}") mse = calc_mse(saits_output, test_X_ori, test_X_indicating_mask) 计算 平均相对误差 MREmre = calc_mre(saits_output, test_X_ori, test_X_indicating_mask) R2 = calc_r2(saits_output, test_X_ori, test_X_indicating_mask) |
This issue had no activity for 14 days. It will be closed in 1 week unless there is some new activity. Is this issue already resolved? |
Issue description
If my script calls the following code only once: “saits_output = evaluate_model(n_layers=1, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing)”, I get the same result every time I run the script. But when the program calls it several times in a single run, the outputs for the same parameters are slightly different each time. How can I solve this problem?
import random
import numpy as np
import benchpots
from pypots.utils.random import set_random_seed
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from pypots.utils.metrics import calc_mae,calc_mse,calc_mre
from pypots.optim import Adam
from pypots.imputation import SAITS
from pygrinder import (
mcar,
mar_logistic,
mnar_x,
mnar_t,
rdo,
seq_missing,
block_missing,
calc_missing_rate
)
# Fix every random seed so that results are reproducible.
# NOTE: the generators are seeded once here, at script start. Anything that
# draws random numbers afterwards advances their state, so a later piece of
# code only sees the same random stream if the seeds are set again before it.
seed = 2024
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Request deterministic cuDNN behaviour and switch off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
set_random_seed(seed)
# Load the data; the first column (time) is used as the index.
df_origin = pd.read_csv('your.csv', index_col=0)[:24000]
df_origin_numpy = df_origin.values  # DataFrame -> NumPy array

# Normalise the data before any missing values are introduced.
scaler = StandardScaler()
df_origin_numpy = scaler.fit_transform(df_origin_numpy)
num_rows, num_cols = df_origin_numpy.shape

# Convert the 2-D table into 3-D samples of `target_shape` consecutive rows.
target_shape = 48  # number of rows (time steps) per sample
A = num_rows // target_shape  # number of complete samples
# Drop trailing rows that do not fill a whole sample; this slice is a no-op
# when the row count is already a multiple of `target_shape`.
df_origin_numpy = df_origin_numpy[:A * target_shape, :]
# Use the measured sizes instead of hard-coded 48 / 25 so the reshape keeps
# working when the sample length or the number of columns changes.
df_origin_numpy = df_origin_numpy.reshape(A, target_shape, num_cols)
## Missing-data configuration
# NOTE(review): df_with_missing is assigned four times in a row, so only the
# result of the final seq_missing(...) call is used below. The earlier calls
# look like alternative missingness patterns that were presumably meant to be
# toggled by commenting them in or out - confirm. They are kept as-is because
# they may still consume random state before the final call runs.
df_with_missing = df_origin_numpy
df_with_missing = mcar(df_origin_numpy, p=0.3)
df_with_missing = mnar_t(df_origin_numpy, cycle=20, pos=10, scale=3)
df_with_missing = seq_missing(df_origin_numpy, p=0.3, seq_len=5)
# Dataset dict and evaluation mask
# "X" holds the data with missing values, "y" the fully observed data.
# NOTE(review): confirm that "y" is the key the model expects for the
# ground-truth series.
dataset_for_testing = {
"X": df_with_missing,
"y": df_origin_numpy
}
# True at every position that is NaN in df_with_missing.
test_X_indicating_mask = np.isnan(df_with_missing)
test_X_ori = np.nan_to_num(df_origin_numpy) # replace NaNs in the original data with 0
# Unpack the 3-D shape; sequence_length and num_features are read by evaluate_model.
sample_num, sequence_length, num_features = df_with_missing.shape
def calc_r2(predictions, targets, masks=None, ):
    """Compute the coefficient of determination (R²) of predictions against targets.

    Parameters
    ----------
    predictions : array-like
        Predicted (imputed) values.
    targets : array-like
        Ground-truth values, same shape as ``predictions``.
    masks : array-like or None
        Optional indicator with the same shape. Only positions where the mask
        is non-zero/True take part in the score. When ``None``, every position
        is used.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot`` over the selected positions. If the selected
        targets are constant (``SS_tot == 0``) the score is 1.0 for a perfect
        fit and 0.0 otherwise. If no position is selected, NaN is returned.
    """
    predictions = np.asarray(predictions, dtype=float)
    targets = np.asarray(targets, dtype=float)

    if masks is not None:
        # Select the evaluated entries instead of multiplying by the mask:
        # zeroed-out entries would otherwise still count toward the target
        # mean and distort SS_tot.
        selected = np.asarray(masks).astype(bool)
        predictions = predictions[selected]
        targets = targets[selected]
    else:
        predictions = predictions.ravel()
        targets = targets.ravel()

    if targets.size == 0:
        return float("nan")

    ss_res = float(np.sum((targets - predictions) ** 2))
    ss_tot = float(np.sum((targets - targets.mean()) ** 2))
    if ss_tot == 0.0:
        # Constant targets: the ratio is undefined, so fall back to a finite score.
        return 1.0 if ss_res == 0.0 else 0.0
    return 1.0 - ss_res / ss_tot
def evaluate_model(n_layers, d_model, d_ffn, n_heads, d_k, d_v, lr, dataset):
    """Create and train a SAITS model, then return its imputation of ``dataset``.

    Parameters
    ----------
    n_layers, d_model, d_ffn, n_heads, d_k, d_v
        Architecture hyper-parameters, forwarded unchanged to ``SAITS``.
    lr
        Learning rate handed to the ``Adam`` optimizer wrapper.
    dataset
        Dataset dict (the script passes ``dataset_for_testing``); it is used
        both for training and for the final imputation.

    Returns
    -------
    The imputed data produced by the trained model for ``dataset``.
    """
    # Re-seed before the model is built. The script seeds the generators only
    # once at start-up, so every call would otherwise continue from whatever
    # random state the previous call left behind, and two calls with identical
    # hyper-parameters would initialise and train differently.
    set_random_seed(seed)

    saits = SAITS(
        n_steps=sequence_length,  # number of time steps in each sample
        n_features=num_features,  # number of features
        n_layers=n_layers,
        d_model=d_model,
        d_ffn=d_ffn,
        n_heads=n_heads,
        d_k=d_k,
        d_v=d_v,
        dropout=0.1,
        ORT_weight=1,  # loss weights; adjust as needed
        MIT_weight=1,
        batch_size=8,
        epochs=10,
        patience=2,
        optimizer=Adam(lr=lr),
        num_workers=0,
        device="cuda",  # fixed to CUDA (not auto-selected)
        saving_path="tutorial_results/imputation/saits",  # where results are saved
        model_saving_strategy="best",  # keep only the best model
    )
    # NOTE(review): the original body stopped after constructing the model and
    # returned nothing, although the callers score the return value. Training
    # on `dataset` and returning its imputation is the reconstruction implied
    # by the docstring and the call sites - confirm against the PyPOTS version
    # in use.
    saits.fit(dataset)
    return saits.predict(dataset)["imputation"]
# Train the same configuration twice (n_layers=1), then another one twice
# (n_layers=2), to compare repeated runs with identical hyper-parameters.
# NOTE: saits_output is overwritten by every call, so the metrics below only
# score the last run.
saits_output = evaluate_model(n_layers=1, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing)
saits_output = evaluate_model(n_layers=1, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing)
saits_output = evaluate_model(n_layers=2, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing)
saits_output = evaluate_model(n_layers=2, d_model=64, d_ffn=64,n_heads=1, d_k=64, d_v=64, lr=1e-4,dataset=dataset_for_testing)

# Score the imputation at the positions flagged by test_X_indicating_mask.
testing_mae = calc_mae(saits_output, test_X_ori, test_X_indicating_mask)
print(f"Testing MAE: {testing_mae:.4f}")
mse = calc_mse(saits_output, test_X_ori, test_X_indicating_mask)
print(f"Testing MSE: {mse:.4f}")
# Mean relative error (MRE)
mre = calc_mre(saits_output, test_X_ori, test_X_indicating_mask)
print(f"Testing MRE: {mre:.4f}")
R2 = calc_r2(saits_output, test_X_ori, test_X_indicating_mask)
print(f"Testing R2: {R2:.4f}")
The text was updated successfully, but these errors were encountered: