diff --git a/data/stereoseq_mouse_embryo/sample_info.csv b/data/stereoseq_mouse_embryo/sample_info.csv deleted file mode 100644 index 2a4c8b00..00000000 --- a/data/stereoseq_mouse_embryo/sample_info.csv +++ /dev/null @@ -1,54 +0,0 @@ -"filename","file_type","sample_name","size","download" -"E9.5_E1S1.MOSTA.h5ad","h5ad","E9.5_E1S1","442M","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E9.5_E1S1.MOSTA.h5ad" -"E9.5_E2S1.MOSTA.h5ad","h5ad","E9.5_E2S1","381M","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E9.5_E2S1.MOSTA.h5ad" -"E9.5_E2S2.MOSTA.h5ad","h5ad","E9.5_E2S2","292M","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E9.5_E2S2.MOSTA.h5ad" -"E9.5_E2S3.MOSTA.h5ad","h5ad","E9.5_E2S3","350M","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E9.5_E2S3.MOSTA.h5ad" -"E9.5_E2S4.MOSTA.h5ad","h5ad","E9.5_E2S4","297M","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E9.5_E2S4.MOSTA.h5ad" -"E10.5_E1S1.MOSTA.h5ad","h5ad","E10.5_E1S1","1.4G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E10.5_E1S1.MOSTA.h5ad" -"E10.5_E1S2.MOSTA.h5ad","h5ad","E10.5_E1S2","944M","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E10.5_E1S2.MOSTA.h5ad" -"E10.5_E1S3.MOSTA.h5ad","h5ad","E10.5_E1S3","1.4G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E10.5_E1S3.MOSTA.h5ad" -"E10.5_E2S1.MOSTA.h5ad","h5ad","E10.5_E2S1","382M","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E10.5_E2S1.MOSTA.h5ad" -"E11.5_E1S1.MOSTA.h5ad","h5ad","E11.5_E1S1","1.7G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E11.5_E1S1.MOSTA.h5ad" -"E11.5_E1S2.MOSTA.h5ad","h5ad","E11.5_E1S2","1.2G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E11.5_E1S2.MOSTA.h5ad" -"E11.5_E1S3.MOSTA.h5ad","h5ad","E11.5_E1S3","1.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E11.5_E1S3.MOSTA.h5ad" -"E11.5_E1S4.MOSTA.h5ad","h5ad","E11.5_E1S4","1.5G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E11.5_E1S4.MOSTA.h5ad" -"E12.5_E1S1.MOSTA.h5ad","h5ad","E12.5_E1S1","2.6G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E12.5_E1S1.MOSTA.h5ad" -"E12.5_E1S2.MOSTA.h5ad","h5ad","E12.5_E1S2","2.2G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E12.5_E1S2.MOSTA.h5ad" -"E12.5_E1S3.MOSTA.h5ad","h5ad","E12.5_E1S3","1.4G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E12.5_E1S3.MOSTA.h5ad" -"E12.5_E1S4.MOSTA.h5ad","h5ad","E12.5_E1S4","2.4G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E12.5_E1S4.MOSTA.h5ad" -"E12.5_E1S5.MOSTA.h5ad","h5ad","E12.5_E1S5","2.0G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E12.5_E1S5.MOSTA.h5ad" -"E12.5_E2S1.MOSTA.h5ad","h5ad","E12.5_E2S1","1.3G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E12.5_E2S1.MOSTA.h5ad" -"E13.5_E1S1.MOSTA.h5ad","h5ad","E13.5_E1S1","3.3G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E13.5_E1S1.MOSTA.h5ad" -"E13.5_E1S2.MOSTA.h5ad","h5ad","E13.5_E1S2","4.2G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E13.5_E1S2.MOSTA.h5ad" -"E13.5_E1S3.MOSTA.h5ad","h5ad","E13.5_E1S3","4.2G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E13.5_E1S3.MOSTA.h5ad" -"E13.5_E1S4.MOSTA.h5ad","h5ad","E13.5_E1S4","3.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E13.5_E1S4.MOSTA.h5ad" -"E14.5_E1S1.MOSTA.h5ad","h5ad","E14.5_E1S1","5.8G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E14.5_E1S1.MOSTA.h5ad" -"E14.5_E1S2.MOSTA.h5ad","h5ad","E14.5_E1S2","4.3G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E14.5_E1S2.MOSTA.h5ad" -"E14.5_E1S3.MOSTA.h5ad","h5ad","E14.5_E1S3","2.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E14.5_E1S3.MOSTA.h5ad" -"E14.5_E1S4.MOSTA.h5ad","h5ad","E14.5_E1S4","4.9G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E14.5_E1S4.MOSTA.h5ad" -"E14.5_E1S5.MOSTA.h5ad","h5ad","E14.5_E1S5","4.3G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E14.5_E1S5.MOSTA.h5ad" -"E14.5_E2S1.MOSTA.h5ad","h5ad","E14.5_E2S1","2.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E14.5_E2S1.MOSTA.h5ad" -"E14.5_E2S2.MOSTA.h5ad","h5ad","E14.5_E2S2","3.6G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E14.5_E2S2.MOSTA.h5ad" -"E15.5_E1S1.MOSTA.h5ad","h5ad","E15.5_E1S1","4.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E15.5_E1S1.MOSTA.h5ad" -"E15.5_E1S2.MOSTA.h5ad","h5ad","E15.5_E1S2","3.6G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E15.5_E1S2.MOSTA.h5ad" -"E15.5_E1S3.MOSTA.h5ad","h5ad","E15.5_E1S3","4.6G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E15.5_E1S3.MOSTA.h5ad" -"E15.5_E1S4.MOSTA.h5ad","h5ad","E15.5_E1S4","3.4G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E15.5_E1S4.MOSTA.h5ad" -"E15.5_E2S1.MOSTA.h5ad","h5ad","E15.5_E2S1","3.0G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E15.5_E2S1.MOSTA.h5ad" -"E16.5_E1S1.MOSTA.h5ad","h5ad","E16.5_E1S1","4.5G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E1S1.MOSTA.h5ad" -"E16.5_E1S2.MOSTA.h5ad","h5ad","E16.5_E1S2","3.6G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E1S2.MOSTA.h5ad" -"E16.5_E1S3.MOSTA.h5ad","h5ad","E16.5_E1S3","5.3G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E1S3.MOSTA.h5ad" -"E16.5_E1S4.MOSTA.h5ad","h5ad","E16.5_E1S4","4.5G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E1S4.MOSTA.h5ad" -"E16.5_E1S5.MOSTA.h5ad","h5ad","E16.5_E1S5","4.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E1S5.MOSTA.h5ad" -"E16.5_E2S10.MOSTA.h5ad","h5ad","E16.5_E2S10","3.4G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S10.MOSTA.h5ad" -"E16.5_E2S11.MOSTA.h5ad","h5ad","E16.5_E2S11","3.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S11.MOSTA.h5ad" -"E16.5_E2S12.MOSTA.h5ad","h5ad","E16.5_E2S12","2.5G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S12.MOSTA.h5ad" -"E16.5_E2S13.MOSTA.h5ad","h5ad","E16.5_E2S13","2.6G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S13.MOSTA.h5ad" -"E16.5_E2S1.MOSTA.h5ad","h5ad","E16.5_E2S1","2.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S1.MOSTA.h5ad" -"E16.5_E2S2.MOSTA.h5ad","h5ad","E16.5_E2S2","2.9G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S2.MOSTA.h5ad" -"E16.5_E2S3.MOSTA.h5ad","h5ad","E16.5_E2S3","2.8G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S3.MOSTA.h5ad" -"E16.5_E2S4.MOSTA.h5ad","h5ad","E16.5_E2S4","2.9G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S4.MOSTA.h5ad" -"E16.5_E2S5.MOSTA.h5ad","h5ad","E16.5_E2S5","4.1G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S5.MOSTA.h5ad" -"E16.5_E2S6.MOSTA.h5ad","h5ad","E16.5_E2S6","3.7G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S6.MOSTA.h5ad" -"E16.5_E2S7.MOSTA.h5ad","h5ad","E16.5_E2S7","4.3G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S7.MOSTA.h5ad" -"E16.5_E2S8.MOSTA.h5ad","h5ad","E16.5_E2S8","4.0G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S8.MOSTA.h5ad" -"E16.5_E2S9.MOSTA.h5ad","h5ad","E16.5_E2S9","5.0G","https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/E16.5_E2S9.MOSTA.h5ad" \ No newline at end of file diff --git a/data/stereoseq_mouse_embryo/stereoseq_mouse_embryo.py b/data/stereoseq_mouse_embryo/stereoseq_mouse_embryo.py index bf71a237..f10ddcd1 100644 --- a/data/stereoseq_mouse_embryo/stereoseq_mouse_embryo.py +++ b/data/stereoseq_mouse_embryo/stereoseq_mouse_embryo.py @@ -13,9 +13,22 @@ # 6 available but only 2 contain region label and coordinates -sample_info=pd.read_csv('sample_info.csv') +sample_name = ['E9.5_E1S1', 'E9.5_E2S1', 'E9.5_E2S2', 'E9.5_E2S3', 'E9.5_E2S4', + 'E10.5_E1S1', 'E10.5_E1S2', 'E10.5_E1S3', 'E10.5_E2S1', + 'E11.5_E1S1', 'E11.5_E1S2', 'E11.5_E1S3', 'E11.5_E1S4', + 'E12.5_E1S1', 'E12.5_E1S2', 'E12.5_E1S3', 'E12.5_E1S4', + 'E12.5_E1S5', 'E12.5_E2S1', 'E13.5_E1S1', 'E13.5_E1S2', + 'E13.5_E1S3', 'E13.5_E1S4', 'E14.5_E1S1', 'E14.5_E1S2', + 'E14.5_E1S3', 'E14.5_E1S4', 'E14.5_E1S5', 'E14.5_E2S1', + 'E14.5_E2S2', 'E15.5_E1S1', 'E15.5_E1S2', 'E15.5_E1S3', + 'E15.5_E1S4', 'E15.5_E2S1', 'E16.5_E1S1', 'E16.5_E1S2', + 'E16.5_E1S3', 'E16.5_E1S4', 'E16.5_E1S5', 'E16.5_E2S10', + 'E16.5_E2S11', 'E16.5_E2S12', 'E16.5_E2S13', 'E16.5_E2S1', + 'E16.5_E2S2', 'E16.5_E2S3', 'E16.5_E2S4', 'E16.5_E2S5', + 'E16.5_E2S6', 'E16.5_E2S7', 'E16.5_E2S8', 'E16.5_E2S9'] + +LINKS = [f"https://ftp.cngb.org/pub/SciRAID/stomics/STDS0000058/stomics/{sample}.MOSTA.h5ad" for sample in sample_name] -LINKS = sample_info["download"].tolist() META_DICT = {"technology":"Stereo-seq"} @@ -35,7 +48,7 @@ def download_links(links, temp_dir): except Exception as e: print(f"Error downloading {link}: {e}") -def process_adata(adata_path,output_folder,iteration,sample_df,sample_info): +def process_adata(adata_path,output_folder,iteration,sample_df,sample_name): folder_name = os.path.splitext(os.path.basename(adata_path))[0] complete_path = os.path.join(output_folder,folder_name) os.makedirs(complete_path, exist_ok=True) @@ -74,7 +87,7 @@ def process_adata(adata_path,output_folder,iteration,sample_df,sample_info): # add info for sample.tsv # Your sample_data_basis dictionary - sample_data_basis = {"sample":sample_info["sample_name"].iloc[iteration],"n_clusters": adata.obs.annotation.nunique(), "directory": folder_name} + sample_data_basis = {"sample":sample_name[iteration],"n_clusters": adata.obs.annotation.nunique(), "directory": folder_name} # Creating a DataFrame from the dictionary sample_data = pd.DataFrame([sample_data_basis]) @@ -114,7 +127,7 @@ def main(): sample_df = pd.DataFrame(columns=SAMPLE_COLUMNS,index=range(len(LINKS))) anndatas = [os.path.join(temp_dir, file) for file in os.listdir(temp_dir) if file.endswith(".h5ad")] for iteration, adata in enumerate(anndatas): - process_adata(adata, args.out_dir,iteration,sample_df,sample_info) + process_adata(adata, args.out_dir,iteration,sample_df,sample_name) # write json