From d98647a4615ea12272d3d74784a27f7fc0e89271 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Fri, 28 Jun 2024 13:25:44 -0700 Subject: [PATCH 1/5] add new model --- classification/test_model.py | 34 +++ make_csv.py | 28 ++ old-config.yml | 128 ++++++++++ pyha_analyzer/augmentations.py | 2 +- .../chunking_methods/audio_splitter.py | 84 ------ .../chunking_methods/chunks_config.py | 23 -- .../chunking_methods/combine_annotations.py | 46 ---- pyha_analyzer/chunking_methods/csv_cleaner.py | 77 ------ .../chunking_methods/gen_csv_labels.py | 208 --------------- .../chunking_methods/overlapping_targets.py | 67 ----- .../chunking_methods/sliding_chunks.py | 159 ------------ pyha_analyzer/config.py | 37 +-- pyha_analyzer/dataset.py | 17 +- pyha_analyzer/infer.py | 239 ++++++++++++++++++ pyha_analyzer/models/timm_model.py | 3 +- pyha_analyzer/run_raw.py | 67 +++++ pyha_analyzer/train.py | 1 + pyha_analyzer/utils.py | 38 ++- 18 files changed, 557 insertions(+), 701 deletions(-) create mode 100644 classification/test_model.py create mode 100644 make_csv.py create mode 100644 old-config.yml create mode 100644 pyha_analyzer/infer.py create mode 100644 pyha_analyzer/run_raw.py diff --git a/classification/test_model.py b/classification/test_model.py new file mode 100644 index 00000000..bf5c02b3 --- /dev/null +++ b/classification/test_model.py @@ -0,0 +1,34 @@ +from typing import Dict, Any, Tuple +import os +import datetime +from torchmetrics.classification import MultilabelAveragePrecision + +import torch +import torch.nn.functional as F +from torch.optim import Adam +from torch.amp import autocast +import numpy as np +from dataset import PyhaDF_Dataset, get_datasets +from model import TimmModel +from utils import set_seed, print_verbose +from config import get_config +from tqdm import tqdm +from train import load_datasets, valid + +tqdm.pandas() +time_now = datetime.datetime.now().strftime('%Y%m%d-%H%M') +device = 'cuda' if torch.cuda.is_available() else 'cpu' +print("Device is: ",device) +CONFIG = get_config() + +train_dataset, val_dataset, train_dataloader, val_dataloader = load_datasets(CONFIG) + +print("Loading Model...") +model_for_run = TimmModel(num_classes=130, + model_name="convnextv2_nano", + checkpoint="/share/acoustic_species_id/models/convnextv2_nano-20230710-1731-0.pt", + CONFIG=CONFIG).to(device) + +model_for_run.load_state_dict(torch.load("/share/acoustic_species_id/models/convnextv2_nano-20230710-1731-0.pt")) + +valid(model_for_run, val_dataloader, 0, 1, CONFIG) \ No newline at end of file diff --git a/make_csv.py b/make_csv.py new file mode 100644 index 00000000..4555397d --- /dev/null +++ b/make_csv.py @@ -0,0 +1,28 @@ +# %% +import pandas as pd + +df = pd.read_csv("/mnt/passive-acoustic-biodiversity/Peru_2019_Audiomoth_Sound_Recordings/2019_Peru_MDD_AudioMoth_Recordings_Metadata_Firmware_Timing_Error_Corrected_Faulty_Clips_Removed.csv") + +# %% +df + +# %% +df["CLIP LENGTH"] = df["Duration"] + +# %% +import math +def create_raw_chunks(row): + row = row.iloc[0] + rows = [] + for i in range(0, math.floor(row["CLIP LENGTH"]), 5): + row_temp = row.copy(deep=True) + row_temp["OFFSET"] = i + row_temp["DURATION"] = 5 + rows.append(row_temp.to_frame().T) + return pd.concat(rows) + + +chunked_df = df.groupby("SourceFile", as_index=False).apply(create_raw_chunks).reset_index() +chunked_df.to_csv("peru-2019-pyha-anaylzer-inferance.csv") + + diff --git a/old-config.yml b/old-config.yml new file mode 100644 index 00000000..7c708a1e --- /dev/null +++ b/old-config.yml @@ -0,0 +1,128 @@ +# Acoustic 
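make_csv.py above expands each Peru 2019 recording into back-to-back 5-second inference windows. A standalone sketch of the same raw-chunking idea follows; it assumes, as the script does, a per-recording "Duration" column in seconds, and the function name and file paths are illustrative only.

# Hedged sketch of the raw 5-second chunking done in make_csv.py (not the project's API).
import math
import pandas as pd

def make_raw_chunks(metadata: pd.DataFrame, chunk_s: int = 5) -> pd.DataFrame:
    """Expand each recording row into one row per chunk_s-second window."""
    rows = []
    for _, rec in metadata.iterrows():
        # Window starts every chunk_s seconds; like the original loop, the last
        # window may run past the end of the recording.
        for offset in range(0, math.floor(rec["Duration"]), chunk_s):
            chunk = rec.copy()
            chunk["OFFSET"] = offset
            chunk["DURATION"] = chunk_s
            rows.append(chunk)
    return pd.DataFrame(rows)

# Usage (paths illustrative):
# chunked = make_raw_chunks(pd.read_csv("recordings_metadata.csv"))
# chunked.to_csv("inference_chunks.csv", index=False)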
Multiclass Training config file + +# Required path related config +dataframe_csv: "/share/acoustic_species_id/132PeruXC_TweetyNetLabels_baseline.csv" +data_path: "/share/acoustic_species_id/132_peru_xc_BC_2020/" + +# Dataframe column names +offset_col: "OFFSET" +duration_col: "DURATION" +file_name_col: "IN FILE" +manual_id_col: "SPECIES" + +# Device Settings +device: auto # Options: cuda, cpu, auto +prepros_device: cpu # Options: cuda, CPU + +#chunking settings +is_unchunked: True +does_center_chunking: False +chunk_length_s: 5 +min_length_s: 0.4 +include_last: false +overlap: 0.5 + +# Training params +train_batch_size: ls + +validation_batch_size: 32 +jobs: 4 +valid_freq: 1000 +mixed_precision: true +valid_dataset_ratio: 0.3 +does_weighted_sampling: False + +# Logging +logging: true +logging_freq: 20 # Batches per log +wandb_entity: "acoustic-species-identification" +wandb_project: "acoustic-species-reu2023-sweeps" +wandb_run_name: "auto" +debug: false + +# Functional settings +seed: 0 +sample_rate: 32_000 +map_debug: false +train_test_split: 0.8 +num_fold: 5 + +# Model hyperparameters +# Specifies default model architecture from timm library +# Options: +# eca_nfnet_l0 (90 MB) +# tf_efficientnet_b4 (70 MB) +# convnext_nano (60 MB) +# convnext_tiny (110 MB) +# resnetv2_50 (100 MB) +# resnetv2_101 (170 MB) +# seresnext50_32x4d (100 MB) +# seresnext101_32x4d (200 MB) +# rexnet_200 (70 MB) +# mobilenetv3_large_100_miil_in21k (70 MB) +model: "eca_nfnet_l0" +epochs: 10 +learning_rate: 0.001 +# Loss function options: +# CE: Cross entropy +# BCE: Binary cross entropy +# BCEWL: Binary cross entropy with logits +loss_fnc: "CE" +imb: false + +# Hyperparameter sweep settings +sweep_id: + +# Specify path to load a checkpoint +model_checkpoint: "" + +# Patience +early_stopping: false +patience: 3 +min_valid_map_delta: 0.01 + +# Data augmentation probabilities +mixup_p: 0 +time_shift_p: 0 +noise_p: 0.23 +freq_mask_p: 0.5 +time_mask_p: 0.11 +rand_eq_p: 0 +noise_p: 23 +lowpass_p: 0.05 +highpass_p: 0.02 +bg_noise_p: 0 + +# Data augmentation parameters +noise_type: "violet" +noise_alpha: 0.06 +freq_mask_param: 10 # Number of frequencies masked +time_mask_param: 38 # Number of samples masked +mixup_alpha_range: [0.0, 0.6] +rand_eq_f_range: [20, 8000] +rand_eq_q_range: [0.3, 3] +rand_eq_g_range: [-2, 8] +rand_eq_iters: 2 +lowpass_cutoff: 7000 +lowpass_q_val: 0.44 +highpass_cutoff: 1000 +highpass_q_val: 0.44 +bg_noise_alpha_range: [0.0, 0.4] + +# Background noise params +# Expects background_path to lead to a directory with only audio files +# Example dataset: BirdVOX at https://dcase.community/challenge2018/task-bird-audio-detection +# Make sure to delete all bird clips so it is only background noise +# If path is blank and p=0, background noise will not be used +bg_noise_path: "" + +# Transforms settings +imbalance_sampler: false +pos_weight: 1 +smoothing: 0.05 + +# FFT Settings +hop_length: 512 +n_mels: 194 +n_fft: 1400 +max_time: 5 diff --git a/pyha_analyzer/augmentations.py b/pyha_analyzer/augmentations.py index dfc0a6c6..7437a0ea 100644 --- a/pyha_analyzer/augmentations.py +++ b/pyha_analyzer/augmentations.py @@ -139,7 +139,7 @@ def forward( chosen clip, Tensor of target mixed with the target of the randomly chosen file """ - if utils.rand(0,1) <= self.prob: + if utils.rand(0,1) >= self.prob: return clip, target num_other_clips = sample(self.num_clips_distribution) diff --git a/pyha_analyzer/chunking_methods/audio_splitter.py b/pyha_analyzer/chunking_methods/audio_splitter.py index 596551b6..e69de29b 
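The augmentations.py hunk above flips the early-return comparison so the augmentation is skipped when the random draw lands at or above self.prob, meaning mixup is now applied with probability prob rather than 1 - prob. A minimal illustration of that gating pattern, using a stand-in augmentation rather than the project's Mixup class:

# Illustrative probability gate matching the corrected comparison.
import torch

def maybe_augment(clip: torch.Tensor, prob: float) -> torch.Tensor:
    """Apply a stand-in augmentation with probability prob, else return the clip unchanged."""
    if torch.rand(1).item() >= prob:    # skip path, taken with probability (1 - prob)
        return clip
    return clip + 0.01 * torch.randn_like(clip)    # placeholder augmentation (additive noise)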
100644 --- a/pyha_analyzer/chunking_methods/audio_splitter.py +++ b/pyha_analyzer/chunking_methods/audio_splitter.py @@ -1,84 +0,0 @@ -""" Splits longer audio files into smaller ones """ - -import os -import pandas as pd -import torch -import torchaudio -from tqdm import tqdm - -CONFIG = { - "metadata_csv": "annotations_chunked.csv", - "metadata_output": "annotations_split.csv", - - "audio_dir": "input", - "sample_rate": "error", # Only use if input format is pt - "output_dir": "output", - "output_format": "flac", # Supports torch audio formats - - "chunk_length_s": 60 * 5, # Length of each clip in seconds - "overlap_s": 10, # Overlap to add to each file in seconds - - "file_name_col": "FILE NAME", - "offset_col": "OFFSET", - -} - -def output_file_name(path: str, index: int, file_format: str) -> str: - """ Returns the output file name for a given input file name and index """ - return os.path.basename(path).split('.')[0] + "_" + str(index) + "." + file_format - -def split_audio_file(path: str): - """ Splits audio file into smaller chunks """ - split_len = CONFIG["chunk_length_s"] - - # Load audio file - if path.endswith(".pt"): - audio = torch.load(path) - sample_rate = CONFIG["sample_rate"] - else: - audio, sample_rate = torchaudio.load(path) # type: ignore - audio = audio[0] - - file_len = len(audio)/float(sample_rate) - num_splits = int(file_len / split_len) - - for i in range(num_splits): - # Create slice - aud_slice = audio[i*split_len*sample_rate:((i+1)*split_len+CONFIG["overlap_s"])*sample_rate] - torchaudio.save(os.path.join(CONFIG["output_dir"], # type: ignore - output_file_name(path,i,CONFIG["output_format"])), - torch.unsqueeze(aud_slice,0), sample_rate) - -def edit_row(row: pd.Series) -> pd.Series: - """ Edits a row of the metadata csv to reflect the new audio files - Changes file name and offset - """ - offset = row[CONFIG["offset_col"]] - file_index = int(offset/CONFIG["chunk_length_s"]) - # Update file name - row[CONFIG["file_name_col"]] = \ - output_file_name(str(row[CONFIG["file_name_col"]]), file_index, CONFIG["output_format"]) - # Shift offset - row[CONFIG["offset_col"]] -= file_index * CONFIG["chunk_length_s"] - return row - -def edit_metadata(df: pd.DataFrame): - """ Edits metadata to reflect the new audio files """ - return df.apply(edit_row, axis=1) - -def split_all(input_dir: str): - """ Splits all audio files in the input directory """ - input_dir = os.path.abspath(input_dir) - for path in tqdm(os.listdir(input_dir)): - audio_path = os.path.join(input_dir, path) - split_audio_file(audio_path) - -def main(): - """ Main function """ - df = pd.read_csv(CONFIG["metadata_csv"], index_col=0) - split_all(CONFIG["audio_dir"]) - df = edit_metadata(df) - df.to_csv(CONFIG["metadata_output"]) - -if __name__ == "__main__": - main() diff --git a/pyha_analyzer/chunking_methods/chunks_config.py b/pyha_analyzer/chunking_methods/chunks_config.py index 8eb617f1..e69de29b 100644 --- a/pyha_analyzer/chunking_methods/chunks_config.py +++ b/pyha_analyzer/chunking_methods/chunks_config.py @@ -1,23 +0,0 @@ - -""" Stores default argument information for the argparser - Methods: - get_config: returns an ArgumentParser with the default arguments -""" -import argparse - - -def get_config(): - """ Returns a config variable with the command line arguments or defaults - """ - parser = argparse.ArgumentParser() - - parser.add_argument('-l', '--chunk_length_s', default=5, type=int, help='duration') - parser.add_argument('-f', '--filetype', default='.wav', type=str) - parser.add_argument('-w', 
'--sliding_window', action='store_true') - - parser.add_argument('-a', '--audio_path', default='~/path/to/data/', type=str) - parser.add_argument('-m', '--metadata', default='~/metadata.csv', type=str) - parser.add_argument('-s', '--strong_labels', default='~/strong_labels.csv', type=str) - parser.add_argument('-c', '--chunk_labels', default='~/chunks.csv', type=str) - - return parser.parse_args() diff --git a/pyha_analyzer/chunking_methods/combine_annotations.py b/pyha_analyzer/chunking_methods/combine_annotations.py index 34b7faa0..e69de29b 100644 --- a/pyha_analyzer/chunking_methods/combine_annotations.py +++ b/pyha_analyzer/chunking_methods/combine_annotations.py @@ -1,46 +0,0 @@ -""" Combines short frequent annotations into a longer call annotation """ - -import argparse - -import pandas as pd -from tqdm import tqdm - -def combine_annotations(df: pd.DataFrame, max_gap_s: float = 0.5) -> pd.DataFrame: - """ Combine any annotations that have a gap length less than max_gap seconds""" - groups = df.groupby("FILE NAME") - out_groups = [] - for _, group in tqdm(groups): - group.reset_index(drop=True, inplace=True) - df = group.sort_values(by=["OFFSET"]) - i = 0 - off_col = df.columns.get_loc("OFFSET") - dur_col = df.columns.get_loc("DURATION") - # Can't use a for loop because combining annotations changes the length of the dataframe :( - while i < len(df.index) - 1: - gap_length = df.iloc[i+1,off_col] - df.iloc[i,off_col] - df.iloc[i,dur_col] - if gap_length < max_gap_s: - # Combine the two annotations by increasing the firsts duration - # and deleting the second - df.iloc[i, dur_col] += gap_length + df.iloc[i+1,dur_col] - df.drop(df.iloc[i+1].name, inplace=True) - else: - i += 1 - - df.reset_index(drop=True, inplace=True) - out_groups.append(df) - return pd.concat(out_groups, ignore_index=True, sort=False) - -if __name__=="__main__": - argparser = argparse.ArgumentParser() - argparser.add_argument('-i', '--input', type=str) - argparser.add_argument('-o', '--output', type=str) - argparser.add_argument('-g', '--max_gap', type=float, default=0.3) - args = argparser.parse_args() - assert args.input is not None, "Input file not specified" - assert args.output is not None, "Output file not specified" - - dataframe = pd.read_csv(args.input, index_col=0) - combined = combine_annotations(dataframe, args.max_gap) - combined.to_csv(args.output) - print("Old annotation count:",len(dataframe.index)) - print("New annotation count:",len(combined.index)) diff --git a/pyha_analyzer/chunking_methods/csv_cleaner.py b/pyha_analyzer/chunking_methods/csv_cleaner.py index 1fd354a2..e69de29b 100644 --- a/pyha_analyzer/chunking_methods/csv_cleaner.py +++ b/pyha_analyzer/chunking_methods/csv_cleaner.py @@ -1,77 +0,0 @@ -""" - Small script that can generally be used to clean up an audio metadata csv file -""" -import os -import pandas as pd - -ARGUMENTS = { - # INPUT - "input_path": "../example_dataset/metadata.csv", - "has_index_col": True, - "file_name_col": "file_location", - - # PROCESSING - # Specify these if there is a start and end time for the clip - # Offset and duration will be calculated from these - "start_time": "", - "end_time": "", - # Columns - "column_renames": { - "MANUAL ID": "Species eBird Code", - "Scientific Name": "SCIENTIFIC", - "Common Name": "COMMON", - "Offset": "OFFSET", - "Duration": "DURATION", - }, - - # OUTPUT - "cols_to_save": [ - "FILE NAME", - #"Species eBird Code", - "OFFSET", - "DURATION", - "SCIENTIFIC", - "COMMON" - ], - "output_path": "../example_dataset/metadata_cleaned.csv", 
- -} - -def main(): - """ Main function """ - if ARGUMENTS["input_path"] == "": - raise ValueError("Input path not specified") - - if ARGUMENTS["has_index_col"]: - df = pd.read_csv(ARGUMENTS["input_path"], index_col=0) - else: - df = pd.read_csv(ARGUMENTS["input_path"]) - # Rename columns - col_renames = ARGUMENTS["column_renames"] - for col in col_renames.items(): - if col in df.columns: - df = df.rename(columns={col: col_renames[col]}) - elif col_renames[col] in df.columns: - pass # Already renamed - else: - print("Warning: column", col, "not found in dataset") - - # Delete missing files - df = df[df[ARGUMENTS["file_name_col"]].apply(lambda x: isinstance(x,str))] - df = df[df[ARGUMENTS["file_name_col"]]!=""] - - # Fix file name column - df["FILE NAME"] = df[ARGUMENTS["file_name_col"]].apply(os.path.basename) - - # Turn start and end to offset and duration - if ARGUMENTS["start_time"] != "": - df["OFFSET"] = df[ARGUMENTS["start_time"]] - df["DURATION"] = df[ARGUMENTS["end_time"]] - df[ARGUMENTS["start_time"]] - - df = df.reset_index(drop=True) - df = df[ARGUMENTS["cols_to_save"]] - df.to_csv(ARGUMENTS["output_path"]) - print(f"Wrote {len(df)} annotations to {ARGUMENTS['output_path']}") - -if __name__ == "__main__": - main() diff --git a/pyha_analyzer/chunking_methods/gen_csv_labels.py b/pyha_analyzer/chunking_methods/gen_csv_labels.py index ec5254e7..e69de29b 100644 --- a/pyha_analyzer/chunking_methods/gen_csv_labels.py +++ b/pyha_analyzer/chunking_methods/gen_csv_labels.py @@ -1,208 +0,0 @@ -"""Generates binary annotations using TweetyNet from weakly labeled audio. -This file should be run from inside the PyHa directory. It also requires -config.py and WTS_Chunking.py to be added to the PyHa directory. -Input: A path to a folder with audio files -Output: A csv with chunked, strongly-labeled annotations -""" -import sys -from math import ceil -from pathlib import Path - -import pandas as pd -from pydub import AudioSegment, exceptions -# pylint: disable=import-error #this file gets put into PyHa -# pylint: disable=no-name-in-module -from PyHa.IsoAutio import generate_automated_labels # pyright: ignore -from pyha_analyzer.chunking_methods.sliding_chunks import dynamic_yan_chunking -from pyha_analyzer.chunking_methods.chunks_config import get_config - -# This could be changed to use Microfaune or BirdNET, but the parameters are -# somewhat different and TweetyNet should be the default. -ISOLATION_PARAMETERS = { - "model" : "tweetynet", - "tweety_output": True, - "verbose" : True -} - -def convert_audio(directory: str, filetype: str) -> None: - """Convert audio files to .wav files with PyDub. Used to ensure - that TweetyNet can read the files for predictions. - Args: - directory (str) - - Path to folder containing audio files - filetype (str) - - File extension for incoming audio files - Returns: - None - """ - # conversion not needed for tweetynet processing - if filetype in [".wav", ".mp3"]: - print(f'Conversion from {filetype} not required for TweetyNet processing') - return - print(f'Converting audio for {directory}') - file_list = [f for f in Path(directory).glob('**/*') if f.is_file()] - for path in file_list: - if path.suffix == filetype: - audio = AudioSegment.from_file(path) - audio.export(path.with_suffix('.wav'), format='wav') - -def generate_labels(path: str) -> pd.DataFrame: - """Generate binary automated time-specific labels using TweetyNet as - implemented in PyHa. 
- Args: - path (str) - - Path to folder containing audio files with at most one - subdirectory level - Returns: - PyHa-formatted DataFrame - """ - rootdir = Path(path) - if not rootdir.is_dir(): - print(f'Directory not found in path {path}', file=sys.stderr) - sys.exit(1) - - # generate labels at a top level - automated_df = generate_automated_labels(path, ISOLATION_PARAMETERS) - - # check subdirectories in case files organized by class - subfolders = [str(f) for f in rootdir.rglob('*') if f.is_dir()] - for folder in sorted(subfolders): - temp_df = generate_automated_labels(folder, ISOLATION_PARAMETERS) - if temp_df.empty: - continue - automated_df = pd.concat([automated_df, temp_df], ignore_index=True, sort=False) - - if automated_df.empty: - print('No labels generated') - - return automated_df - -def attach_labels(metadata_df: pd.DataFrame, binary_df: pd.DataFrame) -> pd.DataFrame: - """ Attach the primary label from original metadata as a strong label - for each chunk and reformat the columns for the training pipeline. - Args: - metadata_df (DataFrame) - - DataFrame with original audio clip information. Assumes - Xeno-canto formatting. - binary_df (DataFrame) - - DataFrame with time-specific labels. Assumes PyHa formatting. - Returns: - DataFrame with minimum required columns for training - """ - if 'filename' not in metadata_df.columns: - raise KeyError("This function merges .csvs on filename. Check your metadata columns!") - strong_df = metadata_df.merge(binary_df, left_on='filename', right_on='IN FILE') - strong_df = strong_df[['Species eBird Code', - 'Scientific Name', - 'IN FILE', - 'FOLDER', - 'OFFSET', - 'DURATION', - 'CLIP LENGTH']] - strong_df = strong_df.rename(columns={'IN FILE': 'FILE NAME', - 'Species eBird Code': 'SPECIES', - 'Scientific Name': 'SCIENTIFIC'}) - return strong_df - -def generate_raw_chunks(directory: str, metadata_df: pd.DataFrame, chunk_length_s: int=5, - filetype: str='.wav') -> pd.DataFrame: - """Create simple chunks by dividing the file into equal length - segments. Used as a baseline comparison to PyHa's pseudo-labeling. - Args: - directory (str) - - Path to folder containing audio files - metadata_df (DataFrame) - - DataFrame original audio clip information. Assumes - Xeno-canto formatting. - chunk_length_s (int) - - Length of desired file chunks in seconds - filetype (str) - - File extension for incoming audio files - Returns a DataFrame with end-to-end chunked annotations - """ - if 'filename' not in metadata_df.columns: - raise KeyError("This function merges .csvs on filename. 
Check your metadata columns!") - chunks = [] - chunk_length_ms = chunk_length_s * 1000 - file_list = [f for f in Path(directory).glob('**/*') if f.is_file() and f.suffix == filetype] - for path in sorted(file_list): - try: - audio = AudioSegment.from_file(path) - except exceptions.CouldntDecodeError as ex: - # catch ffmpeg error - print('Audio conversion failed for ', path) - print(ex) - continue - - file_length_ms = len(audio) - num_chunks = ceil(file_length_ms / (chunk_length_ms)) - - # attempt to match file with scientific name and ebird code - try: - row = metadata_df.loc[metadata_df['filename'] == path.name] - scientific = row['Scientific Name'].iloc[0] - species = row['Species eBird Code'].iloc[0] - except IndexError as ex: - print('Scientific name or species lookup failed for ', path.name) - print(ex) - continue - - # create chunks and add to dataframe - for i in range(num_chunks): - start = i * chunk_length_ms - end = start + chunk_length_ms - # cut off ending chunks that won't be 5s long - if end <= file_length_ms: - temp = { - 'SPECIES' : species, - 'SCIENTIFIC' : scientific, - 'FILE NAME' : path.name, - 'FOLDER' : path.parent, - 'OFFSET' : start / 1000, - 'DURATION' : chunk_length_s, - 'CLIP LENGTH' : file_length_ms - } - chunks.append(temp) - return pd.DataFrame(chunks) - -def main(): - """Generates binary annotations using TweetyNet from weakly labeled audio. - Args: - None - Returns: - None - """ - cfg = get_config() - metadata = pd.read_csv(cfg.metadata) - - if cfg.sliding_window: - # saved to csv in case attaching labels fails as generating labels takes more time - print('Generating labels...') - convert_audio(cfg.audio_path, cfg.filetype) - labels = generate_labels(cfg.audio_path) - labels.to_csv(cfg.strong_labels) - - print('Attaching strong labels...') - strong_labels = attach_labels(metadata, labels) - strong_labels.to_csv(cfg.strong_labels) - - print('Generating sliding chunks...') - chunks_df = dynamic_yan_chunking(strong_labels, - cfg.chunk_length_s, - cfg.min_length_s, - cfg.overlap, - cfg.chunk_margin_s, - only_slide=False) - chunks_df.to_csv(cfg.chunk_labels) - else: - print('Generating raw chunks...') - chunks_df = generate_raw_chunks( - directory=cfg.audio_path, - metadata_df=metadata, - chunk_length_s=cfg.chunk_length_s, - filetype=cfg.filetype) - chunks_df.to_csv(cfg.chunk_labels) - print("Wrote chunks to", cfg.chunk_labels) - -if __name__=="__main__": - main() diff --git a/pyha_analyzer/chunking_methods/overlapping_targets.py b/pyha_analyzer/chunking_methods/overlapping_targets.py index fccbd9df..e69de29b 100644 --- a/pyha_analyzer/chunking_methods/overlapping_targets.py +++ b/pyha_analyzer/chunking_methods/overlapping_targets.py @@ -1,67 +0,0 @@ -""" Takes in a dataframe csv containing chunked data (from config.yml) -Recognizes overlapping targets and creates a new dataframe with mixed targets -In the "TARGET" column, there will be a dictionary with the target for each class -Example target: - { - "class1": 1, - "other_class": 0.4, - "other_class2": 0.2 - } - where the other number is the overlapping proportion with the original annotation - This may be loaded using ast.literal_eval to get the dictionary from the CSV -""" -import os.path - -import pandas as pd -from tqdm import tqdm -from pyha_analyzer import config - -cfg = config.cfg - -def apply_overlapping_target(row: pd.Series, group: pd.DataFrame): - """ Iterates over other rows in group and returns row with target dictionary """ - manual_id = str(row[cfg.manual_id_col]) - target = dict({manual_id: 1}) 
- start = row[cfg.offset_col] - end = row[cfg.offset_col] + row[cfg.duration_col] - # Iterate over other rows in group and check if they overlap - for _, other_row in group.iterrows(): - other_id = str(other_row[cfg.manual_id_col]) - if other_id == manual_id: - continue - other_start = other_row[cfg.offset_col] - other_end = other_row[cfg.offset_col] + other_row[cfg.duration_col] - # Calculate overlap - overlap = 0 - if start < other_start < end: - overlap = (end - other_start) / row[cfg.duration_col] - if start < other_end < end: - overlap = (other_end - start) / row[cfg.duration_col] - if overlap > 0: - target[other_id] = round(max(overlap, target.get(other_id, 0)),2) - row["TARGET"] = target - return row - -def get_targets(df: pd.DataFrame): - """ - Get the targets for each group and returns a new dataframe - """ - by_file = df.groupby(cfg.file_name_col, as_index=False) - unique_files = df[cfg.file_name_col].unique() - groups = [] - groups = [by_file.get_group(file) for file in unique_files] - processed_groups = [group.apply( - lambda row,g=group: apply_overlapping_target(row,g),axis=1 - ) for group in tqdm(groups)] - return pd.concat(processed_groups) - -def main(): - """ Main function """ - df = pd.read_csv(cfg.dataframe_csv, index_col=0) - df_mixed = get_targets(df) - # Delete manual id column so it is not accidentally used - del df_mixed[cfg.manual_id_col] - df_mixed.to_csv(os.path.basename(cfg.dataframe_csv)+"_mixed.csv") - -if __name__ == "__main__": - main() diff --git a/pyha_analyzer/chunking_methods/sliding_chunks.py b/pyha_analyzer/chunking_methods/sliding_chunks.py index b4b08c0d..e69de29b 100644 --- a/pyha_analyzer/chunking_methods/sliding_chunks.py +++ b/pyha_analyzer/chunking_methods/sliding_chunks.py @@ -1,159 +0,0 @@ -"""Chunking script from PyHa to convert weak labels to strong labels. -""" -from typing import Dict, List - -import pandas as pd - - -# pylint: disable-next=too-many-arguments -def convolving_chunk(row:dict, - chunk_length_s: int, - min_length_s: float, - overlap: float, - chunk_margin_s: float, - only_slide=False) -> List[Dict]: - """ - Helper function that converts a binary annotation row to uniform chunks. - Note: Annotations of length shorter than min_length are ignored. Annotations - that are shorter than or equal to chunk_length are chopped into three chunks - where the annotation is placed at the start, middle, and end. Annotations - that are longer than chunk_length are chunked used a sliding window. 
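For intuition, a worked example of the start/middle/end placement described above, assuming chunk_length_s = 5, no chunk margin, and a clip long enough for all three placements to fit:

# Worked example of the three candidate chunk starts for a short annotation.
offset_s, duration_s, chunk_s = 10.0, 2.0, 5.0
end_s = offset_s + duration_s                      # 12.0
starts = [
    offset_s,                                      # start-aligned chunk  -> 10.0
    (offset_s + end_s) / 2 - chunk_s / 2,          # centered chunk       -> 8.5
    end_s - chunk_s,                               # end-aligned chunk    -> 7.0
]
# Each start becomes a new row with DURATION = 5 and the original label.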
- Args: - row (dict) - - Single annotation row represented as a dict - chunk_length_s (int) - - Duration in seconds to set all annotation chunks - min_length_s (float) - - Duration in seconds to ignore annotations shorter in length - overlap (float) - - Percentage of overlap between chunks - chunk_margin_s (float) - - Duration to pad chunks on either side - only_slide (bool) - - If True, only annotations greater than chunk_length_s are chunked - Returns: - Array of labels of chunk_length_s duration - """ - starts = [] - offset_s = max(float(row['OFFSET'])-chunk_margin_s, 0) - duration_s = float(row['DURATION']) # length of annotation - duration_s += 2 * chunk_margin_s - end_s = float(min(offset_s + duration_s, float(row["CLIP LENGTH"]))) - chunk_self_time = chunk_length_s * (1 - overlap) - - #Ignore small duration (could be errors, play with this value) - if duration_s < min_length_s: - return [] - - # calculate valid offsets for short annotations - if duration_s <= chunk_length_s: - # start of clip - if (offset_s + chunk_length_s) < float(row['CLIP LENGTH']) and not only_slide: - starts.append(offset_s) - # middle of clip - if end_s - chunk_length_s/2.0 > 0 and end_s + chunk_length_s/2.0 < row['CLIP LENGTH']: - starts.append((offset_s + end_s)/2.0 - chunk_length_s/2.0) - # end of clip - if end_s - chunk_length_s > 0 and not only_slide: - starts.append(end_s - chunk_length_s) - # calculate valid offsets for long annotations - else: - clip_num = int(round(duration_s / chunk_self_time)) - for i in range(clip_num): - if (offset_s + chunk_length_s) + (i * chunk_self_time) < row['CLIP LENGTH']: - starts.append(offset_s + i * chunk_self_time) - - # create new rows - rows = [] - for value in starts: - new_row = row.copy() - new_row['OFFSET'] = value - new_row['DURATION'] = chunk_length_s - rows.append(new_row) - return rows - -def convolving_chunk_old(row:dict, - chunk_length_s=3, - min_length_s=0.4, - only_slide=False)->List[Dict]: - """ Helper function that converts a binary annotation row to uniform chunks. 
""" - starts = [] - offset_s = row['OFFSET'] # start time of original clip - duration_s = row['DURATION'] # length of annotation - end_s = offset_s + duration_s - half_chunk_s = chunk_length_s / 2 - - if duration_s < min_length_s: - return [] - # calculate valid offsets for short annotations - if (duration_s <= chunk_length_s) and not only_slide: - if (offset_s + chunk_length_s) < row['CLIP LENGTH']: - starts.append(offset_s) # start of clip - if end_s - half_chunk_s > 0 and end_s + half_chunk_s < row['CLIP LENGTH']: - starts.append(end_s - half_chunk_s) # middle of clip - if end_s - chunk_length_s > 0: - starts.append(end_s - chunk_length_s) # end of clip - # calculate valid offsets for long annotations - else: - clip_num = int(duration_s / half_chunk_s) - for i in range(clip_num-1): - if (offset_s + chunk_length_s) + (i * half_chunk_s) < row['CLIP LENGTH']: - starts.append(offset_s + i * half_chunk_s) - rows = [] - for value in starts: - new_row = row.copy() - new_row['OFFSET'] = value - new_row['DURATION'] = chunk_length_s - rows.append(new_row) - return rows - -# pylint: disable-next=too-many-arguments -def dynamic_yan_chunking(df: pd.DataFrame, - chunk_length_s: int, - min_length_s: float, - overlap: float, - chunk_margin_s: float, - only_slide: bool=False) -> pd.DataFrame: - """ - Function that converts a Dataframe containing binary annotations - to uniform chunks using a sliding window - Args: - df (Dataframe) - - Dataframe of annotations - chunk_length_s (int) - - Duration in seconds to set all annotation chunks - min_length_s (float) - - Duration in seconds to ignore annotations shorter in length - overlap (float) - - Percentage of overlap between chunks - chunk_margin_s (float) - - Duration to pad chunks on either side - only_slide (bool) - - If True, only annotations greater than chunk_length_s are chunked - Returns: - Dataframe of labels with chunk_length duration - """ - return_dicts = [] - for _, row in df.iterrows(): - rows_dict = convolving_chunk(row.to_dict(), - chunk_length_s, - min_length_s, - overlap, - chunk_margin_s, - only_slide) - return_dicts.extend(rows_dict) - print("Cur annotations:",len(df)) - print("New chunks:",len(return_dicts)) - return pd.DataFrame(return_dicts) - -def dynamic_yan_chunking_old(df: pd.DataFrame, - chunk_length_s:int=3, - min_length_s:float=0.4, - only_slide:bool=False)-> pd.DataFrame: - """ Function that converts a Dataframe containing binary annotations - to uniform chunks using a sliding window """ - return_dicts = [] - for _, row in df.iterrows(): - rows_dict = convolving_chunk_old(row.to_dict(), chunk_length_s, min_length_s, only_slide) - return_dicts.extend(rows_dict) - return pd.DataFrame(return_dicts) diff --git a/pyha_analyzer/config.py b/pyha_analyzer/config.py index 20fa22f2..b284cd5f 100644 --- a/pyha_analyzer/config.py +++ b/pyha_analyzer/config.py @@ -130,27 +130,28 @@ def cli_values(self): Saves all command line arguments to config class Primarily intended for quick flags such as turning a property on or off """ - parser = argparse.ArgumentParser() - parser.add_argument('-l', '--logging', action='store_false') - parser.add_argument('-d', '--debug', action='store_true') + pass + # parser = argparse.ArgumentParser() + # parser.add_argument('-l', '--logging', action='store_false') + # parser.add_argument('-d', '--debug', action='store_true') - arg_cfgs = parser.parse_args() + # arg_cfgs = parser.parse_args() - # Add all command line args to config - # Overwrite because user is defining them most recently - # Unless they are 
default value - arg_cfgs = vars(arg_cfgs) - for key in arg_cfgs: - if self.config_dict[key] == parser.get_default(key): - setattr(self, key, arg_cfgs[key]) + # # Add all command line args to config + # # Overwrite because user is defining them most recently + # # Unless they are default value + # arg_cfgs = vars(arg_cfgs) + # for key in arg_cfgs: + # if self.config_dict[key] == parser.get_default(key): + # setattr(self, key, arg_cfgs[key]) - logger.setLevel(logging.DEBUG) - console_handler = logging.StreamHandler() - if self.debug: - console_handler.setLevel(logging.DEBUG) - else: - console_handler.setLevel(logging.INFO) - logger.addHandler(console_handler) + # logger.setLevel(logging.DEBUG) + # console_handler = logging.StreamHandler() + # if self.debug: + # console_handler.setLevel(logging.DEBUG) + # else: + # console_handler.setLevel(logging.INFO) + # logger.addHandler(console_handler) def required_checks(self, parameter): """ diff --git a/pyha_analyzer/dataset.py b/pyha_analyzer/dataset.py index c4d35215..9dc5133e 100644 --- a/pyha_analyzer/dataset.py +++ b/pyha_analyzer/dataset.py @@ -67,6 +67,12 @@ def __init__(self, for root, _, files in os.walk(cfg.data_path): self.data_dir |= {os.path.join(root,file) for file in files} + if not os.path.exists(cfg.data_path2): + raise FileNotFoundError("Data path does not exist") + self.data_dir2 = set() + for root, _, files in os.walk(cfg.data_path2): + self.data_dir2 |= {os.path.join(root,file) for file in files} + #Log bad files self.bad_files = [] @@ -147,7 +153,7 @@ def process_audio_file(self, file_name: str) -> pd.Series: """ exts = "." + file_name.split(".")[-1] new_name = file_name.replace(exts, ".pt") - if os.path.join(self.cfg.data_path, new_name) in self.data_dir: + if os.path.join(self.cfg.data_path2, new_name) in self.data_dir2: #ASSUME WE HAVE ALREADY PREPROCESSED THIS CORRECTLY return pd.Series({ "FILE NAME": file_name, @@ -171,13 +177,16 @@ def process_audio_file(self, file_name: str) -> pd.Series: resample = audtr.Resample(sample_rate, self.cfg.sample_rate) audio = resample(audio) - torch.save(audio, os.path.join(self.cfg.data_path,new_name)) - self.data_dir.add(new_name) + temp_new_file = os.path.join(self.cfg.data_path2,new_name) + os.makedirs(os.path.dirname(temp_new_file), exist_ok=True) + torch.save(audio, temp_new_file) + self.data_dir2.add(new_name) # IO is messy, I want any file that could be problematic # removed from training so it isn't stopped after hours of time # Hence broad exception # pylint: disable-next=W0718 except Exception as exc: + print(exc) logger.debug("%s is bad %s", file_name, exc) return pd.Series({ "FILE NAME": file_name, @@ -247,7 +256,7 @@ def to_image(self, audio): # Sigmoid to get 0 to 1 scaling (0.5 becomes mean) mel = torch.sigmoid(mel) - return torch.stack([mel, mel, mel]) + return mel.unsqueeze(0) #torch.stack([mel, mel, mel]) def __getitem__(self, index): #-> Any: """ Takes an index and returns tuple of spectrogram image with corresponding label diff --git a/pyha_analyzer/infer.py b/pyha_analyzer/infer.py new file mode 100644 index 00000000..43cd80ad --- /dev/null +++ b/pyha_analyzer/infer.py @@ -0,0 +1,239 @@ + +# %% +import pandas as pd +from pyha_analyzer.train import run_batch +from pyha_analyzer.dataset import get_datasets, make_dataloaders, PyhaDFDataset +from pyha_analyzer import config +from pyha_analyzer.models.timm_model import TimmModel +import logging + +logger = logging.getLogger("acoustic_multiclass_training") + +cfg = config.cfg + +import pandas as pd +df = 
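The dataset.py changes above add a second root (cfg.data_path2) holding preprocessed .pt tensors so raw audio is decoded and resampled only once. A minimal sketch of that load-or-preprocess pattern; the helper name is illustrative and only the cache-aside idea is taken from the diff.

# Illustrative cache-aside preprocessing: reuse a saved .pt tensor when present,
# otherwise decode, resample, and save it under the second data root.
import os
import torch
import torchaudio

def load_or_preprocess(file_name: str, raw_root: str, pt_root: str, target_sr: int) -> torch.Tensor:
    pt_path = os.path.join(pt_root, os.path.splitext(file_name)[0] + ".pt")
    if os.path.exists(pt_path):
        return torch.load(pt_path)                          # already preprocessed
    audio, sr = torchaudio.load(os.path.join(raw_root, file_name))
    audio = audio.mean(dim=0)                               # mix down to mono
    if sr != target_sr:
        audio = torchaudio.transforms.Resample(sr, target_sr)(audio)
    os.makedirs(os.path.dirname(pt_path), exist_ok=True)
    torch.save(audio, pt_path)
    return audio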
pd.read_csv("/home/shperry/acoustic-multiclass-training/peru-2019-pyha-anaylzer-inferance.csv") + +# %% + + +classes = [ + "amabaw1", + "amapyo1", + "astgna1", + "baffal1", + "barant1", + "bartin2", + "batman1", + "blacar1", + "blbthr1", + "blcbec1", + "blctro1", + "blfant1", + "blfcot1", + "blfjac1", + "blfnun1", + "blgdov1", + "blhpar1", + "bltant2", + "blttro1", + "bobfly1", + "brratt1", + "bsbeye1", + "btfgle1", + "bubgro2", + "bubwre1", + "bucmot4", + "buffal1", + "butsal1", + "butwoo1", + "chwfog1", + "cinmou1", + "cintin1", + "citwoo1", + "coffal1", + "coltro1", + "compot1", + "cowpar1", + "crfgle1", + "ducatt1", + "ducfly", + "ducgre1", + "duhpar", + "dutant2", + "elewoo1", + "eulfly1", + "fasant1", + "fepowl", + "forela1", + "garkin1", + "gilbar1", + "gnbtro1", + "gocspa1", + "goeant1", + "gogwoo1", + "gramou1", + "grasal3", + "grcfly1", + "greant1", + "greibi1", + "gretin1", + "grfdov1", + "gryant1", + "gryant2", + "gycfly1", + "gycwor1", + "hauthr1", + "horscr1", + "letbar1", + "littin1", + "litwoo2", + "lobwoo1", + "lowant1", + "meapar", + "muswre2", + "olioro1", + "oliwoo1", + "partan1", + "pavpig2", + "pirfly1", + "plbwoo1", + "pltant1", + "pluant1", + "plupig2", + "plwant1", + "puteup1", + "putfru1", + "pygant1", + "rcatan1", + "rebmac2", + "renwoo1", + "rinant2", + "rinkin1", + "rinwoo1", + "royfly1", + "ruboro1", + "rucant2", + "rudpig", + "rufant3", + "ruftof1", + "ruqdov", + "scapig2", + "scbwoo5", + "scrpih1", + "sobcac1", + "specha3", + "spigua1", + "spwant2", + "squcuc1", + "stbwoo2", + "strcuc1", + "strwoo2", + "strxen1", + "stwqua1", + "tabsco1", + "thlwre1", + "undtin1", + "viotro3", + "wespuf1", + "whbtot1", + "whcspa1", + "whfant2", + "whltyr1", + "whnrob1", + "whrsir1", + "whttou1", + "whtwoo2", + "whwbec1", + "wibpip1", + "yectyr1", + "yemfly1", + "yercac1", + "yetwoo2" + ] + + +df["MANUAL ID"] = "yetwoo2" + +# %% +from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler +df = df[["OFFSET", "DURATION", "SourceFile", "MANUAL ID"]] +infer_dataset = PyhaDFDataset(df, train=False, species=classes, cfg=cfg) +infer_dataloader = DataLoader( + infer_dataset, + cfg.validation_batch_size, + shuffle=False, + num_workers=cfg.jobs, + ) + +# %% +print(len(infer_dataset)) + +# %% + +import torch +model_for_run = TimmModel(num_classes=len(classes), + model_name=cfg.model).to(cfg.device) + +if cfg.model_checkpoint != "": + model_for_run.load_state_dict(torch.load(cfg.model_checkpoint)) + +# %% +from tqdm import tqdm +import torch.nn.functional as F +model = model_for_run +model.eval() + +log_pred, log_label = [], [] + +num_valid_samples = int(len(infer_dataloader)) + +# tqdm is a progress bar +dl_iter = tqdm(infer_dataloader, position=5, total=num_valid_samples) + +def run_batch(model: TimmModel, + mels: torch.Tensor, + labels: torch.Tensor, + ): + """ Runs the model on a single batch + Args: + model: the model to pass the batch through + mels: single batch of input data + labels: single batch of expecte output + Returns (tuple of): + loss: the loss of the batch + outputs: the output of the model + """ + mels = mels.to(cfg.device) + labels = labels.to(cfg.device) + outputs = model(mels) + return [0], outputs + +print("ran model") +with torch.no_grad(): + for _, (mels, labels) in enumerate(dl_iter): + try: + _, outputs = run_batch(model, mels, labels) + log_pred.append(torch.clone(outputs.cpu()).detach()) + log_label.append(torch.clone(labels.cpu()).detach()) + except Exception as e: + print(e) + +print("save results") +# sigmoid predictions +log_pred = 
F.sigmoid(torch.cat(log_pred)).cpu() +print(log_pred.shape) +pd.DataFrame(log_pred, columns=classes).to_csv("result.csv") + +#cmap, smap = map_metric(log_pred, torch.cat(log_label), dataset.class_dist) + +# %% +cfg.data_path2 + +# %% + + +# %% + + + diff --git a/pyha_analyzer/models/timm_model.py b/pyha_analyzer/models/timm_model.py index 4b61a96a..f49b928c 100644 --- a/pyha_analyzer/models/timm_model.py +++ b/pyha_analyzer/models/timm_model.py @@ -34,7 +34,8 @@ def __init__(self, model_name, pretrained=pretrained, num_classes=num_classes, - drop_rate=cfg.drop_rate) + drop_rate=cfg.drop_rate, + in_chans=cfg.channels) self.loss_fn = None self.without_logits = cfg.loss_fnc == "BCE" diff --git a/pyha_analyzer/run_raw.py b/pyha_analyzer/run_raw.py new file mode 100644 index 00000000..71291692 --- /dev/null +++ b/pyha_analyzer/run_raw.py @@ -0,0 +1,67 @@ +""" Gets the testing mAP of a model on soundscapes """ + +import pandas as pd +import torch +from torch.utils.data import DataLoader +import wandb +from tqdm import tqdm + +from pyha_analyzer import config +from pyha_analyzer import dataset +from pyha_analyzer import train +from pyha_analyzer import utils +from pyha_analyzer.models.timm_model import TimmModel + +cfg = config.Config() + +def main(): + """ Returns the testing mAP for the specified data and class list """ + torch.multiprocessing.set_start_method('spawn') + print(f"Device is: {"CUDA"}, Preprocessing Device is {"CPU"}") + utils.set_seed(0) + wandb.init(mode="disabled") + + # Get dataset + df = pd.read_csv(cfg.dataframe_csv, index_col=0) + if cfg.class_list is None: + raise ValueError("Class list must be specified in config") + for class_item in cfg.class_list: + df[class_item] = 0 + test_ds = dataset.PyhaDFDataset(df,train=False, species=cfg.class_list) + dataloader = DataLoader( + test_ds, + cfg.train_batch_size, + shuffle=False, + num_workers=cfg.jobs, + ) + + # Get model + model_for_run = TimmModel(num_classes=test_ds.num_classes, + model_name=cfg.model).to(cfg.device) + model_for_run.create_loss_fn(test_ds) + try: + model_for_run.load_state_dict(torch.load(cfg.model_checkpoint)) + except FileNotFoundError as exc: + raise FileNotFoundError("Model not found: " + cfg.model_checkpoint) from exc + + + # Testing + train.BEST_VALID_MAP = 1.0 + model_for_run.eval() + log_pred = [] + log_label = [] + dataloader = tqdm(dataloader, total=len(test_ds)/cfg.train_batch_size) + + with torch.no_grad(): + for index, (mels, labels) in enumerate(dataloader): + + loss, outputs = train.run_batch(model_for_run, mels, labels) + + log_pred.append(torch.clone(outputs.cpu()).detach()) + log_label.append(torch.clone(labels.cpu()).detach()) + return log_pred, log_label + # Do some stuff in the python command line to attach labels to dataframe + + +if __name__ == "__main__": + main() diff --git a/pyha_analyzer/train.py b/pyha_analyzer/train.py index e4c1ca2d..29a6c8e8 100644 --- a/pyha_analyzer/train.py +++ b/pyha_analyzer/train.py @@ -379,6 +379,7 @@ def main(in_sweep=True) -> None: if cfg.early_stopping and early_stopper.early_stop(valid_cmap): logger.info("Early stopping has triggered on epoch %d", epoch) break + return train_dataset, val_dataset, infer_dataset if __name__ == '__main__': torch.multiprocessing.set_sharing_strategy('file_system') diff --git a/pyha_analyzer/utils.py b/pyha_analyzer/utils.py index 1f24360d..bc7d80c0 100644 --- a/pyha_analyzer/utils.py +++ b/pyha_analyzer/utils.py @@ -126,24 +126,36 @@ def get_annotation( if offset: frame_offset += rand_offset() num_frames = 
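The timm_model.py change earlier in this patch passes cfg.channels through to timm as in_chans, which pairs with the dataset.py switch to a single mel channel (patch 5 later reverts that to a stacked three). A hedged sketch of what the in_chans argument does in timm; the model name and tensor shapes below are illustrative.

# Sketch: timm builds the stem convolution for the requested channel count, and
# when pretrained weights are loaded it adapts the first-layer kernels for
# non-RGB inputs, so single-channel spectrograms can still use pretrained backbones.
import timm
import torch

model = timm.create_model("eca_nfnet_l0", pretrained=False, num_classes=130, in_chans=1)
dummy_mel = torch.randn(2, 1, 194, 313)   # (batch, channels, n_mels, time frames), shapes illustrative
logits = model(dummy_mel)                 # -> torch.Size([2, 130])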
int(annotation[conf.duration_col] * sample_rate) - # Load audio - audio = torch.load(Path(cfg.data_path)/file_name) - - if audio.shape[0] > num_frames: - audio = audio[frame_offset:frame_offset+num_frames] - - # Crop if too long - if audio.shape[0] > target_num_samples: - audio = crop_audio(audio, target_num_samples) - # Pad if too short - if audio.shape[0] < target_num_samples: - audio = pad_audio(audio, target_num_samples) + audio = torch.load(Path(cfg.data_path2)/file_name) + except Exception as e: print(e) print(file_name, index) - raise RuntimeError("Bad Audio") from e + audio = torch.zeros(1000) + #raise RuntimeError("Bad Audio") from e + + #print(audio.shape, frame_offset) + + + if audio.shape[-1] < frame_offset: + print(annotation, "TOO LONG") + raise "Bad File - Frame offset larger than annotation" + + if audio.shape[-1] > num_frames: + audio = audio[frame_offset:frame_offset+num_frames] + # Crop if too long + if audio.shape[-1] > target_num_samples: + audio = crop_audio(audio, target_num_samples) + + + # Pad if too short + if audio.shape[-1] < target_num_samples: + audio = pad_audio(audio, target_num_samples) + + + audio = audio.to(conf.prepros_device) target = target.to(conf.prepros_device) return audio, target From aee20263d9bcb2548ab3073a8feb8163d547859c Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Fri, 28 Jun 2024 13:32:11 -0700 Subject: [PATCH 2/5] fix error handling --- pyha_analyzer/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyha_analyzer/utils.py b/pyha_analyzer/utils.py index bc7d80c0..52e83716 100644 --- a/pyha_analyzer/utils.py +++ b/pyha_analyzer/utils.py @@ -140,7 +140,7 @@ def get_annotation( if audio.shape[-1] < frame_offset: print(annotation, "TOO LONG") - raise "Bad File - Frame offset larger than annotation" + audio = torch.zeros(1000) if audio.shape[-1] > num_frames: audio = audio[frame_offset:frame_offset+num_frames] From d31d7eaae6bcaec1590fb6f183d5dd498f88bde9 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Fri, 12 Jul 2024 15:26:03 -0700 Subject: [PATCH 3/5] feat: add second folder for pt files --- pyha_analyzer/dataset.py | 2 +- pyha_analyzer/infer.py | 455 ++++++++++++++++++------------------ pyha_analyzer/infer_test.py | 22 ++ 3 files changed, 248 insertions(+), 231 deletions(-) create mode 100644 pyha_analyzer/infer_test.py diff --git a/pyha_analyzer/dataset.py b/pyha_analyzer/dataset.py index 9dc5133e..54f1d268 100644 --- a/pyha_analyzer/dataset.py +++ b/pyha_analyzer/dataset.py @@ -288,7 +288,7 @@ def __getitem__(self, index): #-> Any: target = self.samples.loc[index, self.classes].values.astype(np.int32) target = torch.Tensor(target) - return image, target + return image, target, index def get_num_classes(self) -> int: """ Returns number of classes diff --git a/pyha_analyzer/infer.py b/pyha_analyzer/infer.py index 43cd80ad..d504ca11 100644 --- a/pyha_analyzer/infer.py +++ b/pyha_analyzer/infer.py @@ -1,3 +1,7 @@ +import torch +from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler +from tqdm import tqdm +import torch.nn.functional as F # %% import pandas as pd @@ -7,233 +11,224 @@ from pyha_analyzer.models.timm_model import TimmModel import logging -logger = logging.getLogger("acoustic_multiclass_training") - -cfg = config.cfg - -import pandas as pd -df = pd.read_csv("/home/shperry/acoustic-multiclass-training/peru-2019-pyha-anaylzer-inferance.csv") - -# %% - - -classes = [ - "amabaw1", - "amapyo1", - "astgna1", - "baffal1", - "barant1", - "bartin2", - "batman1", - "blacar1", - "blbthr1", - 
"blcbec1", - "blctro1", - "blfant1", - "blfcot1", - "blfjac1", - "blfnun1", - "blgdov1", - "blhpar1", - "bltant2", - "blttro1", - "bobfly1", - "brratt1", - "bsbeye1", - "btfgle1", - "bubgro2", - "bubwre1", - "bucmot4", - "buffal1", - "butsal1", - "butwoo1", - "chwfog1", - "cinmou1", - "cintin1", - "citwoo1", - "coffal1", - "coltro1", - "compot1", - "cowpar1", - "crfgle1", - "ducatt1", - "ducfly", - "ducgre1", - "duhpar", - "dutant2", - "elewoo1", - "eulfly1", - "fasant1", - "fepowl", - "forela1", - "garkin1", - "gilbar1", - "gnbtro1", - "gocspa1", - "goeant1", - "gogwoo1", - "gramou1", - "grasal3", - "grcfly1", - "greant1", - "greibi1", - "gretin1", - "grfdov1", - "gryant1", - "gryant2", - "gycfly1", - "gycwor1", - "hauthr1", - "horscr1", - "letbar1", - "littin1", - "litwoo2", - "lobwoo1", - "lowant1", - "meapar", - "muswre2", - "olioro1", - "oliwoo1", - "partan1", - "pavpig2", - "pirfly1", - "plbwoo1", - "pltant1", - "pluant1", - "plupig2", - "plwant1", - "puteup1", - "putfru1", - "pygant1", - "rcatan1", - "rebmac2", - "renwoo1", - "rinant2", - "rinkin1", - "rinwoo1", - "royfly1", - "ruboro1", - "rucant2", - "rudpig", - "rufant3", - "ruftof1", - "ruqdov", - "scapig2", - "scbwoo5", - "scrpih1", - "sobcac1", - "specha3", - "spigua1", - "spwant2", - "squcuc1", - "stbwoo2", - "strcuc1", - "strwoo2", - "strxen1", - "stwqua1", - "tabsco1", - "thlwre1", - "undtin1", - "viotro3", - "wespuf1", - "whbtot1", - "whcspa1", - "whfant2", - "whltyr1", - "whnrob1", - "whrsir1", - "whttou1", - "whtwoo2", - "whwbec1", - "wibpip1", - "yectyr1", - "yemfly1", - "yercac1", - "yetwoo2" - ] - - -df["MANUAL ID"] = "yetwoo2" - -# %% -from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler -df = df[["OFFSET", "DURATION", "SourceFile", "MANUAL ID"]] -infer_dataset = PyhaDFDataset(df, train=False, species=classes, cfg=cfg) -infer_dataloader = DataLoader( - infer_dataset, - cfg.validation_batch_size, - shuffle=False, - num_workers=cfg.jobs, - ) - -# %% -print(len(infer_dataset)) - -# %% - -import torch -model_for_run = TimmModel(num_classes=len(classes), - model_name=cfg.model).to(cfg.device) - -if cfg.model_checkpoint != "": - model_for_run.load_state_dict(torch.load(cfg.model_checkpoint)) - -# %% -from tqdm import tqdm -import torch.nn.functional as F -model = model_for_run -model.eval() - -log_pred, log_label = [], [] - -num_valid_samples = int(len(infer_dataloader)) - -# tqdm is a progress bar -dl_iter = tqdm(infer_dataloader, position=5, total=num_valid_samples) - -def run_batch(model: TimmModel, - mels: torch.Tensor, - labels: torch.Tensor, - ): - """ Runs the model on a single batch - Args: - model: the model to pass the batch through - mels: single batch of input data - labels: single batch of expecte output - Returns (tuple of): - loss: the loss of the batch - outputs: the output of the model - """ - mels = mels.to(cfg.device) - labels = labels.to(cfg.device) - outputs = model(mels) - return [0], outputs - -print("ran model") -with torch.no_grad(): - for _, (mels, labels) in enumerate(dl_iter): - try: - _, outputs = run_batch(model, mels, labels) - log_pred.append(torch.clone(outputs.cpu()).detach()) - log_label.append(torch.clone(labels.cpu()).detach()) - except Exception as e: - print(e) - -print("save results") -# sigmoid predictions -log_pred = F.sigmoid(torch.cat(log_pred)).cpu() -print(log_pred.shape) -pd.DataFrame(log_pred, columns=classes).to_csv("result.csv") - -#cmap, smap = map_metric(log_pred, torch.cat(log_label), dataset.class_dist) - -# %% -cfg.data_path2 - -# %% - - -# %% - - - 
+torch.multiprocessing.set_sharing_strategy('file_system') + +if __name__ == '__main__': + torch.multiprocessing.set_start_method('spawn') + + logger = logging.getLogger("acoustic_multiclass_training") + + cfg = config.cfg + + df = pd.read_csv("/home/shperry/acoustic-multiclass-training/peru-2019-pyha-anaylzer-inferance.csv") + + classes = [ + "amabaw1", + "amapyo1", + "astgna1", + "baffal1", + "barant1", + "bartin2", + "batman1", + "blacar1", + "blbthr1", + "blcbec1", + "blctro1", + "blfant1", + "blfcot1", + "blfjac1", + "blfnun1", + "blgdov1", + "blhpar1", + "bltant2", + "blttro1", + "bobfly1", + "brratt1", + "bsbeye1", + "btfgle1", + "bubgro2", + "bubwre1", + "bucmot4", + "buffal1", + "butsal1", + "butwoo1", + "chwfog1", + "cinmou1", + "cintin1", + "citwoo1", + "coffal1", + "coltro1", + "compot1", + "cowpar1", + "crfgle1", + "ducatt1", + "ducfly", + "ducgre1", + "duhpar", + "dutant2", + "elewoo1", + "eulfly1", + "fasant1", + "fepowl", + "forela1", + "garkin1", + "gilbar1", + "gnbtro1", + "gocspa1", + "goeant1", + "gogwoo1", + "gramou1", + "grasal3", + "grcfly1", + "greant1", + "greibi1", + "gretin1", + "grfdov1", + "gryant1", + "gryant2", + "gycfly1", + "gycwor1", + "hauthr1", + "horscr1", + "letbar1", + "littin1", + "litwoo2", + "lobwoo1", + "lowant1", + "meapar", + "muswre2", + "olioro1", + "oliwoo1", + "partan1", + "pavpig2", + "pirfly1", + "plbwoo1", + "pltant1", + "pluant1", + "plupig2", + "plwant1", + "puteup1", + "putfru1", + "pygant1", + "rcatan1", + "rebmac2", + "renwoo1", + "rinant2", + "rinkin1", + "rinwoo1", + "royfly1", + "ruboro1", + "rucant2", + "rudpig", + "rufant3", + "ruftof1", + "ruqdov", + "scapig2", + "scbwoo5", + "scrpih1", + "sobcac1", + "specha3", + "spigua1", + "spwant2", + "squcuc1", + "stbwoo2", + "strcuc1", + "strwoo2", + "strxen1", + "stwqua1", + "tabsco1", + "thlwre1", + "undtin1", + "viotro3", + "wespuf1", + "whbtot1", + "whcspa1", + "whfant2", + "whltyr1", + "whnrob1", + "whrsir1", + "whttou1", + "whtwoo2", + "whwbec1", + "wibpip1", + "yectyr1", + "yemfly1", + "yercac1", + "yetwoo2" + ] + + + df["MANUAL ID"] = "yetwoo2" + + # %% + df = df[["OFFSET", "DURATION", "SourceFile", "MANUAL ID"]] + infer_dataset = PyhaDFDataset(df, train=False, species=classes, cfg=cfg) + infer_dataloader = DataLoader( + infer_dataset, + cfg.validation_batch_size, + shuffle=False, + num_workers=cfg.jobs, + ) + + infer_dataset.samples.to_csv("_DATA_TO_RUN_ON.csv") + + model_for_run = TimmModel(num_classes=len(classes), + model_name=cfg.model).to(cfg.device) + + if cfg.model_checkpoint != "": + model_for_run.load_state_dict(torch.load(cfg.model_checkpoint)) + + # %% + model = model_for_run + model.eval() + + log_pred, log_label, idx_label = [], [], [] + + num_valid_samples = int(len(infer_dataloader)) + + # tqdm is a progress bar + dl_iter = tqdm(infer_dataloader, position=5, total=num_valid_samples) + + def run_batch(model: TimmModel, + mels: torch.Tensor, + labels: torch.Tensor, + ): + """ Runs the model on a single batch + Args: + model: the model to pass the batch through + mels: single batch of input data + labels: single batch of expecte output + Returns (tuple of): + loss: the loss of the batch + outputs: the output of the model + """ + mels = mels.to(cfg.device) + labels = labels.to(cfg.device) + outputs = model(mels) + return [0], outputs + + print("ran model") + + with torch.no_grad(): + for count, (mels, labels, idx) in enumerate(dl_iter): + try: + _, outputs = run_batch(model, mels, labels) + log_pred.append(torch.clone(outputs.cpu()).detach()) + idx_label.append(idx) + 
except Exception as e: + print(e) + + if count % 10_000 == 0: + + #print("save results") + # sigmoid predictions + log_pred_temp = F.sigmoid(torch.cat(log_pred)).cpu() + #print(log_pred_temp.shape) + idx_labels = torch.cat(idx_label) + #print(idx_labels) + df_results = pd.DataFrame(log_pred_temp, columns=classes) + df_results["index"] = idx_labels.numpy() + df_results.to_csv(f"result_test_{count}.csv") + df_results = [] + + log_pred, log_label, idx_label = [], [], [] \ No newline at end of file diff --git a/pyha_analyzer/infer_test.py b/pyha_analyzer/infer_test.py new file mode 100644 index 00000000..c440ebf6 --- /dev/null +++ b/pyha_analyzer/infer_test.py @@ -0,0 +1,22 @@ + +# %% +import pandas as pd +from pyha_analyzer.train import run_batch +from pyha_analyzer.dataset import get_datasets, make_dataloaders, PyhaDFDataset +from pyha_analyzer import config +from pyha_analyzer.models.timm_model import TimmModel +import logging + +logger = logging.getLogger("acoustic_multiclass_training") + +cfg = config.cfg + +import torch +model_for_run = TimmModel(num_classes=132, + model_name=cfg.model).to(cfg.device) + +print(next(model_for_run.parameters()).device) + +for i in range(torch.cuda.device_count()): + device_name = f'cuda:{i}' + print(f'{i} device name:{torch.cuda.get_device_name(torch.device(device_name))}') \ No newline at end of file From 148c44215edda0e8b4f5119acc807f18182e4ca4 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Fri, 12 Jul 2024 15:26:14 -0700 Subject: [PATCH 4/5] feat: inferance script --- pyha_analyzer/infer.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pyha_analyzer/infer.py b/pyha_analyzer/infer.py index d504ca11..a9b60265 100644 --- a/pyha_analyzer/infer.py +++ b/pyha_analyzer/infer.py @@ -231,4 +231,16 @@ def run_batch(model: TimmModel, df_results.to_csv(f"result_test_{count}.csv") df_results = [] - log_pred, log_label, idx_label = [], [], [] \ No newline at end of file + log_pred, log_label, idx_label = [], [], [] + #print("save results") + # sigmoid predictions + log_pred_temp = F.sigmoid(torch.cat(log_pred)).cpu() + #print(log_pred_temp.shape) + idx_labels = torch.cat(idx_label) + #print(idx_labels) + df_results = pd.DataFrame(log_pred_temp, columns=classes) + df_results["index"] = idx_labels.numpy() + df_results.to_csv(f"result_test_{count}.csv") + df_results = [] + + log_pred, log_label, idx_label = [], [], [] \ No newline at end of file From ea24ae6c8997fa6addce3ca91246cbd6ab36638e Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Mon, 15 Jul 2024 16:36:47 -0700 Subject: [PATCH 5/5] remove channel code --- pyha_analyzer/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyha_analyzer/dataset.py b/pyha_analyzer/dataset.py index 54f1d268..d9397e05 100644 --- a/pyha_analyzer/dataset.py +++ b/pyha_analyzer/dataset.py @@ -256,7 +256,7 @@ def to_image(self, audio): # Sigmoid to get 0 to 1 scaling (0.5 becomes mean) mel = torch.sigmoid(mel) - return mel.unsqueeze(0) #torch.stack([mel, mel, mel]) + return torch.stack([mel, mel, mel]) def __getitem__(self, index): #-> Any: """ Takes an index and returns tuple of spectrogram image with corresponding label
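A natural follow-on to the patch series, not included in it: the inference script writes result_test_<count>.csv shards keyed by the dataset index that was also dumped to _DATA_TO_RUN_ON.csv, so per-chunk scores can be joined back to their offsets afterwards. A hedged sketch of that post-processing step; the shard glob and output path are illustrative, while the two file names come from the script itself.

# Illustrative post-processing: stitch the per-shard prediction CSVs back onto
# the chunk metadata via the shared dataset row index.
import glob
import pandas as pd

chunks = pd.read_csv("_DATA_TO_RUN_ON.csv", index_col=0)            # written by the inference script
shards = [pd.read_csv(path) for path in sorted(glob.glob("result_test_*.csv"))]
preds = pd.concat(shards, ignore_index=True)

# "index" in each shard is the row position within the inference dataset.
merged = chunks.reset_index(drop=True).join(preds.set_index("index"), how="inner")
merged.to_csv("predictions_with_metadata.csv", index=False)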