-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
1,249 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,21 @@ | ||
{ | ||
"image": "ghcr.io/ucsd-e4e/aid-audio-sim" | ||
"image": "ghcr.io/ucsd-e4e/aid-audio-sim", | ||
"runArgs": ["--platform=linux/amd64" ], | ||
"postCreateCommand": "pip install -r requirements.txt", | ||
"customizations": { | ||
"vscode": { | ||
"extensions": [ | ||
"ms-python.vscode-pylance", | ||
"ms-python.python", | ||
"ms-toolsai.jupyter", | ||
"njpwerner.autodocstring", | ||
"ms-python.isort", | ||
"eamodio.gitlens", | ||
"VisualStudioExptTeam.vscodeintellicode", | ||
"github.vscode-github-actions", | ||
"ms-python.black-formatter", | ||
"ms-azuretools.vscode-docker" | ||
] | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import quaternion | ||
|
||
import habitat_sim.sim | ||
import numpy as np | ||
from scipy.io import wavfile | ||
|
||
|
||
# Render one mono audio observation in an mp3d scene and save it as a WAV file.

# Single source of truth for the rate used both when rendering the audio and
# when writing the WAV header, so the two cannot drift apart.
SAMPLE_RATE = 16000

backend_cfg = habitat_sim.SimulatorConfiguration()
backend_cfg.scene_id = "data/scene_datasets/mp3d/17DRP5sb8fy/17DRP5sb8fy.glb"
backend_cfg.scene_dataset_config_file = "data/scene_datasets/mp3d/mp3d.scene_dataset_config.json"
backend_cfg.load_semantic_mesh = True
backend_cfg.enable_physics = False

agent_cfg = habitat_sim.agent.AgentConfiguration()

cfg = habitat_sim.Configuration(backend_cfg, [agent_cfg])
sim = habitat_sim.Simulator(cfg)

try:
    audio_sensor_spec = habitat_sim.AudioSensorSpec()
    audio_sensor_spec.uuid = "audio_sensor"
    audio_sensor_spec.enableMaterials = True  # make sure _semantic.ply file is in the scene folder
    audio_sensor_spec.channelLayout.type = habitat_sim.sensor.RLRAudioPropagationChannelLayoutType.Mono
    audio_sensor_spec.channelLayout.channelCount = 1
    # Sensor offset is expressed relative to the agent.
    audio_sensor_spec.position = [0.0, 1.5, 0.0]
    audio_sensor_spec.acousticsConfig.sampleRate = SAMPLE_RATE
    audio_sensor_spec.acousticsConfig.indirect = True
    sim.add_sensor(audio_sensor_spec)

    # Look the agent up once and reuse it for the sensor handle and the state.
    agent = sim.get_agent(0)
    audio_sensor = agent._sensors["audio_sensor"]
    audio_sensor.setAudioSourceTransform(np.array([-8.56, 1.5, 0.50]))
    audio_sensor.setAudioMaterialsJSON("data/mp3d_material_config.json")

    # Move the listener (agent) to its position before rendering.
    new_state = agent.get_state()
    new_state.position = np.array([-10.57, 0, -0.25])
    new_state.sensor_states = {}
    agent.set_state(new_state, True)

    obs = np.array(sim.get_sensor_observations()["audio_sensor"])
    # Transposed before writing so that samples run along the first axis.
    wavfile.write('data/output.wav', SAMPLE_RATE, obs.T)
finally:
    # Release the simulator even when rendering or writing fails.
    sim.close()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
librosa | ||
ipykernel |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
# %% [markdown] | ||
# # HELLO! | ||
# | ||
# $ x + 1 $ | ||
|
||
# %% | ||
from magnum import Vector3 | ||
|
||
# %% | ||
#%%capture | ||
import os | ||
import quaternion | ||
import habitat_sim.sim | ||
import numpy as np | ||
from scipy.io import wavfile | ||
|
||
|
||
# Build the simulator, load a navmesh and attach an audio sensor to agent 0.
# All relative paths below are resolved against this working directory.
os.chdir('/home/e4e-student/soundspaces/habitat-sim')

# Selects the scene-dataset config and navmesh below; any value other than
# 'mp3d' takes the fallback branches.
dataset = 'mp3d'

backend_cfg = habitat_sim.SimulatorConfiguration()
if dataset == 'mp3d':
    backend_cfg.scene_id = "data/scene_datasets/forests/test/forest.glb"
    # IMPORTANT: without this file the semantic scene is loaded incorrectly
    backend_cfg.scene_dataset_config_file = "data/scene_datasets/mp3d/mp3d.scene_dataset_config.json"
else:
    # Directory was spelled "scene-datasets" here; corrected to the
    # "scene_datasets" spelling used by every other scene path in this file.
    backend_cfg.scene_id = "data/scene_datasets/forests/test/forest.glb"
    # IMPORTANT: without this file the semantic scene is loaded incorrectly
    backend_cfg.scene_dataset_config_file = "sound-spaces/data/scene_datasets/dataset_0/test_dataset_0.scene_dataset_config.json"
backend_cfg.load_semantic_mesh = True
backend_cfg.enable_physics = False
agent_config = habitat_sim.AgentConfiguration()
cfg = habitat_sim.Configuration(backend_cfg, [agent_config])
sim = habitat_sim.Simulator(cfg)

# Set the navmesh used when searching for navigable points.
if dataset == 'mp3d':
    sim.pathfinder.load_nav_mesh("data/scene_datasets/forests/test/forest.navmesh")
else:
    sim.pathfinder.load_nav_mesh("data/scene_datasets/gibson/Oyens.navmesh")

audio_sensor_spec = habitat_sim.AudioSensorSpec()
audio_sensor_spec.uuid = "audio_sensor"
audio_sensor_spec.enableMaterials = True
audio_sensor_spec.channelLayout.channelType = habitat_sim.sensor.RLRAudioPropagationChannelLayoutType.Binaural
# NOTE(review): a Binaural layout is paired with a single channel here;
# confirm against the habitat-sim audio docs whether this should be 2.
audio_sensor_spec.channelLayout.channelCount = 1
# Audio sensor location is set with respect to the agent.
audio_sensor_spec.position = Vector3(0.0, 1.5, 0.0)  # audio sensor has a height of 1.5m
audio_sensor_spec.acousticsConfig.sampleRate = 48000
# Whether indirect sound (reverberation) is present in the rendered IR.
audio_sensor_spec.acousticsConfig.indirect = True
sim.add_sensor(audio_sensor_spec)

audio_sensor = sim.get_agent(0)._sensors["audio_sensor"]
# NOTE: got this from https://github.com/facebookresearch/rlr-audio-propagation/blob/main/RLRAudioPropagationPkg/data/mp3d_material_config.json
audio_sensor.setAudioMaterialsJSON("data/mp3d_material_config.json")
|
||
# %% | ||
|
||
|
||
# %% | ||
# sampled navigable point is on the floor | ||
# The source is pinned to the origin; a random navigable point (which lies on
# the floor) could be sampled instead with
# sim.pathfinder.get_random_navigable_point().
source_pos = Vector3(0, 0, 0)
print('Sample source location: ', source_pos)

# %%
print("hi")

# %%
height = Vector3(0, 1.5, 0)
agent_pos = Vector3(2, 0, 0)

# %%
audio_sensor.setAudioSourceTransform(source_pos + height)  # add 1.5m to the height calculation

# %%
# Fetch the agent once and reuse the handle for reading and writing its state.
agent = sim.get_agent(0)
new_state = agent.get_state()

# %%
# Place the listener at an offset of agent_pos from the source.
new_state.position = source_pos + agent_pos
new_state.sensor_states = {}
agent.set_state(new_state, True)
# Render exactly once: the earlier extra get_sensor_observations() call threw
# its result away and only repeated the rendering work.
# TODO: this call has been seen to break; the scene model may need fixing.
ir = np.array(sim.get_sensor_observations()["audio_sensor"])
print(ir.shape)
|
||
# If a category is not found in the material mapping file, the default acoustic material is used. | ||
|
||
# %% | ||
# This bit of code crashes. Unsure why, trying to review this | ||
#sim.get_sensor_observations() | ||
|
||
# %% | ||
# check if the direct sound is present (source is visible from the listener) | ||
#audio_sensor.sourceIsVisible() | ||
# Note this does not appear to exist in newer habitat sim... | ||
|
||
# %% | ||
# check the efficiency of rendering, outdoor would have a very low value, e.g. < 0.05, | ||
# while a closed indoor room would have >0.95, and a room with some holes might be in the 0.1-0.8 range. | ||
# if the ray efficiency is low for an indoor environment, it indicates a lot of ray leak from holes | ||
# you should repair the mesh in this case for more accurate acoustic rendering | ||
# audio_sensor.getRayEfficiency() | ||
# Note this does not appear to exist in newer habitat sim... | ||
|
||
# %% | ||
# plot the waveform of IR and show the audio | ||
from librosa.display import waveshow, specshow | ||
import IPython | ||
|
||
# Draw only the first 10000 samples of the first IR channel, then expose the
# whole IR as a playable widget (kept as the cell's last expression so the
# notebook displays it).
ir_head = ir[0, :10000]
waveshow(ir_head, sr=48000)
IPython.display.Audio(ir, rate=48000)
|
||
# %% | ||
#!pip install librosa | ||
|
||
# %% | ||
# one example for how to use IR data to get the reverberant speech | ||
import librosa | ||
#sr, vocal = wavfile.read('res/singing.wav') | ||
# Load the clip at 48 kHz, the rate the IR is rendered at. Without an explicit
# `sr`, librosa.load resamples to its 22050 Hz default, and convolving two
# signals sampled at different rates gives a wrongly time-scaled reverb.
vocal, sr = librosa.load(
    path="/home/e4e-student/soundspaces/sound-spaces/examples/XC150592 - Screaming Piha - Lipaugus vociferans.mp3",
    sr=48000,
)
print(sr, vocal.shape)
IPython.display.Audio(vocal, rate=sr)
|
||
# %% | ||
from scipy.signal import fftconvolve | ||
|
||
# Apply the impulse response to the dry clip, one IR channel at a time, and
# stack the per-channel results into a single array.
per_channel = []
for channel_ir in ir:
    per_channel.append(fftconvolve(vocal, channel_ir))
convolved_vocal = np.array(per_channel)
IPython.display.Audio(convolved_vocal, rate=sr)

# %%
# Compare the shapes of the reverberant and dry signals.
convolved_vocal.shape, vocal.shape
|
||
# %% | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
|
||
# Top panel: the dry clip. Bottom panel: the clip convolved with the IR.
y = vocal
y_ = convolved_vocal

fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True)

# Magnitude spectrograms; the convolved signal is averaged over its channel
# axis first so both inputs to the STFT are one-dimensional.
dry_magnitude = np.abs(librosa.stft(y))
wet_magnitude = np.abs(librosa.stft(y_.mean(axis=0)))

# Convert to dB relative to each spectrogram's own peak.
D = librosa.amplitude_to_db(dry_magnitude, ref=np.max)
D_ = librosa.amplitude_to_db(wet_magnitude, ref=np.max)

# Both panels share the same axis settings and sample rate.
axis_options = dict(y_axis='linear', x_axis='time', sr=sr)
img = librosa.display.specshow(D, ax=ax[0], **axis_options)
img = librosa.display.specshow(D_, ax=ax[1], **axis_options)

top_panel = ax[0]
top_panel.set(title='Linear-frequency power spectrogram')
top_panel.label_outer()
|
||
# %% | ||
import matplotlib.pyplot as plt | ||
|
||
# Reference spectrogram of a bundled librosa example clip. The clip and its
# rate get their own names so this cell does not overwrite the `y` and `sr`
# that the other cells share.
example_y, example_sr = librosa.load(librosa.ex('choice'), duration=15)

# Two rows are requested, but only the top axes is drawn on below.
fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True)

D = librosa.amplitude_to_db(np.abs(librosa.stft(example_y)), ref=np.max)

img = librosa.display.specshow(D, y_axis='linear', x_axis='time',
                               sr=example_sr, ax=ax[0])

ax[0].set(title='Linear-frequency power spectrogram')

ax[0].label_outer()
|
||
# %% | ||
from pyroomacoustics.experimental.rt60 import measure_rt60 | ||
|
||
# Measure the decay at the rate the IR was rendered at (48 kHz, set through
# acousticsConfig.sampleRate). `sr` holds the rate returned by the most recent
# librosa.load call, which describes an audio clip rather than the IR, so
# passing it would scale the reported RT60 by the ratio of the two rates.
rt60 = measure_rt60(ir[0], fs=48000, decay_db=30, plot=True)
print(f'RT60 of the rendered IR is {rt60:.4f} seconds')
|
||
# %% | ||
|
||
|
||
|