Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spectrum analysis examples #98

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions caits/fe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
power_to_db,
spectrogram,
stft,
fft_frequencies
)
from ._statistical import (
average_power,
Expand Down Expand Up @@ -93,6 +94,7 @@
"power_to_db",
"spectrogram",
"stft",
"fft_frequencies",
"average_power",
"central_moments",
"crest_factor",
Expand Down
139 changes: 116 additions & 23 deletions caits/fe/_spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def stft(
out: Optional[np.ndarray] = None,
) -> np.ndarray:
"""
Note:
The functionality in this implementation is basically derived from
librosa v0.10.1:
https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py

Args:
y:
Expand All @@ -51,9 +55,6 @@ def stft(
Returns:

"""
# The functionality in this implementation are basically derived from
# librosa v0.10.1:
# https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py

# By default, use the entire frame
if win_length is None:
Expand Down Expand Up @@ -216,9 +217,14 @@ def istft(
length: Optional[int] = None,
out: Optional[np.ndarray] = None,
) -> np.ndarray:
# The functionality in this implementation are basically derived from
# librosa v0.10.1:
# https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py
"""
Note:
The functionality in this implementation is basically derived from
librosa v0.10.1:
https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py

"""


if n_fft is None:
n_fft = 2 * (stft_matrix.shape[-2] - 1)
Expand Down Expand Up @@ -351,6 +357,49 @@ def spectrogram(
center: bool = True,
pad_mode: _PadModeSTFT = "constant",
) -> Tuple[np.ndarray, int]:
"""Retrieves a magnitude spectrogram.

This is primarily used in feature extraction functions that can operate on
either audio time-series or spectrogram input.

Note:
The functionality in this implementation is basically derived from
librosa v0.10.1:
https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py

Args:
y: (np.ndarray) Audio time-series.
S: (np.ndarray) Spectrogram input, optional.
n_fft: (int) STFT window size.
hop_length: (int) STFT hop length.
power: (float) Exponent for the magnitude spectrogram,
e.g., 1 for energy, 2 for power, etc.
win_length: (int) Each frame of audio is windowed by `window`.
The window will be of length `win_length` and then padded
with zeros to match `n_fft`.

If unspecified, defaults to `win_length = n_fft`.
window: (string, tuple, number, function, or np.ndarray)
- a window specification (string, tuple, or number);
see `scipy.signal.get_window`
- a window function, such as `scipy.signal.windows.hann`
- a vector or array of length `n_fft`
center: (boolean)
- If `True`, the signal `y` is padded so that frame
`t` is centered at `y[t * hop_length]`.
- If `False`, then frame `t` begins at `y[t * hop_length]`
pad_mode: (string) If `center=True`, the padding mode to use
at the edges of the signal. By default, STFT uses zero padding.

Returns:
S_out: (np.ndarray)
- If `S` is provided as input, then `S_out == S`
- Else, `S_out = |stft(y, ...)|**power`
n_fft: (int)
- If `S` is provided, then `n_fft` is inferred from `S`
- Else, copied from input
"""

if S is not None:
# Infer n_fft from spectrogram shape, but only if it mismatches
if n_fft is None or n_fft // 2 + 1 != S.shape[-2]:
Expand Down Expand Up @@ -506,9 +555,13 @@ def power_to_db(
amin: float = 1e-10,
top_db: Optional[float] = 80.0,
) -> np.ndarray:
# The functionality in this implementation are basically derived from
# librosa v0.10.1:
# https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py
"""
Note:
The functionality in this implementation is basically derived from
librosa v0.10.1:
https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py
"""


S = np.asarray(S)

Expand Down Expand Up @@ -543,11 +596,17 @@ def power_to_db(
return log_spec


def db_to_power(S_db: np.ndarray, *, ref: float = 1.0) -> np.ndarray:
# The functionality in this implementation is basically derived from
# librosa v0.10.1:
# https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py
"""Convert a dB-scale spectrogram to a power spectrogram.
def db_to_power(
S_db: np.ndarray,
*,
ref: float = 1.0
) -> np.ndarray:
"""Converts a dB-scale spectrogram to a power spectrogram.

Note:
The functionality in this implementation is basically derived from
librosa v0.10.1:
https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py

This effectively inverts ``power_to_db``::

Expand All @@ -563,10 +622,12 @@ def amplitude_to_db(
amin: float = 1e-5,
top_db: Optional[float] = 80.0,
) -> np.ndarray:
# The functionality in this implementation is basically derived from
# librosa v0.10.1:
# https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py
"""Convert an amplitude spectrogram to dB-scaled spectrogram.
"""Converts an amplitude spectrogram to dB-scaled spectrogram.

Note:
The functionality in this implementation is basically derived from
librosa v0.10.1:
https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py

This is equivalent to ``power_to_db(S**2, ref=ref**2, amin=amin**2, top_db=top_db)``,
but is provided for convenience.
Expand Down Expand Up @@ -594,14 +655,46 @@ def amplitude_to_db(
return power_to_db(power, ref=ref_value ** 2, amin=amin ** 2, top_db=top_db)


def db_to_amplitude(S_db: np.ndarray, *, ref: float = 1.0) -> np.ndarray:
"""Convert a dB-scaled spectrogram to an amplitude spectrogram.
def db_to_amplitude(
    S_db: np.ndarray,
    *,
    ref: float = 1.0
) -> np.ndarray:
    """Converts a dB-scaled spectrogram to an amplitude spectrogram.

    Note:
        The functionality in this implementation is basically derived from
        librosa v0.10.1:
        https://github.com/librosa/librosa/blob/main/librosa/core/spectrum.py

    This effectively inverts `amplitude_to_db`::

        db_to_amplitude(S_db) ~= 10.0**(0.5 * S_db/10 + log10(ref))

    Args:
        S_db: dB-scaled spectrogram values.
        ref: Amplitude reference; it is squared before being handed to
            `db_to_power` as the power reference.

    Returns:
        np.ndarray: The square root of the power spectrogram produced by
        `db_to_power(S_db, ref=ref ** 2)`.
    """
    # Amplitude is the square root of power, so convert with the squared
    # reference first and take the root afterwards.
    power_reference = ref ** 2
    power_spectrogram = db_to_power(S_db, ref=power_reference)
    return power_spectrogram ** 0.5



def fft_frequencies(
    *,
    sr: float = 22050,
    n_fft: int = 2048
) -> np.ndarray:
    """Returns the frequencies of the non-negative FFT bins.

    This is a thin wrapper around `np.fft.rfftfreq` with the window length
    set to `n_fft` and the sample spacing calculated as 1 / sampling rate.

    Args:
        sr: Signal sampling rate.
        n_fft: FFT window size.

    Returns:
        np.ndarray: Array of length ``n_fft // 2 + 1`` holding the
        frequencies ``(0, sr/n_fft, 2*sr/n_fft, ...)``. For an even
        `n_fft` the last entry is ``sr/2``.

    Examples:
        >>> fft_frequencies(sr=22050, n_fft=16)
        array([    0.   ,  1378.125,  2756.25 ,  4134.375,  5512.5  ,  6890.625,
                8268.75 ,  9646.875, 11025.   ])

    """
    # rfftfreq yields only the non-negative half of the spectrum, which is
    # what a real-input FFT produces.
    return np.fft.rfftfreq(n=n_fft, d=1.0 / sr)
275 changes: 275 additions & 0 deletions examples/spectrum_transform.ipynb

Large diffs are not rendered by default.