import numpy as np # This is the main numerical library we will use
import matplotlib.pyplot as plt # This is the main plotting library we will use
import scipy.io.wavfile as wavfile # We will use this library to load in audio
import IPython.display as ipd # This is a library that allows us to play audio samples in Jupyter


# Read the clip from disk.  wavfile.read returns the sample rate (fs)
# followed by the audio samples as a numpy array (x)
fs, x = wavfile.read("happybirthday.wav")

# Top panel: the entire recording
plt.subplot(2, 1, 1)
plt.plot(x)
plt.title('Full Audio Waveform of "Happy Birthday"')

# Bottom panel: only samples 54000 up to (but not including) 70000
y = x[54000:70000]
plt.subplot(2, 1, 2)
plt.plot(y)
plt.title('"Birth" Only')
plt.tight_layout()
ipd.Audio(x, rate=fs)

#print("The sample rate is %i samples/second"%fs)


## Play twice as fast
# Telling the player the samples were recorded at double the true rate makes
# it run through them twice as quickly, so the clip lasts half as long.
# (A rate of fs/2 does the opposite and plays it twice as slowly.)
ipd.Audio(x, rate=2*fs)


# "Decimate" in time by a factor of 12 (take 1 out of every 12 samples) to demo aliasing
fac = 12  # Keep 1 sample out of every fac
y = x[::fac]
plt.plot(y)
# Only 1/fac as many samples now cover the same stretch of sound,
# so they are played back at 1/fac of the original rate
ipd.Audio(y, rate=fs/fac)


# Walk through x from its last sample to its first to play the audio backwards
y = x[::-1]
ipd.Audio(y, rate=fs)


# The pitch is chosen by counting halfsteps from a 660 Hz base; every
# halfstep multiplies the frequency by 2^(1/12)
h = 0 # Number of halfsteps away from the 660 Hz base pitch
freq = 660*(2**(h/12))
print(freq)
fs = 44100  # 44100 samples per second
seconds = 1  # Note duration of 1 second

# Generate seconds*fs sample times spaced exactly 1/fs apart.  The endpoint
# is left out: including it would stretch the spacing to 1/(fs-1) and make
# the tone come out slightly sharp
t = np.linspace(0, seconds, seconds * fs, endpoint=False)

# Generate a sine wave at freq Hz (660 Hz while h = 0)
y = np.sin(2*np.pi*freq*t)
plt.figure(figsize=(8, 4))
plt.plot(t, y)
plt.xlim([0, 0.05]) # Show the first 50 milliseconds
plt.xlabel("Seconds")
plt.ylabel("Value")
plt.title("660hz Frequency")
plt.savefig("660.svg", bbox_inches='tight')
ipd.Audio(y, rate=fs)
wavfile.write("660.wav", fs, y)

660.0


## Class exercise: Play the major triad notes A, C#, E in a sequence
fs = 44100
y = np.zeros(fs*3) # Room for three notes of one second each
# One second of sample times spaced exactly 1/fs apart.  The endpoint is
# excluded so that each note lands on its intended frequency
t = np.linspace(0, 1, fs, endpoint=False)
fA = 440
fCSharp = 440*(2**(4/12)) # A C# is 4 halfsteps above A
fE = 440*(2**(7/12)) # An E is 7 halfsteps above A
# Each note fills its own one-second stretch of y
y[0:fs] = np.sin(2*np.pi*fA*t)
y[fs:fs*2] = np.sin(2*np.pi*fCSharp*t)
y[fs*2:fs*3] = np.sin(2*np.pi*fE*t)
ipd.Audio(y, rate=fs)


fs = 44100
# One second of sample times spaced exactly 1/fs apart (endpoint excluded)
t = np.linspace(0, 1, fs, endpoint=False)
# Adding the three sinusoids sample by sample plays the notes at the same
# time as a chord.  fA, fCSharp and fE are the note frequencies set up in
# the sequence exercise
y = np.sin(2*np.pi*fA*t) + np.sin(2*np.pi*fCSharp*t) + np.sin(2*np.pi*fE*t)
ipd.Audio(y, rate=fs)


def make_sinusoid(note, fs, duration):
    """
    Sample a pure sine tone whose pitch is given in halfsteps
    relative to concert A (440hz)

    Parameters
    ----------
    note: int
        The number of halfsteps away from concert A
    fs: int
        The sample rate
    duration: float
        The number of seconds elapsed
        
    Returns
    -------
    ndarray(N)
        An array of samples from an appropriate sinusoid, where
        N = int(fs*duration)
    """
    ## Step 1: Setup time samples
    # Sample k is taken at time k/fs.  The endpoint is excluded on purpose:
    # including it would spread the N samples over N-1 intervals and make
    # the tone come out slightly sharp
    n_samples = int(fs*duration)
    t = np.linspace(0, n_samples/fs, n_samples, endpoint=False)
    ## Step 2: Figure out appropriate frequency
    # Every halfstep multiplies the frequency by 2^(1/12)
    f = 440*(2**(note/12))
    ## Step 3: Make the array with the appropriate sine
    y = np.sin(2*np.pi*f*t)
    return y


fs = 44100
# Half-second versions of the three triad notes: A (0 halfsteps), C# (4), E (7)
xAHalf, xCSHalf, xEHalf = (make_sinusoid(note, fs, 0.5) for note in (0, 4, 7))
xSpace = np.zeros(int(fs*0.1))  # A tenth of a second of silence
# A A C# E E, with a short silence between consecutive notes
melody = [xAHalf, xSpace, xAHalf, xSpace, xCSHalf, xSpace, xEHalf, xSpace, xEHalf]
x = np.concatenate(melody)
ipd.Audio(x, rate=fs)


# Quick demo: np.concatenate glues arrays together end to end
first_half = np.array([0, 1, 2])
second_half = np.array([3, 4, 5])
x = np.concatenate((first_half, second_half))
print(x)

[0 1 2 3 4 5]


## Just a quick demo of what int() does to a decimal number, 
# based on a question in class.  It throws away the fractional part
# (truncates toward zero), so positive numbers are rounded down,
# while a negative number such as -5.7 would become -5
int(5.7)

5


# Remainders of 0, 1, ..., 49 after dividing by 5: the values climb
# from 0 to 4 and then start over, again and again
np.mod(np.arange(50), 5)

array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1,
       2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3,
       4, 0, 1, 2, 3, 4])


# Show sawtooth wave
fs = 44100
t = np.arange(fs)  # Sample indices 0, 1, ..., fs-1
# The remainder after dividing by 100 climbs from 0 to 99 and then resets,
# which traces out one ramp every 100 samples
x = t % 100
plt.plot(x[:1000])
ipd.Audio(x, rate=fs)


fs, x = wavfile.read("happybirthday.wav")
ipd.Audio(x, rate=fs)


# Add a single echo: the audio plus a copy of itself a quarter second later.
# The sum is built in a floating point copy of x, because wavfile.read hands
# back integer samples for integer PCM files and adding two of those in
# place could exceed the integer range and wrap around
y = np.array(x, dtype=float)
delay = int(fs/4)  # Number of samples in a quarter of a second
y[delay:] += x[:-delay]
ipd.Audio(y, rate=fs)


# h holds fs samples, all zero except for the impulses set below
h = np.zeros(fs)
## Impulse response that we create by 
## setting every 4000th sample to 1
h[0::4000] = 1  
plt.plot(h)
plt.title("h (Echo signal)")
# Convolving with h adds up one copy of x per impulse, each copy
# delayed by the position of its impulse
y = np.convolve(x, h)
ipd.Audio(y, rate=fs)


## Show basic lowpass filter
# A box of 4000 ones: each output sample is the sum of 4000 consecutive inputs
h = np.ones(4000)
# 'valid' keeps only the outputs where the box overlaps x completely
y = np.convolve(x, h, mode='valid')
ipd.Audio(y, rate=fs)


fs = 44100
# Three seconds' worth of samples drawn from a standard normal distribution
x = np.random.randn(3*fs)
plt.plot(x[:1000])
ipd.Audio(x, rate=fs)


fs = 44100
# A fresh three seconds of noise...
x = np.random.randn(3*fs)
# ...convolved with a box of 1000 ones, so that each output sample is
# the sum of up to 1000 consecutive noise samples
h = np.ones(1000)
y = np.convolve(x, h)
plt.plot(y[:1000])
ipd.Audio(y, rate=fs)


fs = 44100
x = np.random.randn(fs)  # One second of noise
## Impulse response: a 1 at every 75th sample and 0 everywhere else
h = np.zeros(fs)
h[::75] = 1
y = np.convolve(x, h)
# Top: the start of the impulse train
plt.subplot(2, 1, 1)
plt.plot(h[:1000])
# Bottom: 500 samples of the result, starting one second in
plt.subplot(2, 1, 2)
plt.plot(y[fs:fs+500])
ipd.Audio(y, rate=fs)


def plot_fourier_mag(x, fs):
    """
    Plot the magnitude of the Fourier transform of x against
    frequency, drawing only the strictly positive frequencies

    Parameters
    ----------
    x: ndarray(N)
        The audio samples
    fs: int
        The sample rate in hz

    Returns
    -------
    ndarray(N)
        The magnitude of every Fourier coefficient of x
        (all of them, not only the ones that were plotted)
    """
    magnitudes = np.abs(np.fft.fft(x))
    # Frequency that each Fourier coefficient corresponds to,
    # given samples spaced 1/fs seconds apart
    bin_freqs = np.fft.fftfreq(len(x), 1/fs)
    positive = bin_freqs > 0
    plt.plot(bin_freqs[positive], magnitudes[positive])
    plt.xlabel("Frequency")
    plt.ylabel("Magnitude")
    return magnitudes


## Add three sinusoids together and look at the Fourier transform of the sum
freq = 440  
fs = 44100  
seconds = 1

freq2 = 440*2**(7/12) # 7 halfsteps above 440hz
freq3 = 440*2**(4/12) # 4 halfsteps above 440hz

# Generate array with seconds*sample_rate steps, starting at 0 and
# stopping one step short of seconds (the False leaves out the endpoint)
t = np.linspace(0, seconds, seconds * fs, False)
x = np.sin(2*np.pi*freq*t)
x = x + np.sin(2*np.pi*freq2*t)
x = x + np.sin(2*np.pi*freq3*t)

# Take the fourier transform of x
plot_fourier_mag(x, fs)
plt.xlim([0, 2000])

ipd.Audio(x, rate=fs)


# Zoom in on the first 2000 samples of x
plt.plot(x[0:2000])

[<matplotlib.lines.Line2D at 0x7fe6047b00a0>]


fs = 44100
T = 100  # Length of one ramp of the sawtooth, in samples
# Count 0, 1, ..., T-1 and start over, for fs samples in total
x = np.mod(np.arange(fs), T)

# Top: the first ten ramps of the signal
plt.subplot(2, 1, 1)
plt.plot(x[:1000])
plt.title("Raw signal")
# Bottom: its Fourier magnitude up to 8000hz
plt.subplot(2, 1, 2)
plot_fourier_mag(x, fs)
plt.title("Fourier Transform")
plt.xlim([0, 8000])
plt.tight_layout()

ipd.Audio(x, rate=fs)


## Look at sinusoidal approximation of sawtooth wave
## The kth term below is a sinusoid at k times the base frequency
## with an amplitude of 1/k
fs = 44100  
seconds = 1
freq = 440
t = np.linspace(0, seconds, seconds * fs, False)
y = -np.sin(2*np.pi*freq*t)
y = y - (1.0/2.0)*np.sin(2*np.pi*2*freq*t)
y = y - (1.0/3.0)*np.sin(2*np.pi*3*freq*t)
y = y - (1.0/4.0)*np.sin(2*np.pi*4*freq*t)
y = y - (1.0/5.0)*np.sin(2*np.pi*5*freq*t)
y = y - (1.0/6.0)*np.sin(2*np.pi*6*freq*t)
y = y - (1.0/7.0)*np.sin(2*np.pi*7*freq*t)

plt.subplot(211)
plt.plot(y[0:1000])
# The sinusoids approximate the sawtooth, not the other way around
plt.title("Sinusoidal approximation of sawtooth")
plt.subplot(212)
plot_fourier_mag(y, fs)
plt.xlim([0, 8000])
plt.title("Discrete Fourier Transform Magnitude")
plt.tight_layout()
ipd.Audio(y, rate=fs)


## Do the same thing with a for loop
## To really be the same sum as the one written out term by term, the loop
## uses the same 7 harmonics, the same sign on every term and the same
## endpoint-free time samples
n_harmonics = 7
freq = 440
t = np.linspace(0, 1, fs, endpoint=False)
y = np.zeros(fs)
for i in range(1, n_harmonics+1):
    # ith harmonic: i times the base frequency, at 1/i of the amplitude
    y = y - (1.0/i)*np.sin(2*np.pi*i*freq*t)

plt.subplot(211)
plt.plot(y[0:1000])
# The sinusoids approximate the sawtooth, not the other way around
plt.title("Sinusoidal approximation of sawtooth")
plt.subplot(212)
plot_fourier_mag(y, fs)
plt.xlim([0, 8000])
plt.title("Discrete Fourier Transform Magnitude")
plt.tight_layout()
ipd.Audio(y, rate=fs)


# look at speech, look at noise
# Show effect of lowpass filter
fs, x = wavfile.read("gameover.wav")

# Top: the waveform itself
plt.subplot(2, 1, 1)
plt.plot(x)
plt.title("Audio")
# Bottom: its Fourier magnitude between 0 and 8000hz
plt.subplot(2, 1, 2)
xft = plot_fourier_mag(x, fs)
plt.title("Fourier Magnitude")
plt.xlim([0, 8000])
#plt.yscale("log")
# Start the vertical axis at 10 and end it at the largest magnitude
plt.ylim([10, np.max(xft)])
plt.tight_layout()
ipd.Audio(x, rate=fs)


fs, x = wavfile.read("gameover.wav")
# Convolve the audio with a box of 1000 ones before plotting it
box = np.ones(1000)
x = np.convolve(box, x)

# Top: the filtered waveform
plt.subplot(2, 1, 1)
plt.plot(x)
plt.title("Audio")
# Bottom: its Fourier magnitude between 0 and 8000hz
plt.subplot(2, 1, 2)
xft = plot_fourier_mag(x, fs)
plt.title("Fourier Magnitude")
plt.xlim([0, 8000])
#plt.yscale("log")
# Start the vertical axis at 10 and end it at the largest magnitude
plt.ylim([10, np.max(xft)])
plt.tight_layout()
ipd.Audio(x, rate=fs)

Audio Waveform Processing in Python

Chris Tralie

Setting up the environment

Audio Waveforms

Sinusoids

Echoes / Convolution

Noise / Comb Filters

The Discrete Fourier Transform

$ \left( \frac{44100 \text{ samples}}{1 \text{ second}} \right) \left( \frac{1 \text{ cycle}}{100 \text{ samples}} \right) = \frac{441 \text{ cycles}}{1 \text{ second}} $