Sound source: University of Iowa Electronic Music Studios

Listen to these three notes on the piano

Image modified from: piano-music-theory.com

A3
A4
B4

import stuff
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import aifc
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()  # datetime converter for a matplotlib
import seaborn as sns
sns.set(style="ticks", font_scale=1.5)

import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pio.templates.default = "presentation"
import math
import scipy
from scipy.io import wavfile

# %matplotlib widget
define useful functions
def open_aiff(filename):
    data = aifc.open(filename)
    Nframes = data.getnframes()
    str_signal = data.readframes(Nframes)
    # d = np.fromstring(str_signal, numpy.short).byteswap()
    d = np.frombuffer(str_signal, dtype=np.int16).byteswap()
    # if there are two channels, take only one of them
    if data.getnchannels() == 2:
        d = d[::2]
    N = len(d)
    dt = 1.0 / data.getframerate()
    time = np.arange(N) * dt
    return time, d

def open_wav_return_fft(filename, max_freq=None):
    sample_rate, signal = wavfile.read(filename)
    time = np.arange(len(signal)) / sample_rate
    dt = time[1] - time[0]
    N = len(time)
    signal = normalize(signal)
    fft = scipy.fft.fft(signal) / N
    k = scipy.fft.fftfreq(N, dt)
    fft_abs = np.abs(fft)
    if max_freq == None:
        max_freq = k.max()
    dk = 1 / (time.max() - time.min())
    n = int(max_freq/dk)
    return k[:n], fft_abs[:n]


def normalize(signal):
    return (signal - signal.mean()) / signal.std()

def fft_abs(signal, time, max_freq=None):
    dt = time[1] - time[0]
    N = len(time)
    fft = scipy.fft.fft(signal) / N
    k = scipy.fft.fftfreq(N, dt)
    fft_abs = np.abs(fft)
    if max_freq == None:
        max_freq = k.max()
    dk = 1 / (time.max() - time.min())
    n = int(max_freq/dk)
    return k[:n], fft_abs[:n]
    
load audio files
tmin = 2.00
tmax = 2.20

timeA4, pianoA4 = open_aiff('files/Piano.mf.A4.aiff')
indices = np.where((timeA4 > tmin) & (timeA4 < tmax))
timeA4 = timeA4[indices]
pianoA4 = normalize(pianoA4[indices])

timeA3, pianoA3 = open_aiff('files/Piano.mf.A3.aiff')
indices = np.where((timeA3 > tmin) & (timeA3 < tmax))
timeA3 = timeA3[indices]
pianoA3 = normalize(pianoA3[indices])

timeB4, pianoB4 = open_aiff('files/Piano.mf.B4.aiff')
indices = np.where((timeB4 > tmin) & (timeB4 < tmax))
timeB4 = timeB4[indices]
pianoB4 = normalize(pianoB4[indices])
plot time series for 3 notes
# blue, orange, green, red, purple, brown, pink, gray, olive, cyan = px.colors.qualitative.D3[:]

color_A3 = "#e7c535"
color_A4 = "#ff00c3"
color_B4 = "#327fce"

fig = make_subplots()

fig.add_trace(
    go.Scatter(x=list(timeA3),
               y=list(pianoA3),
               name='piano A3',
               line=dict(color=color_A3),
               visible='legendonly'),
)

fig.add_trace(
    go.Scatter(x=list(timeA4),
               y=list(pianoA4),
               name='piano A4',
               line=dict(color=color_A4),
               ),
)

fig.add_trace(
    go.Scatter(x=list(timeB4),
               y=list(pianoB4),
               name='piano B4',
               line=dict(color=color_B4),
               visible='legendonly'),
)

# Add range slider
fig.update_layout(
    title='3 notes on the piano',
    xaxis=dict(
        rangeslider={"visible":True},
        type="linear"
    ),
    legend={"orientation":"h",           # Horizontal legend
            "yanchor":"top",             # Anchor legend to the top
            "y":1.1,                    # Adjust vertical position
            "xanchor":"center",           # Anchor legend to the right
            "x":0.5,                       # Adjust horizontal position
           },
)

# Set y-axes titles
fig.update_yaxes(
    title_text="signal",
    range=[-3,3])
fig.update_xaxes(
    title_text="time (s)",)

44.1 power spectrum

We can plot the absolute value of the Fourier transform of each signal, see below.

calculate fourier transform
kA4, abs_fft_A4 = fft_abs(pianoA4, timeA4, max_freq=4e3)
kA3, abs_fft_A3 = fft_abs(pianoA3, timeA3, max_freq=4e3)
kB4, abs_fft_B4 = fft_abs(pianoB4, timeB4, max_freq=4e3)
plot power spectrum for 3 notes
fig = make_subplots()

fig.add_trace(
    go.Scatter(x=list(kA3),
               y=list(abs_fft_A3),
               name='piano A3',
               line=dict(color=color_A3),
               visible='legendonly'),
)

fig.add_trace(
    go.Scatter(x=list(kA4),
               y=list(abs_fft_A4),
               name='piano A4',
               line=dict(color=color_A4),),
)

fig.add_trace(
    go.Scatter(x=list(kB4),
               y=list(abs_fft_B4),
               name='piano B4',
               line=dict(color=color_B4),
               visible='legendonly'),
)

# Add range slider
fig.update_layout(
    title='3 notes on the piano',
    legend={"orientation":"h",           # Horizontal legend
            "yanchor":"top",             # Anchor legend to the top
            "y":1.1,                    # Adjust vertical position
            "xanchor":"center",           # Anchor legend to the right
            "x":0.5,                       # Adjust horizontal position
           },
)

# Set y-axes titles
fig.update_yaxes(
    title_text="abs(FFT)",
    range=[-0.1,1])
fig.update_xaxes(
    title_text="frequency (Hz)",)

We call the graph above a power spectrum. “Spectrum” refers to the frequencies, and “power” means that we square the signal. Strictly speaking, we see above the square root of the power spectrum, but I’ll still call it power spectrum.

The energy associated with a wave is proportional to the square of the wave’s amplitude. When we computed the fft of the signal, we divided it by N, which is akin to dividing by the whole time duration of the signal. For this reason we are dealing with power, which is energy / time. Finally, it should be noted that the square of a complex number z=a+ib is given by

|z|^2 = z\cdot z^*,

where z^*=a-ib is its complex conjugate:

z\cdot z^* = (a+ib)(a-ib) = a^2 + b^2.

44.2 harmonics

Why do we see peaks at regular intervals in the power spectrum??

We call the lowest peak in the spectrum the fundamental frequency. Note that there are other high frequency peaks that follow the fundamental, at regular intervals. These are called overtones, and they are multiples of the fundamental frequency. The fundamental and the overtones are called together the harmonics.

Image source: N.H. Crowhurst

The sound produced by musical instruments is the outcome of vibrations in the body of the instrument. Often, these vibrations are standing waves, and that is the reason why we see such a strong overtone signature in the power spectrum.

44.3 timbre

instrument recording of A4 image
piano
flute
trumpet
violin
Show the code
tmin = 1.00
tmax = 2.50

time_list = []
signal_list = []
instrument_list = ['flute', 'piano', 'trumpet', 'violin']
file_list = ['flute-C4.wav',
             'piano-C4.wav',
             'trumpet-C4.wav',
             'violin-C4.wav'
            ]

k_list = []
abs_fft_list = []
for i, filename in enumerate(file_list):
    k, abs_fft = open_wav_return_fft(f'files/{filename}', max_freq=3.5e3)
    k_list.append(k)
    abs_fft_list.append(abs_fft)
plot power spectrum for 3 notes
fig = make_subplots()

for i, instrument in enumerate(instrument_list):
    vis = 'legendonly'
    if instrument == 'piano': vis = True
    fig.add_trace(
        go.Scatter(x=list(k_list[i]),
                y=list(abs_fft_list[i]),
                name=f'{instrument}',
                # line=dict(color=color_A3),
                visible=vis#'legendonly'
                ),
    )

# Add range slider
fig.update_layout(
    # title='3 notes on the piano',
    legend={"orientation":"h",           # Horizontal legend
            "yanchor":"top",             # Anchor legend to the top
            "y":1.1,                    # Adjust vertical position
            "xanchor":"center",           # Anchor legend to the right
            "x":0.5,                       # Adjust horizontal position
           },
)

# Set y-axes titles
fig.update_yaxes(
    title_text="abs(FFT)",
    range=[-0.05,0.4])
fig.update_xaxes(
    title_text="frequency (Hz)",)

This is a nice video about the connection between tibre and the harmonic series.

44.4 linear vs. logarithmic scale

Listen to these two sequences of sounds.

1

2

See below a graph representing the sequence of frequencies for these two recordings. Which recording sounds more regular, like climbing steps of equal size?

Show the code
fr_linear = np.arange(220.0, 220.0 + 20*90, 90)
alpha = 2 ** (2.0/12)
fr_exp = np.array(
        [220.0 * alpha ** i for i in range(20)]
    )
fig, ax = plt.subplots()
ax.plot(fr_linear, 'o', mfc="black", mec="None")
ax.plot(fr_exp, 's', mfc=[0.7]*3, mec="None")
ax.set(xlabel="sound #",
       ylabel="frequency (Hz)",
       yticks=np.arange(200,2001,400))
pass

Probably, the most common tuning standard in western music is A440, meaning that the note corresponding to the A4 on the piano must have a fundamental frequency equal to 440 Hz. Go to the graph shown in power spectrum , zoom in, and find out if the piano was “properly” tuned.

The A3 note has its fundamental frequency at half of that, namely 220 Hz.

Dividing an octave in 12 equal half-steps is called “equal temperament”. Multiplying the A3 frequency 12 times by an unknown factor y should give us the frequency of A4:

\begin{split} 220 \times y^{12} &= 440\\ y^{12} &= 2 \\ \therefore y &= \sqrt[12]{2} \end{split}

We can now easily check out if the fundamental frequency for B3 as shown in the graph agrees with this rule:

calculate frequency for B4
factor = 2**(1/12)
B4 = 440 * factor**2  # two half steps above A4
print(f"B4 = {B4:.2f} Hz")
B4 = 493.88 Hz

Our pitch perception is logarithmic, meaning that when we hear a string of frequencies that increase exponentially, we percieve it as increasing with regular steps. Sound 2 corresponds to the exponential orange dots.

44.5 chords

# of harmonic frequency (Hz) pitch interval
1 261.626 C4 fundamental
2 523.252 C5 octave
3 523.252 G5 perfect fifth
4 1046.504 C6 octave
5 1308.130 E6 major third
6 1569.755 G6 perfect fifth

Show the code
from palettable.tableau import Tableau_10
colors = Tableau_10.hex_colors

N = 7
harmonics = np.arange(1, N+1)
pitches = 261.626 * harmonics
names = ['C4', 'C5', 'G5', 'C6', 'E6', 'G6', 'Bb6']

fig, ax = plt.subplots()
ax.bar(harmonics, pitches, color=colors[:N])

for i,h in enumerate(harmonics):
    ax.text(h, pitches[i]+10, names[i], ha="center")

ax.set(xlabel="harmonics",
       ylabel="frequency (Hz)",
       xticks=harmonics,
       yticks=np.arange(00,2001,500))
pass

C (major chord, C + E + G = do-mi-sol)



C7 (C + E + G + Bb = do-mi-sol-si bemol)


Dissonant sound:
C + C#