44 sound & music

Sound source: University of Iowa Electronic Music Studios

Listen to these three notes on the piano

Image modified from: piano-music-theory.com

A3
A4
B4

import stuff

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import aifc
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()  # datetime converter for a matplotlib
import seaborn as sns
sns.set(style="ticks", font_scale=1.5)

import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pio.templates.default = "presentation"
import math
import scipy
from scipy.io import wavfile

# %matplotlib widget

define useful functions

def open_aiff(filename):
    data = aifc.open(filename)
    Nframes = data.getnframes()
    str_signal = data.readframes(Nframes)
    # d = np.fromstring(str_signal, numpy.short).byteswap()
    d = np.frombuffer(str_signal, dtype=np.int16).byteswap()
    # if there are two channels, take only one of them
    if data.getnchannels() == 2:
        d = d[::2]
    N = len(d)
    dt = 1.0 / data.getframerate()
    time = np.arange(N) * dt
    return time, d

def open_wav_return_fft(filename, max_freq=None):
    sample_rate, signal = wavfile.read(filename)
    time = np.arange(len(signal)) / sample_rate
    dt = time[1] - time[0]
    N = len(time)
    signal = normalize(signal)
    fft = scipy.fft.fft(signal) / N
    k = scipy.fft.fftfreq(N, dt)
    fft_abs = np.abs(fft)
    if max_freq == None:
        max_freq = k.max()
    dk = 1 / (time.max() - time.min())
    n = int(max_freq/dk)
    return k[:n], fft_abs[:n]


def normalize(signal):
    return (signal - signal.mean()) / signal.std()

def fft_abs(signal, time, max_freq=None):
    dt = time[1] - time[0]
    N = len(time)
    fft = scipy.fft.fft(signal) / N
    k = scipy.fft.fftfreq(N, dt)
    fft_abs = np.abs(fft)
    if max_freq == None:
        max_freq = k.max()
    dk = 1 / (time.max() - time.min())
    n = int(max_freq/dk)
    return k[:n], fft_abs[:n]

load audio files

tmin = 2.00
tmax = 2.20

timeA4, pianoA4 = open_aiff('files/Piano.mf.A4.aiff')
indices = np.where((timeA4 > tmin) & (timeA4 < tmax))
timeA4 = timeA4[indices]
pianoA4 = normalize(pianoA4[indices])

timeA3, pianoA3 = open_aiff('files/Piano.mf.A3.aiff')
indices = np.where((timeA3 > tmin) & (timeA3 < tmax))
timeA3 = timeA3[indices]
pianoA3 = normalize(pianoA3[indices])

timeB4, pianoB4 = open_aiff('files/Piano.mf.B4.aiff')
indices = np.where((timeB4 > tmin) & (timeB4 < tmax))
timeB4 = timeB4[indices]
pianoB4 = normalize(pianoB4[indices])

plot time series for 3 notes

# blue, orange, green, red, purple, brown, pink, gray, olive, cyan = px.colors.qualitative.D3[:]

color_A3 = "#e7c535"
color_A4 = "#ff00c3"
color_B4 = "#327fce"

fig = make_subplots()

fig.add_trace(
    go.Scatter(x=list(timeA3),
               y=list(pianoA3),
               name='piano A3',
               line=dict(color=color_A3),
               visible='legendonly'),
)

fig.add_trace(
    go.Scatter(x=list(timeA4),
               y=list(pianoA4),
               name='piano A4',
               line=dict(color=color_A4),
               ),
)

fig.add_trace(
    go.Scatter(x=list(timeB4),
               y=list(pianoB4),
               name='piano B4',
               line=dict(color=color_B4),
               visible='legendonly'),
)

# Add range slider
fig.update_layout(
    title='3 notes on the piano',
    xaxis=dict(
        rangeslider={"visible":True},
        type="linear"
    ),
    legend={"orientation":"h",           # Horizontal legend
            "yanchor":"top",             # Anchor legend to the top
            "y":1.1,                    # Adjust vertical position
            "xanchor":"center",           # Anchor legend to the right
            "x":0.5,                       # Adjust horizontal position
           },
)

# Set y-axes titles
fig.update_yaxes(
    title_text="signal",
    range=[-3,3])
fig.update_xaxes(
    title_text="time (s)",)

When we zoom in, both A3 and A4 have the same period, but different shapes.
A4 and B4 have slightly different periods (A4 is longer).

44.1 power spectrum

We can plot the absolute value of the Fourier transform of each signal, see below.

calculate fourier transform

kA4, abs_fft_A4 = fft_abs(pianoA4, timeA4, max_freq=4e3)
kA3, abs_fft_A3 = fft_abs(pianoA3, timeA3, max_freq=4e3)
kB4, abs_fft_B4 = fft_abs(pianoB4, timeB4, max_freq=4e3)

plot power spectrum for 3 notes

fig = make_subplots()

fig.add_trace(
    go.Scatter(x=list(kA3),
               y=list(abs_fft_A3),
               name='piano A3',
               line=dict(color=color_A3),
               visible='legendonly'),
)

fig.add_trace(
    go.Scatter(x=list(kA4),
               y=list(abs_fft_A4),
               name='piano A4',
               line=dict(color=color_A4),),
)

fig.add_trace(
    go.Scatter(x=list(kB4),
               y=list(abs_fft_B4),
               name='piano B4',
               line=dict(color=color_B4),
               visible='legendonly'),
)

# Add range slider
fig.update_layout(
    title='3 notes on the piano',
    legend={"orientation":"h",           # Horizontal legend
            "yanchor":"top",             # Anchor legend to the top
            "y":1.1,                    # Adjust vertical position
            "xanchor":"center",           # Anchor legend to the right
            "x":0.5,                       # Adjust horizontal position
           },
)

# Set y-axes titles
fig.update_yaxes(
    title_text="abs(FFT)",
    range=[-0.1,1])
fig.update_xaxes(
    title_text="frequency (Hz)",)

We call the graph above a power spectrum. “Spectrum” refers to the frequencies, and “power” means that we square the signal. Strictly speaking, we see above the square root of the power spectrum, but I’ll still call it power spectrum.

The energy associated with a wave is proportional to the square of the wave’s amplitude. When we computed the fft of the signal, we divided it by N, which is akin to dividing by the whole time duration of the signal. For this reason we are dealing with power, which is energy / time. Finally, it should be noted that the square of a complex number z=a+ib is given by

|z|^2 = z\cdot z^*,

where z^*=a-ib is its complex conjugate:

z\cdot z^* = (a+ib)(a-ib) = a^2 + b^2.

44.2 harmonics

Why do we see peaks at regular intervals in the power spectrum??

We call the lowest peak in the spectrum the fundamental frequency. Note that there are other high frequency peaks that follow the fundamental, at regular intervals. These are called overtones, and they are multiples of the fundamental frequency. The fundamental and the overtones are called together the harmonics.

Image source: N.H. Crowhurst

The sound produced by musical instruments is the outcome of vibrations in the body of the instrument. Often, these vibrations are standing waves, and that is the reason why we see such a strong overtone signature in the power spectrum.

44.3 timbre

instrument	recording of A4	image
piano
flute
trumpet
violin

Show the code

tmin = 1.00
tmax = 2.50

time_list = []
signal_list = []
instrument_list = ['flute', 'piano', 'trumpet', 'violin']
file_list = ['flute-C4.wav',
             'piano-C4.wav',
             'trumpet-C4.wav',
             'violin-C4.wav'
            ]

k_list = []
abs_fft_list = []
for i, filename in enumerate(file_list):
    k, abs_fft = open_wav_return_fft(f'files/{filename}', max_freq=3.5e3)
    k_list.append(k)
    abs_fft_list.append(abs_fft)

plot power spectrum for 3 notes

fig = make_subplots()

for i, instrument in enumerate(instrument_list):
    vis = 'legendonly'
    if instrument == 'piano': vis = True
    fig.add_trace(
        go.Scatter(x=list(k_list[i]),
                y=list(abs_fft_list[i]),
                name=f'{instrument}',
                # line=dict(color=color_A3),
                visible=vis#'legendonly'
                ),
    )

# Add range slider
fig.update_layout(
    # title='3 notes on the piano',
    legend={"orientation":"h",           # Horizontal legend
            "yanchor":"top",             # Anchor legend to the top
            "y":1.1,                    # Adjust vertical position
            "xanchor":"center",           # Anchor legend to the right
            "x":0.5,                       # Adjust horizontal position
           },
)

# Set y-axes titles
fig.update_yaxes(
    title_text="abs(FFT)",
    range=[-0.05,0.4])
fig.update_xaxes(
    title_text="frequency (Hz)",)

This is a nice video about the connection between tibre and the harmonic series.

44.4 linear vs. logarithmic scale

Listen to these two sequences of sounds.

See below a graph representing the sequence of frequencies for these two recordings. Which recording sounds more regular, like climbing steps of equal size?

Show the code

fr_linear = np.arange(220.0, 220.0 + 20*90, 90)
alpha = 2 ** (2.0/12)
fr_exp = np.array(
        [220.0 * alpha ** i for i in range(20)]
    )
fig, ax = plt.subplots()
ax.plot(fr_linear, 'o', mfc="black", mec="None")
ax.plot(fr_exp, 's', mfc=[0.7]*3, mec="None")
ax.set(xlabel="sound #",
       ylabel="frequency (Hz)",
       yticks=np.arange(200,2001,400))
pass

Probably, the most common tuning standard in western music is A440, meaning that the note corresponding to the A4 on the piano must have a fundamental frequency equal to 440 Hz. Go to the graph shown in power spectrum , zoom in, and find out if the piano was “properly” tuned.

The A3 note has its fundamental frequency at half of that, namely 220 Hz.

Because there are 12 half-steps between A3 and A4, can you figure out a rule how to find the frequency of any note on the piano?

Dividing an octave in 12 equal half-steps is called “equal temperament”. Multiplying the A3 frequency 12 times by an unknown factor y should give us the frequency of A4:

\begin{split} 220 \times y^{12} &= 440\\ y^{12} &= 2 \\ \therefore y &= \sqrt[12]{2} \end{split}

We can now easily check out if the fundamental frequency for B3 as shown in the graph agrees with this rule:

calculate frequency for B4

factor = 2**(1/12)
B4 = 440 * factor**2  # two half steps above A4
print(f"B4 = {B4:.2f} Hz")

B4 = 493.88 Hz

Our pitch perception is logarithmic, meaning that when we hear a string of frequencies that increase exponentially, we percieve it as increasing with regular steps. Sound 2 corresponds to the exponential orange dots.

44.5 chords

# of harmonic	frequency (Hz)	pitch	interval
1	261.626	C4	fundamental
2	523.252	C5	octave
3	523.252	G5	perfect fifth
4	1046.504	C6	octave
5	1308.130	E6	major third
6	1569.755	G6	perfect fifth

Show the code

from palettable.tableau import Tableau_10
colors = Tableau_10.hex_colors

N = 7
harmonics = np.arange(1, N+1)
pitches = 261.626 * harmonics
names = ['C4', 'C5', 'G5', 'C6', 'E6', 'G6', 'Bb6']

fig, ax = plt.subplots()
ax.bar(harmonics, pitches, color=colors[:N])

for i,h in enumerate(harmonics):
    ax.text(h, pitches[i]+10, names[i], ha="center")

ax.set(xlabel="harmonics",
       ylabel="frequency (Hz)",
       xticks=harmonics,
       yticks=np.arange(00,2001,500))
pass

C (major chord, C + E + G = do-mi-sol)

C7 (C + E + G + Bb = do-mi-sol-si bemol)

Dissonant sound:
C + C#