Waveform analysis/Doh ray me
Analyse a given section of a monophonic audio waveform for its average pitch, and output the sol-fa trigraph (a three-letter note name) that corresponds to that average pitch level: Doh, Ray, Mee, Fah, Soh, Lah, Tee, doh.
Optionally, follow the trigraph with a plus or minus symbol, to indicate whether the note falls above or below the solfa. Extend the scale to cover 21 notes: DOH, RAY, MEE, FAH, SOH, LAH, TEE, Doh, Ray, Mee, Fah, Soh, Lah, Tee, doh, ray, mee, fah, soh, lah, tee.
A calibration parameter can be provided to suit different voices. This can be provided as a variable defined within the code.
Go
Clearly, this task is only feasible if you know how frequencies are encoded as bytes in the waveform and even then there are mathematical difficulties in reversing the procedure which mean that the eventual result is unlikely to be exact.
As an example, we analyze the .wav file (notes.wav) created by the [Musical Scale] task. As we know that the same frequency was used to generate each sample (44100 bytes), it is only necessary to examine a small number of bytes for each sample to determine the average frequency for the file as a whole (8 samples).
However, as each calculation was of necessity rounded to the nearest byte, it seems sensible to use more than one byte per sample (but not so many that the multi-valued arcsine function will be applied to a value outside its principal range) to try to reduce the effect of rounding. 20 bytes per sample are used here, though curiously using only 3 bytes per sample would have produced a more accurate result (384.9 Hz).
Some optional aspects of the task have been ignored as they are not relevant to this particular example.
package main
import (
"encoding/binary"
"fmt"
"log"
"math"
"os"
)
// freqs lists the reference frequencies in ascending order; the program
// reports these values as Hz. freqs[i] belongs to notes[i].
var freqs = []float64{261.6, 293.6, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3}

// notes holds the sol-fa name reported for the frequency at the same index
// in freqs.
var notes = []string{"Doh", "Ray", "Mee", "Fah", "Soh", "Lah", "Tee", "doh"}
// getNote maps freq onto the nearest entry of the freqs table and returns
// that entry's name from notes with a sign appended: "-" when freq is at or
// below the chosen entry, "+" when it is above it. Values at or below the
// first entry give "Doh-"; values above the last entry give "doh+".
func getNote(freq float64) string {
	// upper is the position of the first table entry that is >= freq, or
	// len(freqs) when every entry is smaller.
	upper := len(freqs)
	for pos, f := range freqs {
		if freq <= f {
			upper = pos
			break
		}
	}
	if upper == 0 {
		return "Doh-"
	}
	if upper == len(freqs) {
		return "doh+"
	}
	// freq lies between two entries: pick whichever is closer, ties going
	// to the upper one.
	gapAbove := freqs[upper] - freq
	gapBelow := freq - freqs[upper-1]
	if gapAbove <= gapBelow {
		return notes[upper] + "-"
	}
	return notes[upper-1] + "+"
}
// check aborts the program through log.Fatal when err is non-nil and is a
// no-op otherwise.
func check(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
// main reads notes.wav, estimates a frequency from the first nbytes data
// bytes of every sampleRate-sized stretch of the data, and prints the average
// of those estimates next to the average of the freqs table, each labelled
// with getNote.
//
// The 44-byte header is read first; the sample rate is taken from bytes
// 24..27 and the data length from bytes 40..43 (both little-endian).
// The program exits through log.Fatal on I/O errors, on a truncated file, or
// on header values that would make the analysis divide by zero or index past
// the read buffer.
func main() {
	file, err := os.Open("notes.wav")
	check(err)
	defer file.Close()

	hdr := make([]byte, 44)
	got, err := file.Read(hdr)
	check(err)
	if got != len(hdr) {
		log.Fatalf("notes.wav: header truncated (%d of %d bytes)", got, len(hdr))
	}

	// check header parameters
	sampleRate := int(binary.LittleEndian.Uint32(hdr[24:28]))
	fmt.Println("Sample Rate :", sampleRate)
	nbytes := 20
	// data[1..nbytes] is indexed below and sampleRate is used as a divisor,
	// so it must be larger than nbytes (which also rules out zero).
	if sampleRate <= nbytes {
		log.Fatalf("notes.wav: sample rate %d too small to examine %d bytes", sampleRate, nbytes)
	}
	dataLength := int(binary.LittleEndian.Uint32(hdr[40:]))
	duration := dataLength / sampleRate
	fmt.Println("Duration :", duration)
	if duration < 1 {
		log.Fatalf("notes.wav: data length %d holds no complete stretch of %d bytes", dataLength, sampleRate)
	}

	sum := 0.0
	sampleRateF := float64(sampleRate)
	data := make([]byte, sampleRate)
	fmt.Println("Bytes examined :", nbytes, "per sample")
	for j := 0; j < duration; j++ {
		n, err := file.Read(data)
		check(err)
		// Only bytes 1..nbytes are used; make sure they were really read
		// rather than left over from the previous iteration.
		if n <= nbytes {
			log.Fatalf("notes.wav: short read (%d bytes) in stretch %d", n, j)
		}
		for i := 1; i <= nbytes; i++ {
			// Treat byte i as 32*sin(2*pi*f*i/sampleRate) and solve for f.
			bf := float64(data[i]) / 32
			freq := math.Asin(bf) * sampleRateF / (float64(i) * math.Pi * 2)
			sum += freq
		}
	}
	cav := sum / (float64(duration) * float64(nbytes))
	fmt.Printf("\nComputed average frequency = %.1f Hz (%s)\n", cav, getNote(cav))

	// Average of the reference table, for comparison.
	sum = 0.0
	for _, f := range freqs {
		sum += f
	}
	aav := sum / float64(len(freqs))
	fmt.Printf("Actual average frequency = %.1f Hz (%s)\n", aav, getNote(aav))
}
- Output:
Sample Rate : 44100
Duration : 8
Bytes examined : 20 per sample

Computed average frequency = 387.1 Hz (Soh-)
Actual average frequency = 385.4 Hz (Soh-)
Julia
Uses the LibSndFile library for WAV file reading and the DSP module's implementation of the ESPRIT algorithm to analyze the sound for its fundamental frequency.
using DSP, FileIO, LibSndFile
# Sound file analysed by the script.
const soundfilename = "Cscale3octaves.wav"

# Reference frequency => sol-fa name for the 21 notes of the extended scale.
const freq_to_solfa = Dict(
    130.81 => "DOH",
    146.83 => "RAY",
    164.81 => "MEE",
    174.61 => "FAH",
    196.0 => "SOH",
    220.0 => "LAH",
    246.94 => "TEE",
    261.63 => "Doh",
    293.66 => "Ray",
    329.63 => "Mee",
    349.23 => "Fah",
    392.0 => "Soh",
    440.0 => "Lah",
    493.88 => "Tee",
    523.25 => "doh",
    587.33 => "ray",
    659.25 => "mee",
    698.46 => "fah",
    783.99 => "soh",
    880.0 => "lah",
    987.77 => "tee",
)

# The reference frequencies in ascending order.
const sfreqs = sort!(collect(keys(freq_to_solfa)))
"""
    closestfreqs(samples, fs=44100.0)

For every chunk in `samples`, run `esprit` on the chunk (converted to
`Complex{Float64}`) and return the entry of `sfreqs` nearest to the last
positive frequency it reports.

# Arguments
- `samples`: iterable of sample chunks.
- `fs`: sampling rate handed to `esprit`.

# Returns
A `Vector{Float64}` of values taken from `sfreqs`, one per chunk for which
`esprit` produced at least one positive frequency. Chunks with no positive
frequency are skipped instead of raising a `BoundsError`.
"""
function closestfreqs(samples, fs=44100.0)
    pfreqs = Float64[]
    for sample in samples
        M = div(length(sample) + 1, 3)
        arr = [Complex{Float64}(x) for x in sample]
        # `x > 0` is false for NaN too, so unusable estimates are dropped here.
        narr = filter(x -> x > 0, esprit(arr, M, 4, fs))
        # Nothing usable in this chunk: there is no `narr[end]` to look up.
        isempty(narr) && continue
        target = narr[end]
        idx = argmin(abs.(sfreqs .- target))
        push!(pfreqs, sfreqs[idx])
    end
    return pfreqs
end
"""
    getchunks(soundfile, channel=1, timespan=0.1)

Load `soundfile` and cut column `channel` of the loaded data into consecutive,
non-overlapping chunks of `round(Int, timespan * fs)` samples, where `fs` is
the sample rate reported for the file.

# Returns
A tuple `(fs, chunks)`. Every complete chunk is included; trailing samples
that do not fill a whole chunk are dropped.

# Throws
- `ArgumentError` if `timespan * fs` rounds to less than one sample.
"""
function getchunks(soundfile, channel=1, timespan=0.1)
    sv = load(soundfile)
    fs = LibSndFile.samplerate(sv)
    samplespan = round(Int, timespan * fs)
    samplespan >= 1 || throw(ArgumentError(
        "timespan $timespan covers less than one sample at rate $fs"))
    data = view(sv, :, channel)
    # The last admissible start index is the one whose chunk ends exactly at
    # length(data); the former bound (length - samplespan - 1) could drop the
    # final complete chunk.
    laststart = length(data) - samplespan + 1
    return (fs, [data[i:i+samplespan-1] for i in 1:samplespan:laststart])
end
"""
    makenotelist(soundfile, repetitionsneeded=2)

Return the sol-fa names detected in `soundfile`, in order.

The file is chunked with `getchunks`, each chunk is mapped to a reference
frequency by `closestfreqs`, and the frequency is translated through
`freq_to_solfa`. A name is appended to the result at the moment it has
repeated `repetitionsneeded` times in a row after its first appearance.
"""
function makenotelist(soundfile, repetitionsneeded=2)
    fs, samples = getchunks(soundfile)
    confirmed = String[]
    current = ""
    streak = 0
    for f in closestfreqs(samples, fs)
        name = freq_to_solfa[f]
        if name == current
            streak += 1
            # Report once, exactly when the streak reaches the threshold.
            streak == repetitionsneeded && push!(confirmed, name)
        else
            current = name
            streak = 0
        end
    end
    return confirmed
end
# Print the note list detected in the configured sound file.
soundfilename |> makenotelist |> println
- Output:
["DOH", "RAY", "MEE", "FAH", "SOH", "LAH", "TEE", "Doh", "Ray", "Mee", "Fah", "Soh", "Lah", "Lah", "Tee", "doh", "ray", "mee", "fah", "soh", "lah", "tee"]
Nim
The input .wav file (notes.wav) is created by the [Musical Scale#Nim] entry.
import endians, math, stats, strformat
# Reference frequencies in ascending order; Freqs[i] belongs to Notes[i].
const Freqs = [261.6, 293.6, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3]

# Sol-fa name reported for the frequency at the same index in Freqs.
const Notes = ["Doh", "Ray", "Mee", "Fah", "Soh", "Lah", "Tee", "doh"]
func getNote(freq: float): string =
  ## Maps `freq` onto the nearest entry of `Freqs` and returns that entry's
  ## name from `Notes` with a sign appended: '-' when `freq` is at or below
  ## the chosen entry, '+' when it is above it. Values at or below the first
  ## entry give "Doh-"; values above the last entry give "doh+".
  # `upper` is the position of the first entry >= freq, or Freqs.len if none.
  var upper = Freqs.len
  for pos in 0 .. Freqs.high:
    if freq <= Freqs[pos]:
      upper = pos
      break
  if upper == 0:
    return "Doh-"
  if upper == Freqs.len:
    return "doh+"
  # Between two entries: the closer one wins, ties going to the upper one.
  if Freqs[upper] - freq <= freq - Freqs[upper-1]:
    return Notes[upper] & '-'
  return Notes[upper-1] & '+'
proc getUint32(buffer: openArray[byte]; pos: Natural): uint32 =
  ## Returns the 32-bit unsigned integer stored in little-endian byte order
  ## at `buffer[pos .. pos+3]`.
  ##
  ## Every byte is fetched through normal indexing, so a `pos` that leaves
  ## fewer than four bytes is caught by the bounds check instead of reading
  ## past the buffer through a raw pointer.
  for offset in 0 .. 3:
    result = result or (uint32(buffer[pos + offset]) shl (8 * offset))
# Open the input and read its 44-byte header.
let file = open("notes.wav")
var hdr: array[44, byte]
let headerRead = file.readBytes(hdr, 0, hdr.len)
doAssert headerRead == hdr.len

# Check header parameters.
let sampleRate = getUint32(hdr, 24)
echo "Sample rate: ", sampleRate
let dataLength = getUint32(hdr, 40)
let duration = dataLength div sampleRate
echo "Duration: ", duration

let sampleRateF = float(sampleRate)
let nbytes = 20
var
  sum = 0.0
  data = newSeq[byte](sampleRate)
echo "Bytes examined: ", nbytes, " per sample"

# One buffer of `sampleRate` bytes per iteration; only bytes 1..nbytes of each
# buffer contribute a frequency estimate.
for _ in 0..<duration:
  let got = file.readBytes(data, 0, data.len)
  doAssert got == data.len
  for i in 1..nbytes:
    sum += arcsin(float(data[i]) / 32) * sampleRateF / (i.toFloat * 2 * Pi)

let cav = sum / float(duration.int * nbytes)
echo &"\nComputed average frequency = {cav:.1f} Hz ({cav.getNote()})"

# Average of the reference table, for comparison.
let aav = mean(Freqs)
echo &"Actual average frequency = {aav:.1f} Hz ({aav.getNote()})"
- Output:
Sample rate: 44100 Duration: 8 Bytes examined: 20 per sample Computed average frequency = 387.1 Hz (Soh-) Actual average frequency = 385.4 Hz (Soh-)
file.close()
Perl
The input .wav file (notes.wav) is created by the [Musical Scale#Go] entry.
# 20200808 added Perl programming solution
use v5.10;
use strict;
use warnings;
use autodie;
use Math::Trig;
use List::Util qw/sum/;
use constant PI => 4 * atan2(1, 1);
# Reference frequencies in ascending order; $freqs[$i] belongs to $notes[$i].
my @freqs = qw ( 261.6 293.6 329.6 349.2 392.0 440.0 493.9 523.3 );
# Sol-fa name reported for the frequency at the same index in @freqs.
my @notes = qw ( Doh Ray Mee Fah Soh Lah Tee doh );
# getNote($freq)
# Maps $freq onto the nearest entry of @freqs and returns that entry's name
# from @notes with a sign appended: "-" when $freq is at or below the chosen
# entry, "+" when it is above it. Values at or below the first entry give
# "Doh-"; values above the last entry give "doh+".
sub getNote {
    my ($freq) = @_;

    # Position of the first table entry >= $freq, or scalar(@freqs) if none.
    # The assignment and the loop exit are separate statements on purpose:
    # "$index = $_ and last" never leaves the loop for position 0, because
    # the assigned value 0 is false.
    my $index = @freqs;
    for my $i ( 0 .. $#freqs ) {
        if ( $freq <= $freqs[$i] ) {
            $index = $i;
            last;
        }
    }

    return "Doh-" if $index == 0;
    return "doh+" if $index == @freqs;

    # Between two entries: the closer one wins, ties going to the upper one.
    return $freqs[$index] - $freq <= $freq - $freqs[ $index - 1 ]
        ? $notes[$index] . "-"
        : $notes[ $index - 1 ] . "+";
}
open my $fh, '<:raw', './notes.wav';
# http://www.topherlee.com/software/pcm-tut-wavformat.html
# Read the first 44 bytes in one go; the two little-endian 32-bit fields used
# here sit at byte offsets 24 (sample rate) and 40 (data length).
read $fh, my $header, 44;
my ( $sampleRate, $dataLength ) = unpack 'x24 V x12 V', $header;
print "Sample Rate : $sampleRate\n";
my $duration = $dataLength / $sampleRate;
print "Duration : $duration\n";

my $nbytes = 20;
my $sum    = 0;
print "Bytes examined : $nbytes per sample\n";

# One chunk of $sampleRate bytes per iteration; only bytes 1..$nbytes of each
# chunk contribute a frequency estimate.
while ( read $fh, my $data, $sampleRate ) {
    my @chunk = split //, $data;
    $sum += asin( ord( $chunk[$_] ) / 32 ) * $sampleRate / ( 2 * PI * $_ )
        for 1 .. $nbytes;
}
close $fh;

my $cav = $sum / ( $duration * $nbytes );
printf "Computed average frequency = %.1f Hz (%s)\n", $cav, getNote($cav);

# Average of the reference table, for comparison.
my $aav = sum(@freqs) / @freqs;
printf "Actual average frequency = %.1f Hz (%s)\n", $aav, getNote($aav);
- Output:
go run Musical_scale.go
file notes.wav
notes.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 8 bit, mono 44100 Hz
./Doh_ray_me.pl
Sample Rate : 44100
Duration : 8
Bytes examined : 20 per sample
Computed average frequency = 387.1 Hz (Soh-)
Actual average frequency = 385.4 Hz (Soh-)
Phix
Likewise analyses the output file of Musical_scale#Phix
without js -- (file i/o)
-- Reference frequencies in ascending order and the sol-fa name reported for
-- each; freqs[i] belongs to notes[i].
constant freqs = {261.6, 293.6, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3},
         notes = {"Doh", "Ray", "Mee", "Fah", "Soh", "Lah", "Tee", "doh"}

-- Maps freq onto the nearest entry of freqs and returns that entry's name
-- with a sign appended: "-" when freq is at or below the chosen entry, "+"
-- when it is above it. Values at or below the first entry give "Doh-";
-- values above the last entry give "doh+".
function getNote(atom freq)
    -- idx: position of the first entry >= freq, or length(freqs)+1 if none
    integer idx = length(freqs)+1
    for i=1 to length(freqs) do
        if freq<=freqs[i] then
            idx = i
            exit
        end if
    end for
    string res
    if idx=1 then
        res = "Doh-"
    elsif idx>length(freqs) then
        res = "doh+"
    elsif freqs[idx]-freq <= freq-freqs[idx-1] then
        res = notes[idx] & "-"
    else
        res = notes[idx-1] & "+"
    end if
    return res
end function

object data = get_text("notes.wav")
if data=-1 then
    crash("error opening notes.wav")
end if
-- Header fields are taken from bytes 25..28 and 41..44 (1-based); offset is
-- advanced by sampleRate after each outer iteration below.
integer sampleRate = bytes_to_int(data[25..28]),
        dataLength = bytes_to_int(data[41..44]),
        nbytes = 20,
        offset = 45
atom duration = dataLength/sampleRate,
     tot = 0
printf(1,"Sample Rate : %d\n", sampleRate)
printf(1,"Duration : %s\n", elapsed(duration))
printf(1,"Bytes examined : %d per sample\n\n", nbytes)
-- For every stretch of sampleRate bytes, turn nbytes bytes into frequency
-- estimates and accumulate them.
for j=1 to duration do
    for i=1 to nbytes do
        atom bf = data[offset+i]/32,
             freq = arcsin(bf) * sampleRate / (i*PI*2)
        tot += freq
    end for
    offset += sampleRate
end for
-- cav: average of the estimates; aav: average of the reference table.
atom cav = tot / (duration * nbytes),
     aav = sum(freqs) / length(freqs)
printf(1,"Computed average frequency = %.1f Hz (%s)\n", {cav, getNote(cav)})
printf(1,"Actual average frequency = %.1f Hz (%s)\n", {aav, getNote(aav)})
- Output:
Sample Rate : 44100
Duration : 8s
Bytes examined : 20 per sample

Computed average frequency = 387.1 Hz (Soh-)
Actual average frequency = 385.4 Hz (Soh-)
Raku
The input .wav file (notes.wav) is created by the [Musical Scale#Go] entry.
# 20200721 Raku programming solution
# Reference frequencies in ascending order; freqs[i] belongs to notes[i].
my \freqs = < 261.6 293.6 329.6 349.2 392.0 440.0 493.9 523.3 >;
# Sol-fa name reported for the frequency at the same index in freqs.
my \notes = < Doh Ray Mee Fah Soh Lah Tee doh >;
# Maps freq onto the nearest entry of freqs and returns that entry's name from
# notes with a sign appended: "-" when freq is at or below the chosen entry,
# "+" when it is above it. Values at or below the first entry give "Doh-";
# values above the last entry give "doh+".
sub getNote (\freq) {
    # Position of the first table entry >= freq, or the table size if none.
    # Looking the position up directly avoids "$index = $_ and last", which
    # never leaves the loop for position 0 because the assigned 0 is false.
    my $index = freqs.first({ freq ≤ $_ }, :k) // freqs.elems;

    return "Doh-" if $index == 0;
    return "doh+" if $index == freqs.elems;

    # Between two entries: the closer one wins, ties going to the upper one.
    freqs[$index] - freq ≤ freq - freqs[$index-1]
        ?? notes[$index] ~ "-"
        !! notes[$index-1] ~ "+"
}
# Read the whole file in binary mode.
my $file = slurp "./notes.wav", :bin or die;
# http://www.topherlee.com/software/pcm-tut-wavformat.html
# https://stackoverflow.com/a/49081648/3386748
# Sample rate: little-endian 32-bit value taken from bytes 24..27.
my $sampleRate = Blob.new(@$file[24..27]).read-uint32(0,LittleEndian);
# self-ref: [+] @$file[24..27].pairs.map: {$_.value*256**$_.key};
say "Sample Rate : ", $sampleRate;
# Data length: little-endian 32-bit value taken from bytes 40..43.
my $dataLength = Blob.new(@$file[40..43]).read-uint32(0,LittleEndian);
# self-ref: [+] @$file[40..43].pairs.map: {$_.value*256**$_.key};
my $duration = $dataLength / $sampleRate;
say "Duration : ", $duration;
my ( $sum, $sampleRateF ) = ( 0, $sampleRate.Num ) ;
my $nbytes = 20;
say "Bytes examined : ", $nbytes, " per sample";
# Walk the bytes from offset 44 onwards in groups of $sampleRate; only
# positions 1..$nbytes of each group contribute a frequency estimate.
for @$file[44..*].rotor($sampleRate) -> $data {
for (1..$nbytes) -> $k {
my $bf = @$data[$k] / 32;
$sum += $bf.asin × $sampleRateF / ( 2 × π × $k );
}
}
# Average of all estimates.
my $cav = $sum / ( $duration * $nbytes );
say "Computed average frequency = {$cav.fmt('%.1f')} Hz ({getNote($cav)})";
# Average of the reference table, for comparison.
my $aav = ([+] freqs) / freqs;
say "Actual average frequency = {$aav.fmt('%.1f')} Hz ({getNote($aav)})";
- Output:
go run Musical_scale.go file notes.wav notes.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 8 bit, mono 44100 Hz ./Doh_ray_me.raku Sample Rate : 44100 Duration : 8 Bytes examined : 20 per sample Computed average frequency = 387.1 Hz (Soh-) Actual average frequency = 385.4 Hz (Soh-)
V (Vlang)
As an example, we analyze the .wav file (notes.wav) created by the [Musical Scale] task.
import os
import math
import encoding.binary
// freqs: reference frequencies in ascending order; freqs[i] belongs to notes[i].
// notes: sol-fa name reported for the frequency at the same index in freqs.
const (
freqs = [261.6, 293.6, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3]
notes = ["Doh", "Ray", "Mee", "Fah", "Soh", "Lah", "Tee", "doh"]
)
// get_note maps freq onto the nearest entry of freqs and returns that entry's
// name from notes with a sign appended: "-" when freq is at or below the
// chosen entry, "+" when it is above it. Values at or below the first entry
// give "Doh-"; values above the last entry give "doh+".
fn get_note(freq f64) string {
	// upper: position of the first entry >= freq, or freqs.len if none
	mut upper := freqs.len
	for pos, f in freqs {
		if freq <= f {
			upper = pos
			break
		}
	}
	if upper == 0 {
		return "Doh-"
	}
	if upper == freqs.len {
		return "doh+"
	}
	// between two entries: the closer one wins, ties going to the upper one
	if freqs[upper] - freq <= freq - freqs[upper - 1] {
		return notes[upper] + '-'
	}
	return notes[upper - 1] + '+'
}
// main reads notes.wav, estimates a frequency from the first nbytes data
// bytes of every sample_rate-sized stretch, and prints the average estimate
// next to the average of the freqs table, each labelled with get_note.
fn main() {
	mut wav := os.open("notes.wav")?
	defer {
		wav.close()
	}
	mut header := []byte{len: 44}
	wav.read(mut &header)?
	// check header parameters
	sample_rate := int(binary.little_endian_u32(header[24..28]))
	println("Sample Rate : $sample_rate")
	data_length := int(binary.little_endian_u32(header[40..]))
	duration := data_length / sample_rate
	println("Duration : $duration")
	rate := f64(sample_rate)
	nbytes := 20
	mut estimates := 0.0
	mut buf := []byte{len: sample_rate}
	println("Bytes examined : $nbytes per sample")
	// one buffer of sample_rate bytes per iteration; only bytes 1..nbytes
	// of each buffer contribute a frequency estimate
	for _ in 0..duration {
		wav.read(mut &buf)?
		for i := 1; i <= nbytes; i++ {
			estimates += math.asin(f64(buf[i]) / 32) * rate / (f64(i) * math.pi * 2)
		}
	}
	cav := estimates / (f64(duration) * f64(nbytes))
	println("\nComputed average frequency = ${cav:.1} Hz (${get_note(cav)})")
	// average of the reference table, for comparison
	mut table_total := 0.0
	for f in freqs {
		table_total += f
	}
	aav := table_total / f64(freqs.len)
	println("Actual average frequency = ${aav:.1} Hz (${get_note(aav)})")
}
- Output:
Sample Rate : 44100
Duration : 8
Bytes examined : 20 per sample

Computed average frequency = 387.1 Hz (Soh-)
Actual average frequency = 385.4 Hz (Soh-)
Wren
We analyze the .wav file (musical_scale.wav) created by the Musical scale task.
import "io" for File
import "./crypto" for Bytes
import "./fmt" for Fmt
import "./math" for Nums
import "./seq" for Lst
// Reference frequencies in ascending order; freqs[i] belongs to notes[i].
var freqs = [261.6, 293.6, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3]
// Sol-fa name reported for the frequency at the same index in freqs.
var notes = ["Doh", "Ray", "Mee", "Fah", "Soh", "Lah", "Tee", "doh"]
// Maps freq onto the nearest entry of freqs and returns that entry's name
// from notes with a sign appended: "-" when freq is at or below the chosen
// entry, "+" when it is above it. Values at or below the first entry give
// "Doh-"; values above the last entry give "doh+".
var getNote = Fn.new { |freq|
    var count = freqs.count
    // upper: position of the first entry >= freq, or count if none
    var upper = count
    for (pos in 0...count) {
        if (freq <= freqs[pos]) {
            upper = pos
            break
        }
    }
    if (upper == 0) return "Doh-"
    if (upper == count) return "doh+"
    // between two entries: the closer one wins, ties going to the upper one
    var gapAbove = freqs[upper] - freq
    var gapBelow = freq - freqs[upper-1]
    if (gapAbove <= gapBelow) return notes[upper] + "-"
    return notes[upper-1] + "+"
}
// Load the whole file as a list of byte values and split off the first
// 44 bytes as the header.
var bytes = File.read("musical_scale.wav").bytes.toList
var hdr = bytes[0..43]
// check header parameters
var sampleRate = Bytes.toIntLE(hdr[24..27])
System.print("Sample Rate : %(sampleRate)")
var dataLength = Bytes.toIntLE(hdr[40..-1])
var duration = dataLength / sampleRate
System.print("Duration : %(duration)")
var nbytes = 20
var total = 0
System.print("Bytes examined : %(nbytes) per sample")
// One chunk of sampleRate bytes per iteration; only positions 1..nbytes of
// each chunk contribute a frequency estimate.
for (chunk in Lst.chunks(bytes[44..-1], sampleRate)) {
    for (k in 1..nbytes) {
        total = total + (chunk[k] / 32).asin * sampleRate / (k * Num.pi * 2)
    }
}
var cav = total / (duration * nbytes)
Fmt.print("\nComputed average frequency = $.1f Hz ($s)", cav, getNote.call(cav))
// Average of the reference table, for comparison.
var aav = Nums.mean(freqs)
Fmt.print("Actual average frequency = $.1f Hz ($s)", aav, getNote.call(aav))
- Output:
Sample Rate : 44100
Duration : 8
Bytes examined : 20 per sample

Computed average frequency = 387.1 Hz (Soh-)
Actual average frequency = 385.4 Hz (Soh-)