midi file parser
Sean McIlroy
sean_mcilroy at yahoo.com
Tue Nov 24 00:15:52 EST 2009
"""
A Sequence is a list [FormatType, TimeDivision, Tracks] where
*) FormatType is in [0,1,2]
*) TimeDivision is either [TicksPerBeat] with TicksPerBeat in range(2**15) or
[FramesPerSecond, TicksPerFrame] with FramesPerSecond in range(2**7)
and TicksPerFrame in range(2**8)
*) Tracks is a list of Tracks, each of which is a list of Events
An Event is either a ChannelEvent or a MetaEvent.
A ChannelEvent is [DeltaTime, EventType, Channel, Parameters]
and a MetaEvent is [DeltaTime, MetaType, Message] where
*) DeltaTime is a nonnegative integer
*) EventType is in range(7)
*) Channel is in range(2**4)
*) Parameters is a list with elements in range(2**7)
*) MetaType is in range(2**7)
*) Message is a string
The EventTypes and Parameters of ChannelEvents have the following
verbal handles:
EventType Parameters
0 = NoteOff [NoteNumber, Velocity]
1 = NoteOn [NoteNumber, Velocity]
2 = NoteAftertouch [NoteNumber, Amount]
3 = Controller [ControllerType, Value]
4 = ProgramChange [ProgramNumber]
5 = ChannelAftertouch [Amount]
6 = PitchBend [ValueLSB, ValueMSB]
"""
def concat(xs):
    """Flatten one level: return a single list holding the items of every iterable in xs."""
    return [item for group in xs for item in group]
def zeropadded(digits, minlength):
    """Return digits with zeros prepended until it has at least minlength entries.

    A list that is already long enough comes back unchanged in content
    (never truncated).
    """
    shortfall = minlength - len(digits)
    leadingzeros = [0] * shortfall  # a negative shortfall yields an empty list
    return leadingzeros + digits
def noleadingzeros(digits):
    """Return digits without its leading zeros, always keeping the last digit.

    An all-zero sequence collapses to its final zero; an empty sequence is
    returned as-is instead of raising IndexError.
    """
    start = 0
    lastindex = len(digits) - 1
    # Scan once for the first significant digit rather than re-slicing per zero.
    while start < lastindex and digits[start] == 0:
        start += 1
    return digits[start:] if start else digits
def number2digits(number, base):
    """Return the digits of number in the given base, most significant first.

    A number smaller than base (including a negative one) is returned as a
    single-element list.
    """
    remaining = number
    lowtohigh = []
    while remaining >= base:
        remaining, digit = divmod(remaining, base)
        lowtohigh.append(digit)
    lowtohigh.append(remaining)
    lowtohigh.reverse()
    return lowtohigh
def digits2number(digits, base):
    """Return the number whose base-`base` digits (most significant first) are digits.

    Leading zeros are harmless and an empty digit sequence evaluates to 0.
    """
    number = 0
    # Horner's rule: one pass, no table of powers, no need to strip zeros.
    for digit in digits:
        number = number * base + digit
    return number
def number2fixedlength(number, length):
    """Return number as base-256 digits, most significant first, zero-padded to length.

    The result is longer than length when number needs more digits; nothing
    is truncated.
    """
    bytedigits = number2digits(number, 0x100)
    return zeropadded(bytedigits, length)
def fixedlength2number(digits):
    """Return the number encoded by base-256 digits, most significant first."""
    bytebase = 0x100
    return digits2number(digits, bytebase)
def number2variablelength(number):
    """Return number as a variable-length quantity: a list of byte values.

    The number is split into base-128 digits; every digit except the last
    has 128 added (continuation flag), the last is left as-is.
    """
    groups = number2digits(number, 0x80)
    *leading, final = groups
    return [group + 0x80 for group in leading] + [final]
def variablelength2number(variablelength):
    """Return the number encoded by a variable-length quantity (list of byte values).

    128 is subtracted from every entry except the last, and the results are
    read as base-128 digits, most significant first.
    """
    lastposition = len(variablelength) - 1
    groups = [
        value - 0x80 if position < lastposition else value
        for position, value in enumerate(variablelength)
    ]
    return digits2number(groups, 0x80)
def smallbyte(number):
    """Return whether number is below 2**7 (128)."""
    limit = 0x80
    return number < limit
def getfixedlength(numbers, startindex, numbytes):
    """Read numbytes entries of numbers beginning at startindex.

    Returns (index just past the field, the field itself).
    """
    stop = startindex + numbytes
    field = numbers[startindex:stop]
    return (stop, field)
def getvariablelength(numbers, startindex):
    """Read a variable-length field of numbers beginning at startindex.

    The field runs up to and including the first entry for which smallbyte()
    is true.  Returns (index just past the field, the field itself).
    """
    endindex = startindex
    while True:
        current = numbers[endindex]
        endindex += 1
        if smallbyte(current):
            break
    return (endindex, numbers[startindex:endindex])
def analyzetimedivision(numbers):
    """Decode the two time-division bytes of a header.

    The top bit of the first byte selects the form:
    0 -> [TicksPerBeat], built from the remaining 15 bits;
    1 -> [FramesPerSecond, TicksPerFrame], taken from the low 7 bits of the
    first byte and the whole second byte.
    """
    highbyte, lowbyte = numbers
    indicator, highbits = divmod(highbyte, 0x80)
    if indicator == 0:
        return [highbits * 0x100 + lowbyte]
    if indicator == 1:
        return [highbits, lowbyte]
def synthesizetimedivision(numbers):
    """Encode a TimeDivision as the two header bytes.

    [TicksPerBeat] is split into high and low bytes with indicator bit 0;
    [FramesPerSecond, TicksPerFrame] is stored as-is with indicator bit 1 set
    on the first byte.
    """
    fieldcount = len(numbers)
    if fieldcount == 1:
        indicator = 0
        highbits, lowbyte = divmod(numbers[0], 0x100)
    if fieldcount == 2:
        indicator = 1
        highbits, lowbyte = numbers
    return [indicator * 0x80 + highbits, lowbyte]
def analyzeheaderdata(numbers):
    """Decode header chunk data into (formattype, numtracks, timedivision).

    Entries 0-1 hold the format type, 2-3 the track count and 4-5 the time
    division.
    """
    formatfield, trackcountfield, divisionfield = numbers[0:2], numbers[2:4], numbers[4:6]
    return (
        fixedlength2number(formatfield),
        fixedlength2number(trackcountfield),
        analyzetimedivision(divisionfield),
    )
def synthesizeheaderdata(formattype, numtracks, timedivision):
    """Encode header chunk data: 2 bytes format type, 2 bytes track count, 2 bytes time division."""
    formatbytes = number2fixedlength(formattype, 2)
    trackcountbytes = number2fixedlength(numtracks, 2)
    divisionbytes = synthesizetimedivision(timedivision)
    return formatbytes + trackcountbytes + divisionbytes
def analyzestatus(statusbyte):
    """Split a status byte into (eventtype, channel).

    After removing the high bit (128), the upper part is the event type and
    the low 4 bits are the channel.
    """
    eventtype, channel = divmod(statusbyte - 0x80, 0x10)
    return (eventtype, channel)
def synthesizestatus(eventtype, channel):
    """Return a one-element list holding the status byte for eventtype and channel."""
    # 8 + eventtype puts the high bit and event type in the upper nibble.
    uppernibble = 8 + eventtype
    return [uppernibble * 0x10 + channel]
def synthesizeevent(event):
    """Encode one event as a list of byte values.

    A 4-field event is a ChannelEvent: delta time, status byte, parameters.
    A 3-field event is a MetaEvent: delta time, status byte 255, meta type,
    message length, then one byte per message character.
    Any other length falls through and returns None.
    """
    fieldcount = len(event)
    if fieldcount == 4:
        deltatime, eventtype, channel, parameters = event
        prefix = number2variablelength(deltatime)
        return prefix + synthesizestatus(eventtype, channel) + parameters
    if fieldcount == 3:
        deltatime, metatype, message = event
        lengthbytes = number2variablelength(len(message))
        messagebytes = [ord(character) for character in message]
        prefix = number2variablelength(deltatime)
        return prefix + synthesizestatus(7, 15) + [metatype] + (lengthbytes + messagebytes)
def makechunk(identifier, numbers):
    """Return a chunk: identifier, 4-byte length of numbers, then numbers."""
    lengthfield = number2fixedlength(len(numbers), 4)
    return identifier + lengthfield + numbers
def makeheader(formattype, numtracks, timedivision):
    """Return the header chunk (identifier 'MThd') as a list of byte values."""
    payload = synthesizeheaderdata(formattype, numtracks, timedivision)
    headeridentifier = list(b'MThd')  # [77, 84, 104, 100]
    return makechunk(headeridentifier, payload)
def maketrack(events):
    """Return a track chunk (identifier 'MTrk') holding the encoded events."""
    payload = []
    for event in events:
        payload.extend(synthesizeevent(event))
    trackidentifier = list(b'MTrk')  # [77, 84, 114, 107]
    return makechunk(trackidentifier, payload)
def getchunks(numbers):
    """Split file bytes into the data portions of its chunks.

    Each chunk is 4 identifier bytes, a 4-byte length, then that many data
    bytes.  The identifier is skipped without being inspected; only the data
    is returned.
    """
    total = len(numbers)
    chunks = []
    position = 0
    while position < total:
        lengthstart = position + 4
        datastart = position + 8
        dataend = datastart + fixedlength2number(numbers[lengthstart:datastart])
        chunks.append(numbers[datastart:dataend])
        position = dataend
    return chunks
def getevent(numbers, startindex, runningstatus):
    """Decode the event of track data `numbers` that begins at startindex.

    runningstatus is a list holding the most recent status byte (or empty);
    it is used when the event carries no status byte of its own.

    Returns (index of the next event, running status to pass on, event).
    event is a ChannelEvent, a MetaEvent, or None for events with event type
    7 on a channel other than 15, which are skipped.
    """
    (statusindex, deltabytes) = getvariablelength(numbers, startindex)
    deltatime = variablelength2number(deltabytes)
    if smallbyte(numbers[statusindex]):
        # No status byte here: the data starts immediately.
        dataindex, status = statusindex, []
    else:
        dataindex, status = statusindex + 1, [numbers[statusindex]]
    nextrunningstatus = status or runningstatus
    (eventtype, channel) = analyzestatus(nextrunningstatus[0])
    if eventtype != 7:
        # ProgramChange (4) and ChannelAftertouch (5) take one parameter.
        numparameters = 1 if eventtype in (4, 5) else 2
        (nextstartindex, parameters) = getfixedlength(numbers, dataindex, numparameters)
        event = [deltatime, eventtype, channel, parameters]
    elif channel == 15:
        metatype = numbers[dataindex]
        (lengthend, lengthbytes) = getvariablelength(numbers, dataindex + 1)
        (nextstartindex, payload) = getfixedlength(
            numbers, lengthend, variablelength2number(lengthbytes))
        message = ''.join(chr(value) for value in payload)
        event = [deltatime, metatype, message]
    else:
        # Length-prefixed payload that is read past and dropped.
        (lengthend, lengthbytes) = getvariablelength(numbers, dataindex)
        (nextstartindex, _) = getfixedlength(
            numbers, lengthend, variablelength2number(lengthbytes))
        event = None
    return (nextstartindex, nextrunningstatus, event)
def getevents(numbers):
    """Decode track data into a list of events, dropping those getevent() returns as None."""
    events = []
    runningstatus = []
    position = 0
    end = len(numbers)
    while position < end:
        position, runningstatus, event = getevent(numbers, position, runningstatus)
        if event is not None:
            events.append(event)
    return events
def parse(filedata):
    """Parse the bytes of a MIDI file into [formattype, timedivision, tracks].

    The first chunk is read as the header and every later chunk as a track.
    The track count stored in the header is decoded but not used.
    """
    chunks = getchunks(list(filedata))
    headerfields = analyzeheaderdata(chunks[0])
    formattype, timedivision = headerfields[0], headerfields[2]
    tracks = list(map(getevents, chunks[1:]))
    return [formattype, timedivision, tracks]
def unparse(sequence):
    """Serialize [formattype, timedivision, tracks] into the bytes of a MIDI file.

    The track count written to the header is len(tracks).
    """
    formattype, timedivision, tracks = sequence
    headerbytes = makeheader(formattype, len(tracks), timedivision)
    trackbytes = [value for track in tracks for value in maketrack(track)]
    return bytes(headerbytes + trackbytes)
########################################
## from midiparser import parse, unparse
def readmidi(filepath):
    """Read the file at filepath and return its parsed Sequence (see parse)."""
    # The context manager closes the file even if reading fails.
    with open(filepath, 'rb') as midifile:
        filedata = midifile.read()
    return parse(filedata)
def writemidi(sequence,filepath):
    """Serialize sequence (see unparse) and write it to filepath, replacing any existing file."""
    # Serialize first so a bad sequence does not leave a truncated file behind,
    # then let the context manager flush and close the handle.
    filedata = unparse(sequence)
    with open(filepath, 'wb') as midifile:
        midifile.write(filedata)
def replace(replacee, replacer, string):
    """Return string with every occurrence of replacee swapped for replacer."""
    fragments = string.split(replacee)
    return replacer.join(fragments)
def notename(notenumber):
    """Return a name such as 'C#-5': pitch name, a dash, then notenumber // 12."""
    names = ('C','C#','D','D#','E','F','F#','G','G#','A','A#','B')
    octave, pitchclass = divmod(notenumber, 12)
    return '-'.join([names[pitchclass], str(octave)])
def gettrackname(track):
    """Return the message of the first MetaEvent with MetaType 3 in track.

    Returns None when there is no such event or its message is empty.
    Meta messages are already strings, so the message is returned directly.
    """
    for event in track:
        if len(event) == 3 and event[1] == 3:
            return event[2] or None
    return None
def noteevent(event):
    """Return whether event is a 4-field event with EventType 0, 1 or 2."""
    if len(event) != 4:
        return False
    return event[1] in (0, 1, 2)
def switchevent(event):
    """Return whether event is a 4-field event with EventType 0 or 1."""
    if len(event) != 4:
        return False
    return event[1] in (0, 1)
def firstnoteindices(track):
    """Return [index of the first note event in track], or [] if there is none."""
    for position, event in enumerate(track):
        if noteevent(event):
            return [position]
    return []
def lastnoteindices(track):
    """Return [index of the last note event in track], or [] if there is none."""
    position = len(track) - 1
    while position >= 0:
        if noteevent(track[position]):
            return [position]
        position -= 1
    return []
def explodefile(filepath, directorypath):
    """Write one MIDI file per track of the file at filepath.

    For a format-1 file whose first track has no note events, that first
    track is copied in front of the track in every output file.  Each output
    path is derived by changefilepath() from filepath, directorypath and a
    function that appends '_' plus the track name (with '/' replaced by '_')
    to a base name.
    """
    formattype, timedivision, tracks = readmidi(filepath)
    if formattype == 1 and not firstnoteindices(tracks[0]):
        split = 1
    else:
        split = 0
    temposettings, tracks = tracks[:split], tracks[split:]
    for position, track in enumerate(tracks):
        trackname = gettrackname(track) or ('track_' + str(position))
        suffix = '_' + replace('/', '_', trackname)
        rewrite = lambda basename: basename + suffix
        # NOTE(review): changefilepath is not defined in the visible part of
        # this file -- confirm it is provided elsewhere.
        singletrackfilepath = changefilepath(filepath, directorypath, rewrite)
        singletrackfile = (formattype, timedivision, temposettings + [track])
        writemidi(singletrackfile, singletrackfilepath)
def reflectpitch(event):
    """Return event with its note number n replaced by 128 - n.

    Events that are not note events are returned unchanged.

    Raises:
        ValueError: if the reflected note number is not in range(2**7).
            Note 0 would map to 128, which is not a valid parameter value.
    """
    if not noteevent(event):
        return event
    [deltatime, eventtype, channel, parameters] = event
    [notenumber, velocity] = parameters
    newnotenumber = (2**7) - notenumber
    if newnotenumber not in range(2**7):
        raise ValueError(
            'cannot reflect note number %r: result %r is outside range(128)'
            % (notenumber, newnotenumber))
    return [deltatime, eventtype, channel, [newnotenumber, velocity]]
def translatepitch(event, deltapitch):
    """Return event with deltapitch added to its note number.

    Events that are not note events are returned unchanged.  An assertion
    checks that the shifted note number stays in range(2**7).
    """
    if noteevent(event):
        deltatime, eventtype, channel, (notenumber, velocity) = event
        shifted = notenumber + deltapitch
        assert shifted in range(2**7)
        return [deltatime, eventtype, channel, [shifted, velocity]]
    return event
def switch(event):
    """Swap the on/off sense of a NoteOn/NoteOff event; return other events unchanged.

    The result is always a NoteOn (EventType 1).  A NoteOff, or a NoteOn with
    velocity 0, gets velocity 64; any other NoteOn gets velocity 0.
    """
    if not switchevent(event):
        return event
    deltatime, eventtype, channel, (notenumber, velocity) = event
    wasoff = eventtype == 0 or velocity == 0
    newvelocity = 64 if wasoff else 0
    return [deltatime, 1, channel, [notenumber, newvelocity]]
def invert(track):
    """Return a new list with reflectpitch() applied to every event of track."""
    return list(map(reflectpitch, track))
def transpose(track, deltapitch):
    """Return a new list with every event of track shifted by deltapitch via translatepitch()."""
    shifted = []
    for event in track:
        shifted.append(translatepitch(event, deltapitch))
    return shifted
def retrograde(track):
    """Return track with the span from its first to its last note event reversed.

    Events before the first note and after the last note keep their place.
    Inside the span the events are reversed and passed through switch(); the
    first reversed event keeps its own delta time and every later one takes
    the delta time its predecessor in the reversed order had.

    A track without note events is returned as an unchanged copy.  Events of
    any length inside the span (e.g. 3-field MetaEvents) are retimed too.
    """
    firstindices = firstnoteindices(track)
    if not firstindices:
        return list(track)
    prefixindex = firstindices[0]
    suffixindex = lastnoteindices(track)[0] + 1
    prefix = track[:prefixindex]
    span = track[prefixindex:suffixindex]
    suffix = track[suffixindex:]
    reversedevents = [switch(event) for event in reversed(span)]
    # Delta times shift one place down the reversed span.
    deltatimes = [span[-1][0]] + [event[0] for event in reversedevents[:-1]]
    # Rebuild by position so the event length does not matter.
    retimed = [
        [deltatime] + list(event[1:])
        for deltatime, event in zip(deltatimes, reversedevents)
    ]
    return prefix + retimed + suffix
def sequences(length, elements):
    """Return every list of `length` items drawn from elements, with repetition.

    The results are ordered with the first position varying slowest;
    sequences(0, ...) is [[]].

    Raises:
        ValueError: if length is negative.
    """
    if length < 0:
        raise ValueError('length must be nonnegative, got %r' % (length,))
    # Materialize once so a one-shot iterable can be reused on every pass.
    choices = list(elements)
    result = [[]]
    # Extend every partial sequence by one position per pass.
    for _ in range(length):
        result = [partial + [choice] for partial in result for choice in choices]
    return result
def toggle(notenumber):
    """Return a [NoteOn, NoteOff] pair for notenumber on channel 0.

    The NoteOn has delta time 0 and velocity 127; the NoteOff follows with
    delta time 300 and velocity 0.
    """
    noteon = [0, 1, 0, [notenumber, 0x7F]]
    noteoff = [300, 0, 0, [notenumber, 0]]
    return [noteon, noteoff]
def eartrainer(notenumbers):
    """Return a one-track Sequence that plays notenumbers one after another.

    Each note contributes the two events from toggle(); the track ends with a
    MetaEvent of MetaType 47 with an empty message.  The Sequence has format
    type 0 and time division [120].  An empty notenumbers yields a track
    holding only the closing MetaEvent.
    """
    endoftrack = [0, 47, []]
    # Flatten in one pass; unlike reduce() this also works for no notes.
    track = [event for notenumber in notenumbers for event in toggle(notenumber)]
    track.append(endoftrack)
    return [0, [120], [track]]
def makeflashcards(length,lowest,highest):
    """Write ear-training flashcards into new 'questions' and 'answers' directories.

    Every sequence of `length` note numbers from lowest to highest
    (inclusive) is generated and shuffled.  For sequence number i,
    questions/sequence_<i>.mid plays it and answers/sequence_<i>.txt lists
    its note names separated by spaces.  Both directories are created with
    mkdir in the current working directory.
    """
    from os import mkdir
    from random import shuffle
    mkdir('questions')
    mkdir('answers')
    notesequences = sequences(length, range(lowest, highest + 1))
    shuffle(notesequences)
    for i, notesequence in enumerate(notesequences):
        writemidi(eartrainer(notesequence), 'questions/sequence_' + str(i) + '.mid')
        answer = ' '.join([notename(x) for x in notesequence])
        # Close each answer file promptly; there can be many of them.
        with open('answers/sequence_' + str(i) + '.txt', 'w') as answerfile:
            answerfile.write(answer)
def noemptytracks(mididata):
    """Return mididata without the tracks that contain no note events.

    For a format-1 Sequence whose first track has no note events, that first
    track is always kept.
    """
    formattype, timedivision, tracks = mididata
    keepfirst = formattype == 1 and not firstnoteindices(tracks[0])
    split = 1 if keepfirst else 0
    withnotes = list(filter(firstnoteindices, tracks[split:]))
    return [formattype, timedivision, tracks[:split] + withnotes]
def nocountin(mididata):
    """Return mididata with the smallest first-note delta time removed from every track.

    The delta time of each track's first note event is reduced by the minimum
    of those delta times over all tracks.  Tracks without note events are
    left alone, and if no track has a note event the Sequence comes back
    unchanged.  The input is not modified: changed tracks and events are
    copies.
    """
    [formattype, timedivision, tracks] = mididata
    firstnotes = [
        (i, j, tracks[i][j][0])
        for i in range(len(tracks))
        for j in firstnoteindices(tracks[i])
    ]
    # Copy each track so replacing an event does not touch the caller's lists.
    newtracks = [list(track) for track in tracks]
    if firstnotes:
        starttime = min(t for (_, _, t) in firstnotes)
        for (i, j, t) in firstnotes:
            event = list(newtracks[i][j])
            event[0] = t - starttime
            newtracks[i][j] = event
    return [formattype, timedivision, newtracks]
def processfiles(directorypath, function):
    """Apply function to every file in directorypath and save the results.

    Each entry listed in directorypath is read with readmidi(), passed
    through function, and written under the same name into a newly created
    'preprocessed' subdirectory.  The listing is taken before that
    subdirectory is created.
    """
    from os import listdir, mkdir
    filenames = listdir(directorypath)
    subdirectorypath = directorypath + '/preprocessed'
    mkdir(subdirectorypath)
    for filename in filenames:
        sequence = readmidi(directorypath + '/' + filename)
        writemidi(function(sequence), subdirectorypath + '/' + filename)
More information about the Python-list
mailing list