[Tutor] Problem with logic while extracting data from binary file

Wed Mar 26 20:01:17 CET 2008

Please always reply to the list not just me.

Bryan Fodness wrote:
>
> Thanks Bob,
>  
> I was having trouble with that loop from the start.  Could you tell me 
> what a=3 is doing, I cannot seem to figure it out.
I accidentally left that in. It was a place for me to set a breakpoint 
in the debugger.

I also failed to point out that I made the creation of the dictionary 
"search" straightforward rather than collecting all the keys and values. 
However neither your code nor mine handles the multi-value cases (e.g. 
\n0\x82). To solve that I changed search to be of type defaultdict so 
each new element is a list and we can use append() to add values to the 
lists. The printed output now looks like lists but that is easy to alter.

In this version I also "import time", and changed the names start and 
time so as not to conflict (even in appearance) with other names.

Also in this version I modified parseDataElement to be a generator (the 
quickest way I could think of to solve the while loop problem) and added 
a for loop in parseDICOM to access the generator. This also required 
adding "start = pos" at the end of the while loop.

BUT ... on further inspection of your program and data I note that there 
is NOTHING that looks for \n0\x82. It sits in the middle of a block of 
data of length 544. There needs to be some code to examine the contents 
of that block and look for \n0\x82.

Also discovered and fixed "name 'LeafJawPostions' is not defined".

Code follows 
------------------------------------------------------------------

import struct
import time
import collections
print("Creating Variables...\n")
startTime = time.clock()
# Read the whole file into memory, closing the handle explicitly rather
# than leaving it open until the interpreter happens to collect it.
dicomFile = open('file.dcm', 'rb')
try:
    rawData = dicomFile.read()
finally:
    dicomFile.close()
# Need to get Transfer Syntax UID (0002,0010) for encoding type

def parseDICOM(data):
    """Parse a raw DICOM byte string into a tag -> list-of-values mapping.

    data -- the complete file contents as a byte string.

    Returns a collections.defaultdict(list) keyed by the raw 4-byte tag
    (group bytes followed by element bytes, exactly as they appear in
    the file).  Every occurrence of a tag appends one value to its list.

    Elements in group 300A whose element number is 0010, 0040, 0070 or
    00B0 are treated as containers: their value is walked with
    parseSequence and each element found inside is stored instead of the
    container itself.
    """
    search = collections.defaultdict(list)
    try:
        _, pos = parsePreamble(data)
    except NotAPreambleException:
        # No 128-byte preamble + 'DICM' marker: start parsing at byte 0.
        pos = 0
    while pos < len(data):
        # parseDataElement is a generator; each tuple carries the offset
        # of the following element, which becomes the new position.
        for element, pos, value, length in parseDataElement(data, pos):
            isContainer = (element.startswith('\n0')
                           and element.endswith('\x00')
                           and element[2] in ('\x10', '@', 'p', '\xb0'))
            if isContainer:
                start = 0
                while start < length:
                    element, start, svalue = parseSequence(value, start)
                    search[element].append(svalue)
            else:
                search[element].append(value)
    return search

class NotAPreambleException(Exception):
    """Raised when the data has no 'DICM' marker at byte offset 128."""


def parsePreamble(data):
    """Split off the 128-byte preamble that precedes the 'DICM' marker.

    data -- the complete file contents.

    Returns (preamble, 132): the first 128 bytes and the offset of the
    first byte after the 4-byte 'DICM' marker.

    Raises NotAPreambleException when bytes 128..131 are not 'DICM'.
    """
    if data[128:132] != 'DICM':
        raise NotAPreambleException("no 'DICM' marker at byte offset 128")
    return data[:128], 132

def parseMetaElementGL(data):
    """Return the integer stored in the element that starts at byte 132.

    The 2-byte value length is read from bytes 138..139 and the value
    itself from byte 140 onward.  Both are decoded as little-endian
    unsigned integers, so the result does not depend on the host byte
    order and a value above 32767 is not truncated or turned negative.

    NOTE(review): presumably this element is the File Meta Information
    Group Length (0002,0000) -- confirm against the files being read.

    Raises struct.error when the stored value is not exactly 4 bytes long.
    """
    length = struct.unpack('<H', data[138:140])[0]
    return struct.unpack('<I', data[140:140 + length])[0]

def parseDataElement(data, start):
    """Generate (element, pos, value, length) tuples beginning at *start*.

    data  -- the complete file contents as a byte string.
    start -- byte offset of the first element to read.

    element is the raw 4-byte tag, value the element's bytes, length the
    decoded value length and pos the offset of the next element.

    While *start* lies before 144 + parseMetaElementGL(data), exactly one
    element is yielded using the layout that carries a 2-byte VR after
    the tag.  From that offset on, every remaining element up to the end
    of *data* is yielded using the tag + 4-byte length layout, and
    "End of File" is printed once the data is exhausted.

    Lengths are decoded as little-endian unsigned integers so that a
    length of 32768 or more cannot come out negative and move pos
    backwards.  A length of 0xFFFFFFFF is not given any special
    treatment: the value then simply runs to the end of *data*.

    Raises struct.error when the data ends inside an element header.
    """
    # Use the buffer that was passed in rather than a module-level global.
    if start < (144 + parseMetaElementGL(data)):
        element = data[start:start + 4]
        vr_field = data[start + 4:start + 6]
        if vr_field in ('UN', 'SQ', 'OB', 'OW'):
            # tag(4) + VR(2) + 2 unused bytes + 4-byte length
            length = struct.unpack('<I', data[start + 8:start + 12])[0]
            header = 12
        else:
            # tag(4) + VR(2) + 2-byte length
            length = struct.unpack('<H', data[start + 6:start + 8])[0]
            header = 8
        pos = start + header + length
        yield element, pos, data[start + header:pos], length
    else:
        while start < len(data):
            # tag(4) + 4-byte length, no VR field
            element = data[start:start + 4]
            length = struct.unpack('<I', data[start + 4:start + 8])[0]
            pos = start + 8 + length
            yield element, pos, data[start + 8:pos], length
            start = pos
        print("End of File")

# Raw bytes of tag (FFFE,E000), i.e. '\xfe\xff\x00\xe0'.
_ITEM_TAG = struct.pack('<HH', 0xFFFE, 0xE000)


def _readSequenceElement(data, start):
    """Read one tag + 4-byte length + value at *start*; return (element, pos, value)."""
    element = data[start:start + 4]
    # Little-endian unsigned, so lengths >= 32768 are not read as negative.
    length = struct.unpack('<I', data[start + 4:start + 8])[0]
    pos = start + 8 + length
    return element, pos, data[start + 8:pos]


def parseSequence(data, start):
    """Read the element at *start* inside a container's value bytes.

    data  -- the value bytes of the enclosing element.
    start -- offset within *data* at which to read.

    When the element read is the (FFFE,E000) tag, its 8-byte header is
    skipped and the element that follows is read instead.  This is done
    at most twice, so a third consecutive (FFFE,E000) tag is returned
    as-is.

    Returns (element, pos, value): the raw 4-byte tag, the offset just
    past the element's value and the value bytes.

    Raises struct.error when fewer than 8 bytes remain at *start*.
    """
    element, pos, value = _readSequenceElement(data, start)
    for _ in range(2):
        if element != _ITEM_TAG:
            break
        start += 8
        element, pos, value = _readSequenceElement(data, start)
    return element, pos, value

# Access variables for use in calculation.
# parsedData maps each raw 4-byte tag to the list of values found for it,
# so every lookup below yields a list (empty when the tag never occurred).
parsedData = parseDICOM(rawData)
InstanceCreationDate = parsedData['\x08\x00\x12\x00']
InstanceCreationTime = parsedData['\x08\x00\x13\x00']
PatientsName = parsedData['\x10\x00\x10\x00']
RTPlanLabel = parsedData['\n0\x02\x00']
DoseReferenceDescription = parsedData['\n0\x16\x00']
ToleranceTableLabel = parsedData['\n0C\x00']
# Takes the first occurrence; raises IndexError if the tag is absent.
NumberOfBeams = int(parsedData['\n0\x80\x00'][0])
# Keys are always 4 bytes (group + element); the trailing '\x00' is
# required or the lookup can never match a parsed tag.
BeamDoseSpecificationPoint = parsedData['\n0\x82\x00']

print("Instance Creation Date\t\t\t=\t%s" % InstanceCreationDate)
print("Instance Creation Time\t\t\t=\t%s" % InstanceCreationTime)
print("Patients Name\t\t\t\t=\t%s" % PatientsName)
print("RT Plan Label\t\t\t\t=\t%s" % RTPlanLabel)
print("DoseReference Description\t\t=\t%s" % DoseReferenceDescription)
print("Tolerance Table Label\t\t\t=\t%s" % ToleranceTableLabel)
print("Number Of Beams\t\t\t\t=\t%i" % NumberOfBeams)
print("Beam Dose Specification Point\t\t=\t%s" % BeamDoseSpecificationPoint)

end = time.clock()
runTime = end - startTime

print("\nVariables created in %.3f seconds\n" % runTime)

-- 
Bob Gailer
919-636-4239 Chapel Hill, NC