[python-win32] Re: generating data plots.

Wed, 20 Mar 2002 08:58:39 -0600

This is a multi-part message in MIME format.
--------------010307070106070508040508
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

My advice would be to use the PSDraw module (part of PIL) to output 
postscript and then convert that to pdf with ghostscript. I used the pay 
version of Acrobat for the conversion so if you've got that or want to 
lay out the $ then that's a good option. You might want to get a book on 
postscript, it is actually a pretty high-level drawing language. The 
PSDraw module though simplifies things a little bit. Attached is a 
script I used to get information from a SQL Server database trace about 
when locks were happening so I could see the contention between threads. 
I generated a massive graph which spanned like 30 pages which I taped 
together and put up on the wall. In the end the graph didn't help us 
that much but it was cool to build and worked great.

Mark Ainsworth wrote:

>Can anyone point me to the right site/pages to read to figure out what
>packages I need to be able to read a text file of data, convert this to
>a simple line graph and then save it in a PDF file.
>
>Thanks.
>
>Mark Ainsworth
>Quality Manager
>Sundance Multiprocessor Technology Ltd.
>Chiltern House, Waterside, Chesham, Bucks HP5 1PS
>Tel. +44 1494 793167 Fax. +44 1494 793168
>MarkA@sundance.com http://www.sundance.com
>
>_______________________________________________
>Python-win32 mailing list
>Python-win32@python.org
>http://mail.python.org/mailman/listinfo/python-win32
>

-- 
Jens B. Jorgensen
jens.jorgensen@tallan.com

--------------010307070106070508040508
Content-Type: text/plain;
 name="big_graph.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="big_graph.py"

from win32com.client import Dispatch
import ADO, sys, time, os, re

sys.path.append('c:\\py21\\DLLs')
sys.path.append('c:\\py21\\PIL')

import PSDraw

# draw the time lines
margin = 0.25 # leave a quarter inch margin all around
page_cols = 5 # five time lines per page
vert_pages = 4 # four pages long on the lines

tl_width = (8.5 - margin * 2) / page_cols

def inch2pts(v) :
    return v * 72

def draw_lines(ps) :
    start_y = 11 - margin
    end_y = margin

    x = margin
    for i in xrange(0, page_cols) :
        ps.line((inch2pts((margin) + i * tl_width), inch2pts(start_y)),
                (inch2pts((margin) + i * tl_width), inch2pts(end_y)))

class EventClass :
    SQL_StmtStarting = 40
    SQL_StmtCompleted = 41
    Lock_Timeout = 27
    Object_Opened = 48
    Object_Closed = 49

cn = ADO.Connection()
cn.Provider = 'sqloledb'
cn.Open('Server=XXXXXX', 'sa', 'XXXXX')

# get the object names for Object_Opens so we can map them when we have a
# LockTimeout
(rs, rows) = cn.Execute("select ObjectID, TextData from c2ctrace where EventClass = 48 and EventSubClass = 8277")
object_ids = {}
while not rs.EOF :
    object_ids[rs.Fields.Item(0).Value] = rs.Fields.Item(1).Value
    rs.MoveNext()

def get_objectid(id) :
    try :
        return object_ids[id]
    except :
        return '<unknown>'

# based on the number of pages we're drawing over we need to determine what
# time quantity will go on each page. so, we need to select the min and max
# values of the time
(rs, rows) = cn.Execute("select min(StartTime), max(StartTime) from c2ctrace where EventClass = 40 or EventClass = 27 and SPID is not NULL")

min_time = float(rs.Fields.Item(0).Value)
max_time = float(rs.Fields.Item(1).Value)
rs.Close()

# we adjust the min_time and max_time so that we get 0.5 inch of space at the top
# and bottom of the timelines
buffer_time = 0.5 * (max_time - min_time) / (vert_pages * (11 - 2 * margin))
min_time -= buffer_time
max_time += buffer_time

time_per_page = (max_time - min_time) / vert_pages

# first get the unique SPIDS
(rs, rows) = cn.Execute("select distinct(SPID) from c2ctrace")

spids = []
while not rs.EOF :
    if rs.Fields.Item(0).Value != None :
        spids.append(rs.Fields.Item(0).Value)
    rs.MoveNext()
rs.Close()

# now that we know how many SPIDs there are we can set up the PSDraw instances.
# we do each page individually so it's easier for us to do with the SQL as it's
# coming back. the variable pages is a list of lists of PSDraw instances. Once
# it is created thus we can access it like pages[1][2]
pages = []
for i in xrange(0, (len(spids) + (page_cols-1)) / page_cols) :
    col = []
    for j in xrange(0, vert_pages) :
        ps = PSDraw.PSDraw(open('c:\\temp\\trace_%d-%d.ps' % (i+1,j+1), 'wb'))
        ps.begin_document()

        # draw the time-lines
        ps.setfont('Times-Roman', 12)
        draw_lines(ps)

        # draw the page coordinates in the lower-right corner
        ps.text((inch2pts(8.5-margin-0.25), inch2pts(margin)), '(%d, %d)' % (i+1,j+1))

        col.append(ps)
    pages.append(col)

# now the output pages are initialized we can go through and get our data
# and render it onto the pages
for i in xrange(0, len(spids)) :
    (rs, rows) = cn.Execute("select StartTime, TextData, EventClass, ObjectID from c2ctrace where (EventClass = 40 or EventClass = 27) and SPID = %s order by StartTime" % spids[i])

    # we can figure out here which column of pages and which column on that page
    # we're dealing with
    page_col = pages[i / page_cols]
    col = i % page_cols

    # while we're at it we can get the base x value where the timeline is
    # drawn
    x_ref = margin + col * tl_width

    # write our spid at the top of the timeline
    page_col[0].text((inch2pts(x_ref+0.1), inch2pts(11 - margin) - 12), "SPID %s" % spids[i])

    while not rs.EOF :
        s = rs.Fields.Item(1).Value
        if s == 'IF @@TRANCOUNT > 0' :
            rs.MoveNext()
            continue

        # we need to take the start time and figure out from it what page row
        # we're on
        timestamp = float(rs.Fields.Item(0).Value)
        timestamp -= min_time # adjust for the origin of our time
        secs = timestamp
        page_row = int(timestamp / time_per_page)
        if page_row >= len(page_col) :
            page_row = len(page_col) - 1

        # now adjust the timestamp value for intra-page placement
        timestamp -= page_row * time_per_page
        y_ref = (11 - 2 * margin) * (1.0 - timestamp / time_per_page) + margin

        # draw a short horizontal line to mark this spot
        page_col[page_row].line((inch2pts(x_ref), inch2pts(y_ref)),
                                       (inch2pts(x_ref+0.125), inch2pts(y_ref)))

        # write the timestamp, and following lines
        lines = []
        lines.append('%0.3f' % (secs * 24 * 60 * 60))

        if rs.Fields.Item(2).Value == '40' :
            s = rs.Fields.Item(1).Value
            s = s[0:40] # just the first (no more than) 40 chars
            #print repr(s)
            lines.extend(re.split('[\\r\\n]+', s))
            #f.write("%s %s\n" % (time.ctime(int(rs.Fields.Item(0).Value)), ))
        elif rs.Fields.Item(2).Value == '27' :
            lines.append("LockTimeout:")
            lines.append(str(get_objectid(rs.Fields.Item(3).Value)))
            #f.write("%s LockTimeout\n" % time.ctime(int(rs.Fields.Item(0).Value)))

        x = x_ref + 0.1
        y = y_ref + 2.0/72.0 # just to get it above the horizontal line
        for l in lines :
            page_col[page_row].text((inch2pts(x), inch2pts(y)), l)
            y -= 12.0 / 72.0

        rs.MoveNext()

# now go through all the pages and finish them up
for i in pages :
    for j in i :
        j.end_document()

# fini!

--------------010307070106070508040508--