Controlling the passing of data
Sayth Renshaw
flebber.crue at gmail.com
Thu Apr 28 08:02:06 EDT 2016
Hi
This file contains my biggest roadblock with programming: the abstract nature of needing to pass data from one thing to the next.
In my file here I needed to traverse and modify the XML file. I don't want to restore it or put it in a new variable or other format; I just want to alter it and let it flow on to the list comprehensions as they were.
Once I can get on top of this mentally I will be able to do so much better. I think I am trying to manage it in my head as if it were water and plumbing.
In particular, here I am taking the id from each race and putting it into that race's children, which are called nomination.
I have put a comment above the new code which is causing the difficulty.
from pyquery import PyQuery as pq
import pandas as pd
import argparse
import numpy as np
# from glob import glob
# Module-level command-line parser; it is handed to GetArgs, which registers
# the arguments and performs the parse.
parser = argparse.ArgumentParser()
def GetArgs(parser, argv=None):
    """Register the positional ``files`` argument and parse the arguments.

    Args:
        parser: The ``argparse.ArgumentParser`` to configure and run.
        argv: Optional list of argument strings to parse.  ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what
            this function always did before the parameter existed.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args``.  Its
        ``files`` attribute is a list holding one or more strings
        (``nargs="+"`` rejects an empty list).
    """
    # parser.add_argument('directory', help='directory use',
    #                     action='store', nargs='*')
    parser.add_argument("files", nargs="+")
    return parser.parse_args(argv)
# Parse the command line.  fileList is the argparse namespace returned by
# GetArgs; the input paths given on the command line are in fileList.files.
fileList = GetArgs(parser)
# print(fileList.files)
data = []

# Attribute names read from each selected element, one tuple per element kind.
# Each tuple is iterated name by name, so every one of them must be a real
# tuple, including the single-item one.

# Read from every <nomination> element.
horseattrs = ('race_id', 'id', 'horse', 'number', 'finished', 'age', 'sex',
              'blinkers', 'trainernumber', 'career', 'thistrack', 'firstup',
              'secondup', 'variedweight', 'weight', 'pricestarting')
# Read from every <meeting> element.
meetattrs = ('id', 'venue', 'date', 'rail', 'weather', 'trackcondition')
# Read from every <race> element.
raceattrs = ('id', 'number', 'shortname', 'stage', 'distance',
             'grade', 'age', 'weightcondition', 'fastesttime', 'sectionaltime')
# Read from every <club> element.  The trailing comma matters: ('code') is
# just the string 'code', and iterating it yields 'c', 'o', 'd', 'e'.
clubattrs = ('code',)

frames = pd.DataFrame([])
noms = []
for items in fileList.files:
    # Parse one XML file.  Every selection below is a view onto the same
    # underlying element tree, not a copy of it.
    d = pq(filename=items)
    meet = d('meeting')
    club = d('club')
    race = d('race')
    res = d('nomination')
    # d('p').filter(lambda i: i == 1)

    # Stamp each race's id onto its child nominations.  .attr(name, value)
    # changes the elements in place, so `res` (selected above) already sees
    # the new attribute: nothing has to be stored again or passed on.
    # The attribute is named 'race_id' because that is the name horseattrs
    # reads back.  Do not call res.append() here: on a PyQuery object that
    # inserts content into the document rather than extending the selection.
    for race_el in race.items():
        # A separate loop name keeps `race` bound to the full selection,
        # which racedata below depends on.
        race_el.find('nomination').attr('race_id', race_el.attr('id'))

    # One row per element, one column per attribute name; a missing
    # attribute comes back as None.
    resdata = [[nom.attr(x) for x in horseattrs] for nom in res.items()]
    # print(dataSets)
    meetdata = [[m.attr(x) for x in meetattrs] for m in meet.items()]
    racedata = [[r.attr(x) for x in raceattrs] for r in race.items()]
    clubdata = [[c.attr(x) for x in clubattrs] for c in club.items()]
    # First column of racedata is the race 'id' attribute.
    raceid = [row[0] for row in racedata]
    # L = [x + [0] for x in L]
    # print(resdata)
    # resdata = [raceid[i] for i in raceid x + i for x in resdata]
    # for number of classes equalling nomination in the each category of
    # race inset raceid into resdata
    #
    # print(resdata)
    # clubdf = pd.DataFrame(clubdata)
    # meetdf = pd.DataFrame(meetdata)
    # racedf = pd.DataFrame(racedata)
    # resdf = pd.DataFrame(resdata)
    # frames = frames.append(clubdf)
    # frames = frames.append(meetdf)
    #
    # frames = frames.append(racedf)
    # frames = frames.append(resdf)
# print(frames)
# frames.to_csv('~/testingFrame5.csv', encoding='utf-8')
Thanks
Sayth
More information about the Python-list
mailing list