Small Problem with Weather Underground XML API...

Benjamin Schollnick bschollnick at schollnick.net
Sun Jun 5 18:18:37 EDT 2011


I am working scraping the Weather Underground using the XML interface...  

I am hoping to to add this into the pywapi, but that looks like it's been abandoned?  I haven't seen any updates in ages to it...

And I'm using the Weather Underground XML API (http://wiki.wunderground.com/index.php/API_-_XML)...  And it's working, except something is happening odd with the Forecast portion...

When parsed the Forecast, the Highs & Lows are the same value...

I don't see another approach to this though.  Weather Underground is presenting the same data structure for the forecast, which is why I am breaking it into a list...      I'm not the best expert at XML, but I believe that I have etree working fine...  But not necessarily the best way, Is there a better way to read this via etree?

The only limitation I have is the code has to be python 2.51, due to limitations in the Indigo framework...

The scan_node function scans the individual node, and works fine for the Weather forecast...  but due to the duplicate XML tags in the forecast XML interface, I had to manually break it out into a list...

But this doesn't explain the issue with the high's not being read properly...

Anyone?

WUND_WEATHER_URL	= 'http://api.wunderground.com/auto/wui/geo/WXCurrentObXML/index.xml?query=%s'
WUND_FORECAST_URL	= 'http://api.wunderground.com/auto/wui/geo/ForecastXML/index.xml?query=%s'
WUND_PWS_WEATHER_URL = 'http://api.wunderground.com/weatherstation/WXCurrentObXML.asp?ID=%s'

def	scan_node ( data, node, ns_wund_data_structure):
	for (category, attrs) in ns_wund_data_structure.iteritems():
		if node.tag in attrs:
			for attrsname in attrs:
				if attrsname == node.tag:
					if not(category in data.keys() ):
						#
						#	key not in dictionary, create subdictionary
						#
						data [category] = {}

					if node.text <> None:
						data [category] [node.tag.strip()] = node.text.strip()
	return data

def get_weather_from_wund(location_id, hl = ''):
	url = WUND_WEATHER_URL % (location_id)
	handler = urllib2.urlopen(url)
	tree = parse ( handler)
	handler.close()
	weather_data = {}
	elem = tree.getroot ()
	
	ns_wund_data_structure = { 
		'display_location': ('full', 'city', 'state', 'state_name', 'country', 'zip', 'latitude', 'longitude', 'elevation'),
		'current_observation': ('station_id', 'observation_time', 'observation_time_rfc822', 'local_time', 'local_time_rfc822',
								'local_epoch', 'weather', 'temperature_string', 'temp_f', 'temp_c', 'relative_humidity',
								'wind_string', 'wind_dir', 'wind_degrees', 'wind_mpg', 'wind_gust', 'pressure_string',
								'pressure_mb', 'pressure_in', 'dewpoint_string', 'dewpoint_f', 'dewpoint_c', 
								'heat_index_string', 'heat_index_f', 'heat_index_c', 'windchill_string', 'windchill_f', 
								'windchill_c', 'visibility_mi', 'visibility_km', 'forceast_url','history_url',
								'ob_url', 'icon_url_base', 'icon_url_name', 'icon', 'forecast_url'),
		'icons'				: ('icon_set', 'icon_url', 'icon_url_base', 'icon_url_name', 'icon')
	}		

	for category in ns_wund_data_structure:
		weather_data[category] = {}
		
	for node in elem.getchildren():
		children = node.getchildren()
		if children <> []:
			for subnode in children:
				weather_data = scan_node( weather_data, subnode, ns_wund_data_structure)
					
		else:
			weather_data = scan_node ( weather_data, node, ns_wund_data_structure)
	return weather_data

def	walk_tree (root_node, data, dstructure):
	for node in root_node.getchildren():
		children = node.getchildren()
		if children <> []:
			for subnode in children:
				if subnode.getchildren() <> []:
					walk_tree (subnode, data, dstructure)
				else:
					data = scan_node ( data, subnode, dstructure)
		else:
			data = scan_node ( data, node, dstructure)
	return data
	
def get_forecast_from_wund(location_id, weather_data = None, hl = ''):
	url = WUND_FORECAST_URL % (location_id)
	handler = urllib2.urlopen(url)
	tree = parse ( handler)
	handler.close()
	if weather_data == None:
		weather_data = {}
	elem = tree.getroot ()

	ns_forecast_structure = { 
		'txt_forecast'	: ( 'number', 'forecastday'),
		'high'			: ('fahrenheit', 'celsius'),
		'low'			: ('fahrenheit', 'celsius'),
		'simpleforecast': ('forecastday', 'conditions', 'icon', 'skyicon'),
		'forecastday'	: ('period', 'title', 'fcttext', 'date', 'high', 'low', 'conditions', 'icon', 'skyicon'),
		'date'			: ('epoch', 'pretty_short', 'pretty', 'day', 'month', 'year', 'yday','hour', 'min', 
							'sec', 'isdst', 'monthname', 'weekday_short', 'weekday', 'ampm', 'tz_short', 'tz_long')	}		
	weather_data = walk_tree (elem, weather_data, ns_wund_data_structure)
	weather_data["forecast"] = []
	forecast_data = {}
	forecast_root = tree.find ("//simpleforecast")
	
	for subnode in forecast_root.getchildren():
		forecast_data = {}
		forecast_data = walk_tree (subnode, forecast_data, ns_forecast_structure)
		weather_data["forecast"].append (forecast_data)
		
	return weather_data

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20110605/ddb3d766/attachment.html>


More information about the Python-list mailing list