Another fix for Weather.com API changes (#559)
It appears that the page data loads in one of two ways. I haven't figured out what causes either, but this makes it so that we detect both methods and load the relevant data either way.
This commit is contained in:
parent
79e90260a8
commit
5bf3b2b32a
@ -40,25 +40,79 @@ class WeathercomHTMLParser(HTMLParser):
|
|||||||
exc_info=True
|
exc_info=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def load_json(self, json_input):
|
||||||
|
self.logger.debug('Loading the following data as JSON: %s', json_input)
|
||||||
|
try:
|
||||||
|
return json.loads(json_string)
|
||||||
|
except json.decoder.JSONDecodeError as exc:
|
||||||
|
self.logger.debug('Error loading JSON: %s', exc)
|
||||||
|
self.logger.debug('String that failed to load: %s', json_input)
|
||||||
|
return None
|
||||||
|
|
||||||
def handle_data(self, content):
|
def handle_data(self, content):
|
||||||
|
'''
|
||||||
|
Sometimes the weather data is set under an attribute of the "window"
|
||||||
|
DOM object. Sometimes it appears as part of a javascript function.
|
||||||
|
Catch either possibility.
|
||||||
|
'''
|
||||||
|
if self.weather_data is not None:
|
||||||
|
# We've already found weather data, no need to continue parsing
|
||||||
|
return
|
||||||
|
content = content.strip().rstrip(';')
|
||||||
try:
|
try:
|
||||||
tag_text = self.get_starttag_text().lower()
|
tag_text = self.get_starttag_text().lower()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
tag_text = ''
|
tag_text = ''
|
||||||
if tag_text.startswith('<script'):
|
if tag_text.startswith('<script'):
|
||||||
|
# Look for feed information embedded as a javascript variable
|
||||||
|
begin = content.find('window.__data')
|
||||||
|
if begin != -1:
|
||||||
|
self.logger.debug('Located window.__data')
|
||||||
|
# Look for end of JSON dict and end of javascript statement
|
||||||
|
end = content.find('};', begin)
|
||||||
|
if end == -1:
|
||||||
|
self.logger.debug('Failed to locate end of javascript statement')
|
||||||
|
else:
|
||||||
|
# Strip the "window.__data=" from the beginning
|
||||||
|
json_data = self.load_json(
|
||||||
|
content[begin:end + 1].split('=', 1)[1].lstrip()
|
||||||
|
)
|
||||||
|
if json_data is not None:
|
||||||
|
def _find_weather_data(data):
|
||||||
|
'''
|
||||||
|
Helper designed to minimize impact of potential
|
||||||
|
structural changes to this data.
|
||||||
|
'''
|
||||||
|
if isinstance(data, dict):
|
||||||
|
if 'observation' in data and 'dailyForecast' in data:
|
||||||
|
return data
|
||||||
|
else:
|
||||||
|
for key in data:
|
||||||
|
ret = _find_weather_data(data[key])
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
return None
|
||||||
|
|
||||||
|
weather_data = _find_weather_data(json_data)
|
||||||
|
if weather_data is None:
|
||||||
|
self.logger.debug(
|
||||||
|
'Failed to locate weather data in the '
|
||||||
|
'following data structure: %s', json_data
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.weather_data = weather_data
|
||||||
|
return
|
||||||
|
|
||||||
for line in content.splitlines():
|
for line in content.splitlines():
|
||||||
line = line.strip()
|
line = line.strip().rstrip(';')
|
||||||
if line.startswith('var adaptorParams'):
|
if line.startswith('var adaptorParams'):
|
||||||
# Strip off the "var adaptorParams = " from the beginning,
|
# Strip off the "var adaptorParams = " from the beginning,
|
||||||
# and the javascript semicolon from the end. This will give
|
# and the javascript semicolon from the end. This will give
|
||||||
# us JSON that we can load.
|
# us JSON that we can load.
|
||||||
line = line.split('=', 1)[1].lstrip().rstrip(';')
|
weather_data = self.load_json(line.split('=', 1)[1].lstrip())
|
||||||
self.logger.debug('Loading the following data as JSON: %s', line)
|
if weather_data is not None:
|
||||||
try:
|
self.weather_data = weather_data
|
||||||
self.weather_data = json.loads(line)
|
return
|
||||||
except json.decoder.JSONDecodeError as exc:
|
|
||||||
self.logger.error('Error loading JSON: %s', exc)
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
class Weathercom(WeatherBackend):
|
class Weathercom(WeatherBackend):
|
||||||
|
Loading…
Reference in New Issue
Block a user