# -*- coding: utf-8 -*-
"""
**Module to access the Open Meteo Data Api of the province of Bolzano**
"""
import sys
import os
import pandas as pd
from omdpb.utils import check_date_format, http_get_api
# Base URL of the Open Meteo Data API; used as the default ``url`` of
# ``sensor_data``, which appends the endpoint name ('timeseries') to it.
API_URL = 'http://daten.buergernetz.bz.it/services/meteo/v1/'
def station_code_lookup(station_code=None):
    """ *Interactive station code (SCODE) selection*.

    If ``station_code`` is not specified, the table of available stations
    (SCODE, NAME_D, NAME_I) is printed and the user is asked to enter the
    desired station on the console until an existing code is entered. If
    ``station_code`` is specified, checks if it is valid.

    Parameters
    ----------
    station_code : str, optional
        The station identifier.

    Returns
    -------
    str
        Returns the selected or specified ``station_code`` if it is valid.

    Raises
    ------
    TypeError
        If ``station_code`` is not a str. This is checked before the
        station metadata is requested.
    ValueError
        If ``station_code`` is not a valid station identifier.
    AssertionError
        If the response status (HTML) code is not 200 (propagated from the
        metadata request).
    urllib.error.HTTPError
        If there is a problem with the connection (internal server error,
        bad request, ...) (propagated from the metadata request).
    SystemExit
        If the user interrupts the console prompt (Ctrl+C).

    Examples
    --------
    >>> station_code_lookup()
    Asks the user to enter the desired ``station_code`` on the console.

    >>> station_code_lookup('83200MS')
    '83200MS'

    """
    # fail fast on a wrong argument type, before any request is made
    if station_code is not None and not isinstance(station_code, str):
        raise TypeError('The station_code should be type str,' +
                        ' received {}.'.format(type(station_code)))

    metadata = station_metadata()
    selection = metadata[['SCODE', 'NAME_D', 'NAME_I']]

    # build the lookup set once instead of on every membership test
    valid_codes = set(selection.SCODE)

    # non-interactive mode: only validate the specified identifier
    if station_code is not None:
        if station_code not in valid_codes:
            raise ValueError('The station code {}'.format(station_code) +
                             ' does not exist.')
        return station_code

    # print selection to console, make sure to not truncate the print
    with pd.option_context('display.max_rows', None,
                           'display.max_columns', None):
        print(selection)

    # keep asking until the user enters an existing station identifier
    while True:
        try:
            station_code = str(input('Enter the desired ' +
                                     'station code (SCODE): '))
        except KeyboardInterrupt:
            # the user aborted the prompt
            sys.exit()

        if station_code in valid_codes:
            return station_code

        print('The station code {}'.format(station_code) +
              ' does not exist, try again.')
def sensor_code_lookup(station_code, sensor_code=None):
    """ *Interactive sensor code (TYPE) selection*.

    If ``sensor_code`` is not specified, the sensors of the station are
    printed and the user is asked for input until valid sensor codes (or an
    empty line, meaning all sensors) are entered.
    If ``sensor_code`` is specified, checks if it is valid.
    Sensor codes are compared in upper case.

    Check the chapter 'sensor acronyms' in the documentation omdpb.pdf
    to find a list of the abbreviated sensor names and their meanings.

    Parameters
    ----------
    station_code : str
        The station identifier.
    sensor_code : str, list, -1, optional
        The sensor identifier(s). Pass -1 to select all sensors of the
        station.

    Returns
    -------
    list
        Returns the selected or specified ``sensor_code`` (upper case) if it
        is valid.

    Raises
    ------
    TypeError
        If ``sensor_code`` is not a str, list or -1.
    TypeError
        If any sensor identifier in ``sensor_code`` (list) is not a string.
    ValueError
        If any sensor identifier in ``sensor_code`` is not valid.
    SystemExit
        If the user aborts the console prompt (Ctrl+C or end of input).

    Notes
    -----
    ``station_code`` is not validated here; it is passed on to
    ``sensor_metadata``. Errors of that request (e.g. an invalid station,
    a response status other than 200 or ``urllib.error.HTTPError``)
    presumably propagate from there.

    Examples
    --------
    >>> sensor_code_lookup(station_code='83200MS')
    Asks the user to choose from the sensors of station '83200MS'.

    >>> sensor_code_lookup(station_code='83200MS', sensor_code='LT')
    ['LT']

    """
    # -1 selects all sensors; compare by value, not by identity, and
    # restrict to int so that e.g. -1.0 is still rejected as a wrong type
    select_all = isinstance(sensor_code, int) and sensor_code == -1

    # validate the argument type first, so a wrong type fails fast before
    # any request is made
    requested = None
    if sensor_code is not None and not select_all:
        if isinstance(sensor_code, list):
            if not all(isinstance(s, str) for s in sensor_code):
                raise TypeError('Sensor types should be str, received ' +
                                '{}'.format([type(s) for s in sensor_code]))
            requested = [s.upper() for s in sensor_code]
        elif isinstance(sensor_code, str):
            requested = [sensor_code.upper()]
        else:
            raise TypeError('The sensor code should be type str or list,' +
                            ' received {}.'.format(type(sensor_code)))

    # sensors of the station with id station_code
    selection = sensor_metadata(station_code=station_code)
    available = list(selection.TYPE)

    if select_all:
        return available

    # non-interactive mode: only validate the specified identifier(s)
    if requested is not None:
        if not set(requested).issubset(available):
            if isinstance(sensor_code, str):
                raise ValueError('The sensor code {}'.format(sensor_code) +
                                 ' does not exist. Existing sensor codes' +
                                 ' are:\n {}.'.format(tuple(available)))
            raise ValueError('One or more of the sensor codes in ' +
                             '{}'.format(requested) +
                             ' do not exist. Existing sensor codes' +
                             ' are:\n {}.'.format(tuple(available)))
        return requested

    # interactive mode: show the available sensors and ask the user
    print('Sensors = ', available)
    while True:
        try:
            answer = str(input('Type the name(s) of the sensor(s) or ' +
                               'press <ENTER> to choose all sensors: '))
        except (KeyboardInterrupt, EOFError):
            # the user aborted the prompt or the input stream is exhausted
            sys.exit()

        # split on any run of whitespace, so repeated blanks between the
        # codes do not produce empty sensor names
        requested = answer.upper().split()

        # if the input is empty (or blank), get all sensors
        if not requested:
            return available

        # if the entered type(s) are valid, return them
        if set(requested).issubset(available):
            return requested

        print('The sensor code(s) "{}"'.format(requested) +
              ' does not exist, try again.')
def sensor_data(url=API_URL, station_code=None, sensor_code=None,
                date_from=None, date_to=None, save=False, sformat='csv',
                path=os.path.join(os.path.expanduser('~'), 'Documents', ''),
                filename='omdpb'):
    """ *Get a time series of the specified sensor at the specified station*.

    If ``station_code`` and ``sensor_code`` are None, the user is asked to
    choose them interactively on the console. If ``date_from`` and ``date_to``
    are None, a time series of the last 24h before the current datetime is
    returned. If ``date_from`` or ``date_to`` is None, a time series of the
    last 24h before (after) ``date_to`` (``date_from``) is returned. If both
    ``date_from`` and ``date_to`` are specified, a time series of the
    respective time period is returned.

    Regarding **sensor_code**, you can either pass a single sensor code as a
    string or more sensor codes as a list of strings. If you pass -1 to
    sensor_code, automatically all sensors of the specified station are
    selected.

    Allowed DATE_FORMATS = ('%Y%m%d', '%Y%m%d%H%M')

    Parameters
    ----------
    url : str
        The url of the API: 'http://daten.buergernetz.bz.it/services/meteo/v1/'
    station_code : str, optional
        The station identifier.
    sensor_code : str, list, optional
        The sensor identifier(s).
    date_from : str, optional
        The starting date.
    date_to : str, optional
        The ending date.
    save : boolean, optional
        Save time series to file.
    sformat: str, optional
        Output file format. 'csv' writes a csv file, any other value
        writes a json file.
    path : str, optional
        Output directory, default is the 'Documents' folder in the home
        directory of the current user. This should be an absolute path on
        your system; a trailing path separator is optional.
    filename : str, optional
        Output file name (without extension), default is ``station_code``.

    Returns
    -------
    pandas.core.frame.DataFrame
        A DataFrame indexed by the sampling date ('DATE') with one column
        per sensor that returned data. The DataFrame is empty if no data
        is available for the selected dates.

    Raises
    ------
    ValueError
        If ``date_from`` or ``date_to`` is not of type str (checked by
        ``check_date_format``).
    ValueError
        If ``date_from`` or ``date_to`` is not one of the
        formats in DATE_FORMATS (checked by ``check_date_format``).
    TypeError
        If ``station_code`` is not a str.
    ValueError
        If ``station_code`` is not a valid station identifier.
    TypeError
        If ``sensor_code`` is not a str, list or -1.
    ValueError
        If any sensor identifier in ``sensor_code`` is not valid.
    TypeError
        If any sensor identifier in ``sensor_code`` (list) is not a string.
    AssertionError
        If the response status (HTML) code is not 200.
    urllib.error.HTTPError
        If there is a problem with the connection (internal server error,
        bad request, ...).

    Examples
    --------
    >>> sensor_data()
    Asks the user to enter the desired ``station_code`` and ``sensor_code``.
    Returns a time series of the last 24h before the current datetime.

    >>> sensor_data(station_code='83200MS', date_from='20180903')
    Asks the user to enter the desired ``sensor_code`` and returns a time
    series of the last 24h after ``date_from``.

    >>> sensor_data(station_code='83200MS', sensor_code='LT',
                    date_to='20180903')
    Returns a time series of the last 24h before ``date_to`` of sensor 'LT'
    at station '83200MS'.
                               LT
    DATE
    2018-09-02T00:00:00CEST  15.3
    2018-09-02T00:10:00CEST  15.2
    2018-09-02T00:20:00CEST  15.1
    2018-09-02T00:30:00CEST  15.0
    2018-09-02T00:40:00CEST  15.2

    >>> sensor_data(station_code='83200MS', sensor_code='LT',
                    date_from='20180801', date_to='20180831')
    Returns a time series of the specified time period of sensor 'LT'
    at station '83200MS'.
                               LT
    DATE
    2018-08-01T00:00:00CEST  27.8
    2018-08-01T00:10:00CEST  26.8
    2018-08-01T00:20:00CEST  26.6
    2018-08-01T00:30:00CEST  26.6
    2018-08-01T00:40:00CEST  26.6

    >>> sensor_data(station_code='83200MS', sensor_code='LT',
                    date_from='20180801', date_to='20180831',
                    save=True)
    Saves the time series to the file: <home>/Documents/83200MS.csv

    """
    # check the date formats (if dates are specified), raises a ValueError in
    # case of a wrong format
    check_date_format(date_from)
    check_date_format(date_to)

    # check if station code is specified, otherwise ask the user
    station_code = station_code_lookup(station_code)

    # check if sensor code is specified, otherwise ask the user; the result
    # is always a list of sensor codes
    sensor_code = sensor_code_lookup(station_code, sensor_code)

    # optional date range of the request:
    # - no date: the API automatically returns the last 24h of data
    # - only date_from: the API returns the 24h from this date onwards
    # - only date_to: the API returns the last 24h before this date
    # - both dates: the API returns the time period in between
    date_query = ''
    if date_from is not None:
        date_query += '&date_from=' + date_from
    if date_to is not None:
        date_query += '&date_to=' + date_to

    # request(s) to retrieve the time series, one per sensor
    request = [(url + 'timeseries' + '?station_code=' + station_code +
                '&sensor_code=' + s + date_query) for s in sensor_code]

    print('Downloading data from Api ...')

    # make the GET request(s) to the API
    response = [http_get_api(r) for r in request]

    # convert each non-empty response to a DataFrame indexed by the sampling
    # dates; pair every response with its own sensor code *before* dropping
    # empty responses, so that a sensor without data cannot shift the
    # column names of the sensors that follow it
    time_series = []
    for code, data in zip(sensor_code, response):
        if not data:
            continue
        frame = pd.DataFrame(data).set_index('DATE')
        # rename the value column of the DataFrame to the sensor name
        time_series.append(frame.rename(index=str,
                                        columns={'VALUE': code}))

    # check if the time series is empty (no data available for selected dates)
    if not time_series:
        # return an empty DataFrame
        return pd.DataFrame()

    # sort the time series by their length in case of different sampling
    # intervals of the sensors
    time_series.sort(key=len)

    # merge listed data frames and sort them by index
    time_series = pd.concat(time_series, axis=1,
                            ignore_index=False,
                            sort=True)

    # just return the time series if saving is not required
    if not save:
        return time_series

    # if no filename is specified, set it to the station_code
    if filename == 'omdpb':
        filename = station_code

    # save the time series to file: <path>/<filename>.<csv|json>
    # os.path.join works with and without a trailing separator in path
    if sformat == 'csv':
        time_series.to_csv(os.path.join(path, filename + '.csv'),
                           sep=',', encoding='utf-8')
    else:
        time_series.to_json(os.path.join(path, filename + '.json'),
                            orient='columns')

    return time_series