Source code for omdpb.open_meteo_data

# -*- coding: utf-8 -*-
"""
**Module to access the Open Meteo Data Api of the province of Bolzano**
"""

import sys
import os
import pandas as pd
from omdpb.utils import check_date_format, http_get_api


# Base URL of the Open Meteo Data API of the province of Bolzano. The endpoint
# names used in this module ('stations', 'sensors', 'timeseries') are appended
# to it by plain string concatenation, so the trailing slash is required.
API_URL = 'http://daten.buergernetz.bz.it/services/meteo/v1/'


def station_code_lookup(station_code=None):
    """
    *Interactive station code (SCODE) selection*.

    Without an argument, the table of available stations is printed and the
    user is prompted on the console until an existing station code is
    entered. With an argument, the given ``station_code`` is validated
    against the list of available stations.

    Parameters
    ----------
    station_code : str, optional
        The station identifier.

    Returns
    -------
    str
        The entered or specified ``station_code``, once it is known to exist.

    Raises
    ------
    TypeError
        If ``station_code`` is specified but is not a str.
    ValueError
        If ``station_code`` is specified but is not a valid station
        identifier.
    SystemExit
        If the interactive prompt is interrupted with <CTRL-C>.
    AssertionError
        If the response status (HTML) code is not 200 (propagated from the
        API request made by ``station_metadata``).
    urllib.error.HTTPError
        If there is a problem with the connection (internal server error,
        bad request, ...).

    Examples
    --------
    >>> station_code_lookup()
    Asks the user to enter the desired ``station_code`` on the console.

    >>> station_code_lookup('83200MS')
    '83200MS'

    """
    # only the identifier and the German/Italian station names are shown
    stations = station_metadata()[['SCODE', 'NAME_D', 'NAME_I']]
    known_codes = set(stations.SCODE)

    # non-interactive use: validate the given code and hand it back
    if station_code is not None:
        if not isinstance(station_code, str):
            raise TypeError('The station_code should be type str, received {}.'
                            .format(type(station_code)))
        if station_code not in known_codes:
            raise ValueError('The station code {} does not exist.'
                             .format(station_code))
        return station_code

    # interactive use: show the complete, untruncated station table ...
    with pd.option_context('display.max_rows', None,
                           'display.max_columns', None):
        print(stations)

    # ... and keep asking until an existing station code is entered
    while True:
        try:
            entered = str(input('Enter the desired station code (SCODE): '))
            if entered in known_codes:
                return entered
            print('The station code {} does not exist, try again.'
                  .format(entered))
        except KeyboardInterrupt:
            # leave the program quietly when the user aborts the prompt
            sys.exit()
def sensor_code_lookup(station_code, sensor_code=None):
    """
    *Interactive sensor code (TYPE) selection*.

    If ``sensor_code`` is not specified, the sensors of the station are
    printed and the user is prompted on the console until a valid selection
    is entered; an empty (or whitespace-only) input selects all sensors.
    If ``sensor_code`` is specified, it is validated against the sensors of
    the station. Passing -1 selects all sensors of the station.

    Sensor codes are compared case-insensitively and returned in upper case.

    Check the chapter 'sensor acronyms' in the documentation omdpb.pdf to
    find a list of the abbreviated sensor names and their meanings.

    Parameters
    ----------
    station_code : str
        The station identifier.
    sensor_code : str, list, -1, optional
        The sensor identifier(s).

    Returns
    -------
    list
        The selected or specified sensor code(s), upper-cased.

    Raises
    ------
    TypeError
        If ``station_code`` is not a str.
    ValueError
        If ``station_code`` is not a valid station identifier.
    TypeError
        If ``sensor_code`` is not a str, list or -1.
    ValueError
        If any sensor identifier in ``sensor_code`` is not valid.
    TypeError
        If any sensor identifier in ``sensor_code`` (list) is not a string.
    AssertionError
        If the response status (HTML) code is not 200 (propagated from the
        API request made by ``sensor_metadata``).
    urllib.error.HTTPError
        If there is a problem with the connection (internal server error,
        bad request, ...).

    Examples
    --------
    >>> sensor_code_lookup(station_code='83200MS')
    Asks the user to choose from the sensors of station '83200MS'.

    >>> sensor_code_lookup(station_code='83200MS', sensor_code='LT')
    ['LT']

    """
    # sensors of the station with id station_code (this call also validates
    # station_code itself)
    available = list(sensor_metadata(station_code=station_code).TYPE)
    valid = set(available)

    if sensor_code is None:
        print('Sensors = ', available)
        # keep asking until the selection is valid: the message below tells
        # the user to try again, so the prompt must actually be repeated
        while True:
            entered = str(input('Type the name(s) of the sensor(s) or '
                                'press <ENTER> to choose all sensors: '))
            # split() without argument tolerates repeated/leading/trailing
            # whitespace and never yields empty tokens
            requested = entered.upper().split()
            # nothing entered: choose all sensors
            if not requested:
                return available
            if set(requested).issubset(valid):
                return requested
            print('The sensor code(s) "{}" does not exist, try again.'
                  .format(requested))

    # -1 selects all sensors; compare by value, not by identity, because
    # identity of int objects is an implementation detail
    if isinstance(sensor_code, int) and sensor_code == -1:
        return available

    if not isinstance(sensor_code, (str, list)):
        raise TypeError('The sensor code should be type str or list,'
                        ' received {}.'.format(type(sensor_code)))

    if isinstance(sensor_code, str):
        code = sensor_code.upper()
        if code not in valid:
            raise ValueError('The sensor code {} does not exist. Existing '
                             'sensor codes are:\n {}.'
                             .format(sensor_code, tuple(available)))
        return [code]

    # sensor_code is a list: every entry has to be a string
    if not all(isinstance(s, str) for s in sensor_code):
        raise TypeError('Sensor types should be str, received {}'
                        .format([type(s) for s in sensor_code]))
    requested = [s.upper() for s in sensor_code]
    if not set(requested).issubset(valid):
        raise ValueError('One or more of the sensor codes in {} do not '
                         'exist. Existing sensor codes are:\n {}.'
                         .format(requested, tuple(available)))
    return requested
def station_metadata(url=API_URL):
    """
    *Lists all available stations and their properties*.

    Requests the ``stations`` endpoint of the API and flattens each returned
    feature (its ``properties`` merged with its ``geometry``) into one row.

    Parameters
    ----------
    url : str
        The url of the API:
        'http://daten.buergernetz.bz.it/services/meteo/v1/'

    Returns
    -------
    pandas.core.frame.DataFrame
        A DataFrame containing all stations and their properties.

    Raises
    ------
    AssertionError
        If the response status (HTML) code is not 200.
    urllib.error.HTTPError
        If there is a problem with the connection (internal server error,
        bad request, ...).

    Examples
    --------
    >>> station_metadata()
           ALT        LAT  ...                           coordinates   type
    0   210.00  46.243333  ...  [669803.015640121, 5123442.04315501]  Point
    1   873.99  46.621876  ...  [626295.144332811, 5164467.60475602]  Point
    2  2152.00  46.841700  ...  [744723.350800056, 5192575.70046406]  Point
    3  2260.00  46.615600  ...  [688387.444866793, 5165389.11604176]  Point
    4  2747.00  46.856100  ...   [743964.273827689, 5194147.3777763]  Point
    5  2926.00  46.776700  ...  [613580.465752357, 5181424.77411351]  Point

    """
    # GET request to the API; the stations are listed under 'features'
    features = http_get_api(url + 'stations')['features']

    # one flat record per station: the properties first, then the geometry
    # entries (a geometry key would override an equally named property)
    records = []
    for feature in features:
        record = dict(feature['properties'])
        record.update(feature['geometry'])
        records.append(record)

    return pd.DataFrame.from_records(records)
def sensor_metadata(url=API_URL, station_code=None, sensor_code=None):
    """
    *Lists all available sensors of a station*.

    If ``sensor_code`` is not specified, all sensors of the station with
    identifier ``station_code`` are listed. Otherwise, one request per
    specified sensor is made and the results are stacked row-wise.

    If ``station_code`` is not specified, the user is asked to choose the
    station interactively on the console.

    Sensor codes are not checked against the station's sensor list here;
    they are passed to the API unchanged.

    Parameters
    ----------
    url : str
        The url of the API:
        'http://daten.buergernetz.bz.it/services/meteo/v1/'
    station_code : str, optional
        The station identifier.
    sensor_code : str, list, optional
        The sensor identifier(s).

    Returns
    -------
    pandas.core.frame.DataFrame
        A DataFrame containing sensors of a station and their properties
        together with the last recorded values.

    Raises
    ------
    TypeError
        If ``sensor_code`` is not a str or list.
    TypeError
        If any sensor identifier in ``sensor_code`` (list) is not a string.
    TypeError
        If ``station_code`` is not a str.
    ValueError
        If ``station_code`` is not a valid station identifier.
    AssertionError
        If the response status (HTML) code is not 200.
    urllib.error.HTTPError
        If there is a problem with the connection (internal server error,
        bad request, ...).

    Examples
    --------
    >>> sensor_metadata()
    Asks the user to enter the desired ``station_code``.

    >>> sensor_metadata(station_code='83200MS')
                          DATE                   DESC_D  ...  UNIT    VALUE
    0  2018-09-03T15:00:00CEST           Lufttemperatur  ...    °C    22.90
    1  2018-09-03T15:05:00CEST             Niederschlag  ...    mm     0.00
    2  2018-09-03T15:00:00CEST             Windrichtung  ...     °   245.60
    3  2018-09-03T15:00:00CEST      Windgeschwindigkeit  ...   m/s     0.87
    4  2018-09-03T15:00:00CEST  Windgeschwindigkeit Böe  ...   m/s     3.20
    5  2018-09-03T15:00:00CEST     relative Luftfeuchte  ...     %    49.80
    6  2018-09-03T15:00:00CEST                Luftdruck  ...   hPa  1012.20
    7  2018-09-03T15:00:00CEST          Globalstrahlung  ...  W/m²   611.60

    >>> sensor_metadata(station_code='83200MS', sensor_code='LT')
                          DATE          DESC_D  ...  UNIT  VALUE
    0  2018-09-03T15:00:00CEST  Lufttemperatur  ...    °C   22.9

    >>> sensor_metadata(station_code='83200MS', sensor_code=['LT', 'N'])
                          DATE          DESC_D  ...  UNIT  VALUE
    0  2018-09-06T09:50:00CEST  Lufttemperatur  ...    °C   19.7
    1  2018-09-06T09:55:00CEST    Niederschlag  ...    mm    0.0

    """
    # reject unsupported sensor_code types before any request is made;
    # otherwise the request url (a str) would be iterated character by
    # character further below
    if sensor_code is not None and not isinstance(sensor_code, (str, list)):
        raise TypeError('The sensor code should be type str or list,'
                        ' received {}.'.format(type(sensor_code)))
    if isinstance(sensor_code, list) and \
            not all(isinstance(s, str) for s in sensor_code):
        raise TypeError('Sensor types should be str, received {}'
                        .format([type(s) for s in sensor_code]))

    # check if station code is specified (asks the user if it is not)
    station_code = station_code_lookup(station_code)

    # request to list all sensors of a station
    base_request = url + 'sensors' + '?station_code=' + station_code

    if sensor_code is None:
        # return all sensors by default
        requests = [base_request]
    elif isinstance(sensor_code, str):
        # request to list a specific sensor with code sensor_code
        requests = [base_request + '&sensor_code=' + sensor_code]
    else:
        # one request per desired sensor
        requests = [base_request + '&sensor_code=' + s for s in sensor_code]

    # make the GET request(s) to the API: sensor(s) metadata together with
    # the last observation
    frames = [pd.DataFrame.from_dict(http_get_api(r)) for r in requests]

    return pd.concat(frames, axis=0, ignore_index=True, sort=False)
def sensor_data(url=API_URL, station_code=None, sensor_code=None,
                date_from=None, date_to=None, save=False, sformat='csv',
                path=os.path.join(os.path.expanduser('~'), 'Documents', ''),
                filename='omdpb'):
    """
    *Get a time series of the specified sensor at the specified station*.

    If ``station_code`` and ``sensor_code`` are None, the user is asked to
    choose them interactively on the console.

    If ``date_from`` and ``date_to`` are None, a time series of the last 24h
    before the current datetime is returned. If ``date_from`` or ``date_to``
    is None, a time series of the last 24h before (after) ``date_to``
    (``date_from``) is returned. If both ``date_from`` and ``date_to`` are
    specified, a time series of the respective time period is returned.

    Regarding **sensor_code**, you can either pass a single sensor code as a
    string or more sensor codes as a list of strings. If you pass -1 to
    sensor_code, automatically all sensors of the specified station are
    selected.

    Sensors for which the API returns no data in the requested period are
    left out of the result.

    Allowed DATE_FORMATS = ('%Y%m%d', '%Y%m%d%H%M')

    Parameters
    ----------
    url : str
        The url of the API:
        'http://daten.buergernetz.bz.it/services/meteo/v1/'
    station_code : str, optional
        The station identifier.
    sensor_code : str, list, -1, optional
        The sensor identifier(s).
    date_from : str, optional
        The starting date.
    date_to : str, optional
        The ending date.
    save : boolean, optional
        Save time series to file.
    sformat : str, optional
        Output file format. 'csv' writes a csv file, any other value writes
        a json file.
    path : str, optional
        Output directory, default is the ``Documents`` folder in the home
        directory of the current user. This should be an absolute path on
        your system.
    filename : str, optional
        Output file name without extension, default is ``station_code``.

    Returns
    -------
    pandas.core.frame.DataFrame
        A DataFrame indexed by timestamp with one column per sensor. The
        DataFrame is empty if no data is available for the selected dates.

    Raises
    ------
    ValueError
        If ``date_from`` or ``date_to`` is not of type str.
    ValueError
        If ``date_from`` or ``date_to`` is not one of the formats in
        DATE_FORMATS.
    TypeError
        If ``station_code`` is not a str.
    ValueError
        If ``station_code`` is not a valid station identifier.
    TypeError
        If ``sensor_code`` is not a str, list or -1.
    ValueError
        If any sensor identifier in ``sensor_code`` is not valid.
    TypeError
        If any sensor identifier in ``sensor_code`` (list) is not a string.
    AssertionError
        If the response status (HTML) code is not 200.
    urllib.error.HTTPError
        If there is a problem with the connection (internal server error,
        bad request, ...).

    Examples
    --------
    >>> sensor_data()
    Asks the user to enter the desired ``station_code`` and ``sensor_code``.
    Returns a time series of the last 24h before the current datetime.

    >>> sensor_data(station_code='83200MS', date_from='20180903')
    Asks the user to enter the desired ``sensor_code`` and returns a time
    series of the last 24h after ``date_from``.

    >>> sensor_data(station_code='83200MS', sensor_code='LT',
    ...             date_to='20180903')
    Returns a time series of the last 24h before ``date_to`` of sensor 'LT'
    at station '83200MS'.
                               LT
    DATE
    2018-09-02T00:00:00CEST  15.3
    2018-09-02T00:10:00CEST  15.2
    2018-09-02T00:20:00CEST  15.1
    2018-09-02T00:30:00CEST  15.0
    2018-09-02T00:40:00CEST  15.2

    >>> sensor_data(station_code='83200MS', sensor_code='LT',
    ...             date_from='20180801', date_to='20180831')
    Returns a time series of the specified time period of sensor 'LT' at
    station '83200MS'.
                               LT
    DATE
    2018-08-01T00:00:00CEST  27.8
    2018-08-01T00:10:00CEST  26.8
    2018-08-01T00:20:00CEST  26.6
    2018-08-01T00:30:00CEST  26.6
    2018-08-01T00:40:00CEST  26.6

    >>> sensor_data(station_code='83200MS', sensor_code='LT',
    ...             date_from='20180801', date_to='20180831', save=True)
    Saves the time series to the file 83200MS.csv in the ``Documents``
    folder of the current user.

    """
    # NOTE: the default of ``path`` is built with os.path.join instead of a
    # hard-coded Windows separator. On Windows it evaluates to the same
    # string as before; on other systems the home directory is now expanded.

    # check the date formats (if dates are specified), raises a ValueError in
    # case of a wrong format
    check_date_format(date_from)
    check_date_format(date_to)

    # check if station code is specified
    station_code = station_code_lookup(station_code)

    # check if sensor code is specified, if not ask the user
    sensor_code = sensor_code_lookup(station_code, sensor_code)

    # optional date range, identical for every sensor:
    # - no date: the API returns the last 24h of data
    # - only date_from: the API returns the 24h from this date onwards
    # - only date_to: the API returns the last 24h before this date
    # - both dates: the API returns the time period in between
    date_query = ''
    if date_from is not None:
        date_query += '&date_from=' + date_from
    if date_to is not None:
        date_query += '&date_to=' + date_to

    base_request = url + 'timeseries' + '?station_code=' + station_code

    print('Downloading data from Api ...')

    # one GET request per sensor. Each series is labelled while the sensor
    # that produced it is still known: labelling by position after dropping
    # empty responses would assign the wrong sensor names.
    time_series = []
    for sensor in sensor_code:
        response = http_get_api(
            base_request + '&sensor_code=' + sensor + date_query)
        # skip sensors without data for the selected dates
        if not response:
            continue
        # index by the sampling dates, name the value column after the sensor
        series = pd.DataFrame(response).set_index('DATE')
        time_series.append(
            series.rename(index=str, columns={'VALUE': sensor}))

    # no data available at all: return an empty DataFrame
    if not time_series:
        return pd.DataFrame([])

    # sort the time series by their length in case of different sampling
    # intervals of the sensors
    time_series.sort(key=len)

    # merge listed data frames column-wise and sort them by index
    time_series = pd.concat(time_series, axis=1, ignore_index=False,
                            sort=True)

    # just return the time series if saving is not required
    if not save:
        return time_series

    # if no filename is specified, set it to the station_code
    if filename == 'omdpb':
        filename = station_code

    # save the time series to file <path>/<filename>.<ext>; os.path.join
    # also works when ``path`` has no trailing separator
    if sformat == 'csv':
        time_series.to_csv(os.path.join(path, filename + '.csv'), sep=',',
                           encoding='utf-8')
    else:
        time_series.to_json(os.path.join(path, filename + '.json'),
                            orient='columns')

    return time_series