Source code for pyiem.windrose_utils

"""util script to call `windrose` package"""

from calendar import month_abbr
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

import numpy as np
import pandas as pd
from metpy.units import units as mpunits

# Local
from pyiem.database import get_sqlalchemy_conn, sql_helper
from pyiem.network import Table as NetworkTable
from pyiem.plot.util import fitbox
from pyiem.plot.windrose import (
    PLOT_CONVENTION_FROM,
    WindrosePlot,
    histogram,
    plot,
)
from pyiem.util import utc

WINDUNITS = {
    "mph": {
        "label": "miles per hour",
        "units": mpunits("mph"),
        "bins": (2, 5, 7, 10, 15, 20),
    },
    "kts": {
        "label": "knots",
        "units": mpunits("knots"),
        "bins": (2, 5, 7, 10, 15, 20),
    },
    "mps": {
        "label": "meters per second",
        "units": mpunits("meter / second"),
        "bins": (1, 4, 6, 8, 10, 12),
    },
    "kph": {
        "label": "kilometers per hour",
        "units": mpunits("kilometer / hour"),
        "bins": (4, 10, 14, 20, 30, 40),
    },
}
RAOB_BINS = {
    "mph": [2, 25, 50, 75, 100, 150],
    "kts": [2, 25, 50, 75, 100, 15],
    "mps": [1, 10, 15, 25, 50, 75],
    "kph": [4, 10, 14, 20, 30, 40],
}


def _make_timelimit_string(kwargs):
    """Generate a string for the time limiters"""
    hours = kwargs.get("hours", range(24))
    months = kwargs.get("months", range(1, 13))
    limit_by_doy = kwargs.get("limit_by_doy", False)
    if len(hours) == 24 and len(months) == 12 and not limit_by_doy:
        return ""
    parts = []
    if limit_by_doy:
        parts.append(f"{kwargs['sts']:%b %-d} - {kwargs['ets']:%b %-d}")
    elif months is not None and len(months) < 12:
        for h in months:
            parts.append(f"{month_abbr[h]}")  # noqa
    if hours is not None and len(hours) != 24:
        if len(hours) > 4:
            parts.append(
                f"{datetime(2000, 1, 1, hours[0]):%-I %p}-"
                f"{datetime(2000, 1, 1, hours[-1]):%-I %p}"
            )
        else:
            for h in hours:
                parts.append(f"{datetime(2000, 1, 1, h):%-I %p}")  # noqa
    return f" ↳ constraints: {', '.join(parts)}"


def _get_data(station, **kwargs):
    """Helper function to get data out of IEM databases

    Args:
      station (str): the station identifier
      database (str): the name of the database to connect to, we assume we can
        then query a table called `alldata`
      sts (datetime): the floor to query data for
      ets (datetime): the ceiling to query data for
      monthinfo (dict): information on how to query for months
      hourinfo (dict): information on how to query for hours
      level (int): in case of RAOB, which pressure level (hPa)

    Returns:
      pandas.DataFrame of the data
    """
    rlimiter = ""
    sts = kwargs.get("sts")
    ets = kwargs.get("ets")
    database = kwargs.get("database", "asos")
    if database == "asos":
        rlimiter = " and report_type = 3 "
    tlimit = ""
    sqlargs = {}
    tzname = kwargs.get("tzname")
    te = "" if tzname is None else f" at time zone '{tzname}'"
    if kwargs.get("limit_by_doy", False):
        tlimit = (
            f"and to_char(valid{te}, 'mmdd') >= '{sts:%m%d}' and "
            f"to_char(valid{te}, 'mmdd') < '{ets:%m%d}' "
        )
        if sts.strftime("%m%d") > ets.strftime("%m%d"):
            tlimit = (
                f"and (to_char(valid{te}, 'mmdd') >= '{sts:%m%d}' or "
                f"to_char(valid{te}, 'mmdd') < '{ets:%m%d}') "
            )
    elif kwargs.get("months") is not None and len(kwargs["months"]) < 12:
        sqlargs["months"] = kwargs["months"]
        tlimit += f" and extract(month from valid{te}) = ANY(:months) "
    # allowed combination with the above
    if kwargs.get("hours") is not None and len(kwargs["hours"]) < 24:
        sqlargs["hours"] = kwargs["hours"]
        tlimit += f" and extract(hour from valid{te}) = ANY(:hours) "
    sql = sql_helper(
        """
        SELECT sknt, drct, valid at time zone 'UTC' as valid
        from alldata WHERE station = :station
        and valid > :sts and valid < :ets and sknt >= 0 and drct >= 0
        {tlimit} {rlimiter}
        """,
        tlimit=tlimit,
        rlimiter=rlimiter,
    )
    if database == "rwis":
        sql = sql_helper(
            """
            SELECT sknt, drct, valid at time zone 'UTC' as valid
            from alldata a JOIN stations t on (a.iemid = t.iemid)
            WHERE t.id = :station and t.network ~* 'RWIS'
            and valid > :sts and valid < :ets and sknt >= 0 and drct >= 0
            {tlimit} {rlimiter}
            """,
            tlimit=tlimit,
            rlimiter=rlimiter,
        )

    sqlargs["station"] = station
    sqlargs["sts"] = sts
    sqlargs["ets"] = ets
    if kwargs.get("level") is not None:  # HACK!
        database = "raob"
        # here comes another hack, stations with starting with _ are virtual
        sqlargs["stations"] = [station, "ZZZZ"]
        if station.startswith("_"):
            nt = NetworkTable("RAOB")
            sqlargs["stations"] = (
                nt.sts.get(station, {})
                .get("name", "X--YYY ZZZ")
                .split("--")[1]
                .strip()
                .split(" ")
            )
        sql = sql_helper(
            """SELECT p.smps * 1.94384 as sknt, p.drct,
        f.valid at time zone 'UTC' as valid from
        raob_flights f JOIN raob_profile p on (f.fid = p.fid) WHERE
        f.station = ANY(:stations) and p.pressure = :level and
        p.smps is not null
        and p.drct is not null and valid >= :sts and valid < :ets
        {tlimit}""",
            tlimit=tlimit,
        )
        sqlargs["level"] = kwargs["level"]
    with get_sqlalchemy_conn(database) as conn:
        df = pd.read_sql(sql, conn, params=sqlargs, index_col=None)
    if not df.empty:
        # Make valid column timezone aware
        df["valid"] = df["valid"].dt.tz_localize(timezone.utc)
    # If sknt or drct are null, we want to set the other to null as well
    df.loc[pd.isnull(df["drct"]), "sknt"] = np.nan
    df.loc[pd.isnull(df["sknt"]), "drct"] = np.nan

    return df


def _make_textresult(station, df, **kwargs):
    """Generate a text table of windrose information

    Args:
      station (str): the station identifier
      df (pd.DataFrame): The dataframe with data
      units (str): the units of the `sknt` values
      nsector (int): number of sectors to divide rose into
      sname (str): the station name
      monthinfo (dict): information on month limiting
      hourinfo (dict): information on hour limiting
      level (int): in case of RAOB, which level do we care for
      bins (list): values to bin the wind speeds
      tzname (str): Time zone for the report.

    Returns:
      str of information"""
    if df.empty:
        return "No Data Found"
    wu = WINDUNITS[kwargs.get("units", "mph")]
    bins = kwargs.get("bins")
    if not hasattr(bins, "units"):
        bins = wu["bins"] * wu["units"]
        if kwargs.get("level") is not None:
            bins = RAOB_BINS[kwargs.get("units", "mph")] * wu["units"]
    # Effectively filters out the nulls
    df2 = df[df["drct"] >= 0]
    speed = df2["sknt"].values * mpunits("knots")
    direction = df2["drct"].values * mpunits("degree")
    calm_percent, dir_centers, table = histogram(
        speed, direction, bins, kwargs.get("nsector", 36)
    )
    sn = kwargs.get("sname", f"(({station}))")
    res = f"# Windrose Data Table (Percent Frequency) for {sn} ({station})\n"
    res += (
        f"# Observations Used/Missing/Total: {len(df2.index)}/"
        f"{len(df.index) - len(df2.index)}/{len(df.index)}\n"
    )
    res += f"# {_time_domain_string(df, kwargs.get('tzname'))}\n"
    res += f"# {_make_timelimit_string(kwargs).replace('↳', '')}\n"
    res += f"# Wind Speed Units: {wu['label']}\n"
    if kwargs.get("level") is not None:
        res += f"# RAOB Pressure (hPa) Level: {kwargs['level']}\n"
    res += (
        f"# Generated {utc():%d %b %Y %H:%M} UTC, "
        "contact: akrherz@iastate.edu\n"
    )
    res += "# First value in table is CALM\n"
    cols = ["Direction", "Calm"]
    # Print out Speed Bins
    for i, val in enumerate(bins.m):
        maxval = (
            "+"
            if i == bins.m.shape[0] - 1
            else f" {(bins.m[i + 1] - 0.1):4.1f}"
        )
        cols.append(f"{val:4.1f}{maxval}")

    delta = dir_centers.m[1] - dir_centers.m[0]
    res += ",".join([f"{c:9s}" for c in cols]) + "\n"
    for i, val in enumerate(dir_centers.m):
        minval = val - delta / 2.0
        if minval < 0:
            minval += 360.0
        maxval = np.min([360, val + delta / 2.0 - 1])
        ll = np.round(calm_percent.m, 2) if i == 0 else ""
        res += f"{minval:03.0f}-{maxval:03.0f}  ,{str(ll):9s},"
        res += ",".join(
            [f"{table.m[i, j]:9.3f}" for j in range(bins.m.shape[0])]
        )
        res += "\n"
    return res


def _time_domain_string(df, tzname):
    """Custom time label option."""
    sts = df["valid"].min().to_pydatetime()
    ets = df["valid"].max().to_pydatetime()
    timeformat = "%d %b %Y %I:%M %p"
    if tzname is not None:
        sts = sts.astimezone(ZoneInfo(tzname))
        ets = ets.astimezone(ZoneInfo(tzname))
    if tzname == "UTC":
        timeformat = "%d %b %Y %H:%M"
    return (
        f"{sts.strftime(timeformat)} - {ets.strftime(timeformat)} "
        f"{'' if tzname is None else tzname}"
    )


def _make_plot(station, df, **kwargs):
    """Generate a matplotlib windrose plot

    Args:
      station (str): station identifier
      df (pd.DataFrame): observations
      drct (list): list of wind directions
      units (str): units of wind speed
      nsector (int): number of bins to use for windrose
      rmax (float): radius of the plot
      hours (list): hour limit for plot
      month (list): month limit for plot
      sname (str): station name
      level (int): RAOB level in hPa of interest
      bins (list): values for binning the wind speeds
      tzname (str): Time zone this plot is produced in.
      cmap (colormap): Matplotlib colormap to use.
      plot_convention (str): Either `from` or `to`.
      nogenerated (bool): Disable plotting of Generated: string.
      generated_string (str): Hard code this field.

    Returns:
      matplotlib.Figure
    """
    wu = WINDUNITS[kwargs.get("units", "mph")]
    # Filters the missing values
    df2 = df[df["drct"] >= 0]
    direction = df2["drct"].values * mpunits("degree")
    if "speed" in df2.columns:
        speed = df2["speed"].values * wu["units"]
    else:
        speed = df2["sknt"].values * mpunits("knots")
    bins = kwargs.get("bins")
    if not hasattr(bins, "units"):
        bins = wu["bins"] * wu["units"]
        if kwargs.get("level") is not None:
            bins = RAOB_BINS[kwargs.get("units", "mph")] * wu["units"]
    if len(df2.index) < 5:
        wp = WindrosePlot()
        wp.ax.text(
            0.5,
            0.5,
            "Not Enough Data For Plot.",
            ha="center",
            transform=wp.ax.transAxes,
        )
        return wp.fig
    pc = kwargs.get("plot_convention", PLOT_CONVENTION_FROM)
    wp = plot(
        direction,
        speed,
        bins=bins,
        nsector=kwargs.get("nsector", 36),
        rmax=kwargs.get("rmax"),
        cmap=kwargs.get("cmap"),
        plot_convention=pc,
    )

    # Now we put some fancy debugging info on the plot
    sn = kwargs.get("sname", f"(({station}))")
    level = kwargs.get("level")
    sl = "" if level is None else f" @{level} hPa"
    label = f"Windrose Plot for [{station}] {sn}{sl}\n"
    label += f"Obs Between: {_time_domain_string(df, kwargs.get('tzname'))}\n"
    tlimit = _make_timelimit_string(kwargs)
    if tlimit != "":
        label += f"{tlimit}"
    fitbox(wp.fig, label, 0.1, 0.99, 0.92, 0.99, ha="left")
    label = (
        "Summary\n"
        f"Obs Used: {len(df.index)}\n"
        f"Obs Without Wind: {len(df.index) - len(df2.index)}\n"
        f"Avg Speed: {speed.m.mean():.1f} {kwargs.get('units', 'mph')}"
    )
    wp.fig.text(0.96, 0.11, label, ha="right", fontsize=14)
    if not kwargs.get("nogenerated", False):
        wp.fig.text(
            0.02,
            0.075,
            kwargs.get("generated_string", f"Generated: {utc():%d %b %Y}"),
            va="bottom",
            fontsize=14,
        )
    # Denote the direction blowing from
    con = "Meteorology" if pc == PLOT_CONVENTION_FROM else "Engineering"
    lbl = (
        f"Calm values are < {bins.m[0]:.1f} {kwargs.get('units', 'mph')}\n"
        f"Bar Convention: {con}\n"
        "Flow arrows relative to plot center."
    )
    wp.fig.text(0.02, 0.1, lbl, va="bottom")

    return wp.fig


[docs] def windrose(station, **kwargs): """Utility function that generates a windrose plot Args: station (str): station identifier to search database for database (str,optional): database name to look for data within months (list,optional): optional list of months to limit plot to hours (list,optional): optional list of hours to limit plot to sts (datetime,optional): start datetime ets (datetime,optional): end datetime units (str,optional): units to plot values as, default to `mph`. nsector (int,optional): number of bins to devide the windrose into justdata (boolean,optional): if True, write out the data only sname (str,optional): The name of this station, if not specified it will default to the ((`station`)) identifier sknt (list,optional): A list of wind speeds in knots already generated drct (list,optional): A list of wind directions (deg N) already generated valid (list,optional): A list of valid datetimes (with tzinfo set), used in the case of providing sknt and drct. level (int,optional): In case of RAOB, which level interests us (hPa) bins (list,optional): bins to use for the wind speed tzname (str,optional): Time zone to use for the plot. cmap (cmap,optional): Matplotlib colormap to pass to barplot. limit_by_doy (bool,optional): Use the `sts` and `ets` to define a period of days each year to limit the data by. Default `false`. plot_convention (str): How to orient the bars. The default is the meteorological convention of `from`, the other option is `to`. This only impacts the plot, the provided data should still follow `from`. Returns: matplotlib.Figure instance or textdata """ wu = WINDUNITS[kwargs.get("units", "mph")] if kwargs.get("sts") is None: kwargs["sts"] = datetime(1970, 1, 1) if kwargs.get("ets") is None: kwargs["ets"] = datetime(2050, 1, 1) sknt = kwargs.get("sknt") drct = kwargs.get("drct") if sknt is None or drct is None: df = _get_data(station, **kwargs) else: df = pd.DataFrame( {"sknt": sknt, "drct": drct, "valid": kwargs.get("valid")} ) # Make sure our bins have units bins = kwargs.get("bins") if not hasattr(bins, "units") and bins: kwargs["bins"] = bins * wu["units"] # Convert wind speed into the units we want here if df["sknt"].max() > 0: df["speed"] = (df["sknt"].values * mpunits("knots")).to(wu["units"]).m if kwargs.get("justdata", False): return _make_textresult(station, df, **kwargs) return _make_plot(station, df, **kwargs)