
Commit

Updated timeseries to handle all 2D variables.
WilliamsTravis committed May 15, 2024
1 parent f313855 commit 753bcb0
Showing 6 changed files with 96 additions and 96 deletions.
7 changes: 1 addition & 6 deletions reView/components/divs/time_series.py
@@ -143,14 +143,9 @@ def above_time_options_div(id_prefix, class_name=None):
         html.Div(
             className="six columns",
             children=dcc.Dropdown(
-                id=f"{id_prefix}_variable_time",
+                id=f"{id_prefix}_time_var_options",
                 style={"width": "100%"},
                 clearable=False,
-                value="capacity factor",
-                options=[
-                    {"label": "Capacity Factor", "value": "capacity factor"},
-                    {"label": "Generation (MW)", "value": "generation"},
-                ],
                 multi=False
             )
         )
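Note on this file: the hard-coded capacity-factor/generation options are removed, and the dropdown is now declared empty and filled at runtime by the new callback in callbacks.py below. A minimal sketch of that pattern (the id mirrors the diff; the surrounding layout and the dcc import style are assumptions):

from dash import dcc

# Declared with no options; a callback supplies `options` and `value`
# once the user has picked a scenario file.
variable_dropdown = dcc.Dropdown(
    id="rev_time_var_options",
    style={"width": "100%"},
    clearable=False,
    multi=False,
)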
7 changes: 6 additions & 1 deletion reView/index.py
@@ -20,7 +20,12 @@ def main():
     """Run reView."""
     init_logger(level=LOG_LEVEL)
     log_versions()
-    app.run_server(host=APP_HOST, port=APP_PORT, debug=False)
+    app.run_server(
+        host=APP_HOST,
+        port=APP_PORT,
+        debug=False,
+        dev_tools_hot_reload=True
+    )
 
 
 if __name__ == "__main__":
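Worth noting: Dash's run_server forwards dev_tools_* keywords to enable_dev_tools, and each dev-tools feature can be switched on independently of debug, so hot reload works here even though the debugger stays off. A sketch of the same call with placeholder values (APP_HOST and APP_PORT are defined elsewhere in reView):

# Hot reload watches source files and refreshes the browser on change;
# debug=False keeps the in-browser debugger and error overlay disabled.
app.run_server(
    host="0.0.0.0",   # placeholder for APP_HOST
    port=8050,        # placeholder for APP_PORT; 8050 is Dash's default
    debug=False,
    dev_tools_hot_reload=True,
)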
34 changes: 32 additions & 2 deletions reView/pages/rev/controller/callbacks.py
@@ -17,6 +17,7 @@
 
 from pathlib import Path
 
+import h5py
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
@@ -823,6 +824,34 @@ def dropdown_x_variables(
     return variable_options, val
 
 
+@app.callback(
+    Output("rev_time_var_options", "options"),
+    Output("rev_time_var_options", "value"),
+    Input("submit", "n_clicks"),
+    State("scenario_dropdown_a", "value"),
+    State("rev_time_var_options", "value")
+)
+@calls.log
+def dropdown_time_variables(_, scenario_a, old_variable):
+    """Return dropdown options for the timeseries variable."""
+    logger.debug("Setting timeseries variable options")
+
+    # Get the 2D (timeseries) datasets and create an option list
+    with h5py.File(scenario_a) as ds:
+        dsets = list(ds)
+        shapes = [len(ds[dset].shape) for dset in dsets]
+    variables = [dsets[i] for i, s in enumerate(shapes) if s == 2]
+    variable_options = [{"label": var, "value": var} for var in variables]
+
+    # If this has already been built, use the existing value
+    if old_variable in variables:
+        variable = old_variable
+    else:
+        variable = variables[0]
+
+    return variable_options, variable
+
+
 # @app.callback(
 #     Output("rev_additional_scenarios", "options"),
 #     Output("rev_additional_scenarios_time", "options"),
@@ -1126,7 +1155,7 @@ def figure_map(
     Input("map_signal", "children"),
     Input("rev_time_trace_options_tab", "value"),
     Input("rev_time_period_options_tab", "value"),
-    Input("rev_variable_time", "value"),
+    Input("rev_time_var_options", "value"),
     Input("rev_additional_scenarios", "value"),
     Input("rev_chart", "selectedData"),
     Input("rev_map", "selectedData"),
@@ -1170,7 +1199,8 @@ def figure_timeseries(
             file,
             map_selection,
             chart_selection,
-            map_click
+            map_click,
+            variable
         )
     except (KeyError, ValueError) as exc:
         raise PreventUpdate from exc
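The core of the new dropdown_time_variables callback is the dataset-discovery step: keep only the HDF5 datasets with two dimensions (sites x time steps) and offer those as options. A standalone sketch of that logic (the file path and helper name are placeholders; in the app the path comes from the scenario dropdown):

import h5py

def list_2d_datasets(path):
    """Return the names of all 2D datasets in an HDF5 file."""
    with h5py.File(path, "r") as ds:
        # ds iterates over top-level dataset names; .ndim is the rank
        return [name for name in ds if ds[name].ndim == 2]

# Option dicts in the shape Dash dropdowns expect
options = [{"label": v, "value": v} for v in list_2d_datasets("scenario_a.h5")]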
52 changes: 23 additions & 29 deletions reView/pages/rev/controller/element_builders.py
@@ -299,7 +299,7 @@ def cumulative_sum(self, x_var, y_var):
 
         return self._update_fig_layout(fig, y_var)
 
-    def figure(self, chart_type="cumsum", x_var=None, y_var=None, bins=None,
+    def figure(self, chart_type="cumsum", x_var=None, y_var=None, bins=None,
                trace_type="bar", time_period="original"):
         """Return plotly figure for requested chart type."""
         if chart_type == "cumsum":
@@ -318,7 +318,7 @@ def figure(self, chart_type="cumsum", x_var=None, y_var=None, bins=None,
         elif chart_type == "timeseries":
             fig = self.timeseries(y_var, trace_type, time_period)
         elif chart_type == "summary_table":
-            fig= self.summary_table()
+            fig = self.summary_table()
 
         return fig

@@ -399,17 +399,14 @@ def summary_table(self):
         table = None
         for key, df in self.datasets.items():
             break
-        return None
+        return table
 
     def timeseries(self, y_var="capacity factor", trace_type="bar",
                    time_period="original"):
         """Render time series."""
         # Check for valid options
-        try:
-            assert trace_type in ["bar", "line"]
-        except:
-            raise AssertionError(f"{trace_type} traces not available for this "
-                                 "graph.")
+        msg = f"{trace_type} traces not available for this graph."
+        assert trace_type in ["bar", "line"], msg
 
         # Create the plottable dataframe
         main_df = None
@@ -460,7 +457,7 @@ def timeseries(self, y_var="capacity factor", trace_type="bar",
         # Update the layout and axes
         ymin = main_df[y].min()
         ymax = main_df[y].max()
-        # fig.update_layout(yaxis_range=[ymin, ymax * 1.1])
+        fig.update_layout(yaxis_range=[ymin, ymax * 1.1])
         fig.update_xaxes(showspikes=True)
         fig.update_yaxes(showspikes=True)
@@ -471,53 +468,50 @@
                 main_df["time"].iloc[500]
             ]
         )
-
-        return self._update_fig_layout(fig, y_var)
+        fig = self._update_fig_layout(fig, y_var)
+        return fig
 
     def _aggregate_timeseries(self, data, y_var="capacity factor",
-                              time_period="daily"):
+                              time_period="daily", fun="mean"):
         """Aggregate timeseries to a given time period."""
         # Check inputs
-        try:
-            assert time_period in ["daily", "hour", "weekly", "monthly", "cdf",
-                                   "pdf"]
-        except:
-            raise AssertionError("Cannot aggregate timeseries to "
-                                 f"{time_period} steps.")
+        msg = f"Cannot aggregate timeseries to {time_period} steps."
+        periods = ["daily", "hour", "weekly", "monthly", "cdf", "pdf"]
+        assert time_period in periods, msg
 
         # Aggregate temporally, or via a distribution
         if time_period not in ["cdf", "pdf"]:
             # Aggregate data
             grouped = data.groupby(time_period)
-            if y_var == "capacity factor":
+            if fun == "mean":
                 out = grouped[y_var].mean()
             else:
                 out = grouped[y_var].sum()
 
             # Reset time stamp
-            t1 = data["time"].iloc[0]
-            t2 = data["time"].iloc[-1]
+            time1 = data["time"].iloc[0]
+            time2 = data["time"].iloc[-1]
 
             if time_period == "daily":
-                time = pd.date_range(t1, t2, freq="1D")
+                time = pd.date_range(time1, time2, freq="1D")
             elif time_period == "hour":
                 hours = range(0, 24)
                 time = [dt.datetime(1, 1, 1, h) for h in hours]
                 time = [t.strftime("%H:%M") for t in time]
             elif time_period == "weekly":
-                time = pd.date_range(t1, t2, freq="1W")
+                time = pd.date_range(time1, time2, freq="1W")
             elif time_period == "monthly":
-                time = pd.date_range(t1, t2, freq="MS")
+                time = pd.date_range(time1, time2, freq="MS")
                 time = [t + pd.offsets.MonthEnd() for t in time]
                 time = [str(t) for t in time]
 
             # Rebuild data
             data = pd.DataFrame({y_var: out, "time": time})
 
         else:
             data = self._distributions(data, y_var)
             if time_period == "cdf":
-                data = data[[y_var, "cdf"]]
+                data = data[[y_var, "cdf"]]
             elif time_period == "pdf":
                 data = data[[y_var, "pdf"]]
                 data.columns = [y_var, "Probability"]
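The new fun="mean"/"sum" switch decouples the aggregation method from the variable name (previously only "capacity factor" was averaged; everything else was summed). A compact, self-contained version of that step, assuming a frame with a precomputed period column like the ones read_timeseries builds:

import pandas as pd

def aggregate(data, y_var, time_period="daily", fun="mean"):
    # Group on the precomputed period column ("daily", "monthly", ...)
    grouped = data.groupby(time_period)[y_var]
    return grouped.mean() if fun == "mean" else grouped.sum()

# e.g. aggregate(df, "rep_profiles_0", time_period="monthly", fun="mean")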
@@ -618,6 +612,6 @@ def _update_fig_layout(self, fig, y_var=None):
         layout["title"]["text"] = self.plot_title
         layout["legend_title_text"] = self.GROUP
         fig.update_layout(**layout)
-        if y_var:
-            fig.update_layout(yaxis={"range": self._plot_range(y_var)})
+        # if y_var:
+        #     fig.update_layout(yaxis={"range": self._plot_range(y_var)})
         return fig
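The _distributions helper used by the cdf/pdf branch in _aggregate_timeseries is not part of this diff. For reference, a plausible stand-in that yields the y_var/"cdf"/"pdf" columns that branch selects from (this is an assumption about its behavior, not the repository's actual implementation):

import numpy as np
import pandas as pd

def distributions(data, y_var, bins=100):
    counts, edges = np.histogram(data[y_var].dropna(), bins=bins)
    pdf = counts / counts.sum()               # probability mass per bin
    cdf = np.cumsum(pdf)                      # runs from ~0 up to 1.0
    centers = (edges[:-1] + edges[1:]) / 2    # bin midpoints as x values
    return pd.DataFrame({y_var: centers, "pdf": pdf, "cdf": cdf})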
5 changes: 3 additions & 2 deletions reView/pages/rev/model.py
@@ -398,13 +398,14 @@ def cache_map_data(signal_dict):
 
 
 @cache4.memoize()
-def cache_timeseries(file, map_selection, chart_selection, map_click=None):
+def cache_timeseries(file, map_selection, chart_selection, map_click=None,
+                     variable="rep_profiles_0"):
     """Read and store a timeseries data frame with site selections."""
     # Convert map and chart selections into site indices
     gids = point_filter(map_selection, chart_selection, map_click)
 
     # Read in data frame
-    data = read_timeseries(file, gids)
+    data = read_timeseries(file, gids, nsteps=None, variable=variable)
 
     return data
 
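A hypothetical call showing the new keyword in use (the path and selections are placeholders). Since memoized caches key on the function arguments, adding variable to the signature also gives each dataset its own cache entry instead of colliding on one:

df = cache_timeseries(
    "outputs/supply_curve.h5",   # placeholder path
    map_selection=None,
    chart_selection=None,
    map_click=None,
    variable="rep_profiles_0",
)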
87 changes: 31 additions & 56 deletions reView/utils/functions.py
@@ -440,7 +440,7 @@ def read_file(file, nrows=None):
     return data
 
 
-def read_timeseries(file, gids=None, nsteps=None):
+def read_timeseries(file, gids=None, nsteps=None, variable="rep_profiles_0"):
     # pylint: disable=no-member
     """Read in a time-series from an HDF5 file.
@@ -452,6 +452,8 @@ def read_timeseries(file, gids=None, nsteps=None):
         List of sc_point_gids to use to filter sites.
     nsteps : int
         Number of time-steps to read in.
+    variable : str
+        Name of the HDF5 data set to return.
 
     Returns
     -------
@@ -479,70 +481,43 @@
     meta = meta[meta["sc_point_gid"].isin(gids)]
     idx = list(meta.index)
 
-    # Get capacity, time index, format
-    capacity = meta["capacity"].values
-
-    # If it has any "rep_profiles_" datasets it rep-profiles
-    if "bespoke" not in str(file):
-        # Break down time entries
-        time = [t.decode() for t in ds["time_index"][:nsteps]]
-        dtime = [dt.datetime.strptime(t, TIME_PATTERN) for t in time]
-        minutes = [t.minute for t in dtime]
-        hours = [t.hour for t in dtime]
-        days = [t.timetuple().tm_yday for t in dtime]
-        weeks = [t.isocalendar().week for t in dtime]
-        months = [t.month for t in dtime]
-
-        # Process generation data
-        cf = ds["rep_profiles_0"][:nsteps, idx]
-        gen = cf * capacity
-        cf = cf.mean(axis=1)
-        gen = gen.sum(axis=1)
-
-    # Otherwise, it's bespoke and has each year
-    else:
-        # Get all capacity factor keys
-        cf_keys = [key for key in ds.keys() if "cf_profile-" in key]
-        time_keys = [key for key in ds.keys() if "time_index-" in key]
-        scale = ds[cf_keys[0]].attrs["scale_factor"]
-
-        # Build complete time-series at each site
-        all_cfs = []
-        all_time = []
-        for i, cf_key in enumerate(cf_keys):
-            time_key = time_keys[i]
-            cf = ds[cf_key][:nsteps, idx]
-            time = ds[time_key][:nsteps]
-            all_cfs.append(cf)
-            all_time.append(time)
-        site_cfs = np.concatenate(all_cfs)
-        time = np.concatenate(all_time)
-        site_gen = site_cfs * capacity
-
-        # Build single long-term average timeseries for all sites
-        cf = np.mean(site_cfs, axis=1) / scale
-        gen = site_gen.sum(axis=1)
-
-        # This will only take the average across the year
-        time = [t.decode() for t in time]
-        dtime = [dt.datetime.strptime(t, TIME_PATTERN) for t in time]
-        days = [t.timetuple().tm_yday for t in dtime]
-        weeks = [t.isocalendar().week for t in dtime]
-        months = [t.month for t in dtime]
-        hours = [t.hour for t in dtime]
-        minutes = [t.minute for t in dtime]
-
+    # If no time index found, raise error
+    variables = list(ds)
+    if not any("time_index" in var for var in variables):
+        raise NotImplementedError("Cannot handle the time series formatting "
+                                  f"in {file}.")
+
+    # If dset is associated with a year time index, use that time index
+    time_index = "time_index"
+    if "-" in variable and "time_index" not in variables:
+        year = int(variable.split("-")[-1])
+        time_index = f"time_index-{year}"
+
+    # Break down time entries
+    time = [t.decode() for t in ds[time_index][:nsteps]]
+    dtime = [dt.datetime.strptime(t, TIME_PATTERN) for t in time]
+    minutes = [t.minute for t in dtime]
+    hours = [t.hour for t in dtime]
+    days = [t.timetuple().tm_yday for t in dtime]
+    weeks = [t.isocalendar().week for t in dtime]
+    months = [t.month for t in dtime]
+
+    # Process target data set
+    data = ds[variable][:nsteps, idx]
+    data = data.mean(axis=1)
+
     # Close dataset, how do we handle read errors with context management?
     ds.close()
 
     # Compile data frame
     data = pd.DataFrame({
         "time": time,
         "minute": minutes,
         "hour": hours,
         "daily": days,
         "weekly": weeks,
         "monthly": months,
-        "capacity factor": cf,
-        "generation": gen
+        variable: data
     })
 
     return data
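On the open question in the comment above ("how do we handle read errors with context management?"): a with-block closes the file even when a read raises, which would remove the explicit ds.close(). A sketch of the same read wrapped that way (the function name and simplified arguments are hypothetical):

import h5py

def read_2d_mean(path, variable, nsteps=None, idx=None):
    with h5py.File(path, "r") as ds:
        # Resolve a per-year index such as "time_index-2012" when the
        # variable name carries a year suffix, mirroring the logic above.
        names = list(ds)
        time_index = "time_index"
        if "-" in variable and "time_index" not in names:
            time_index = f"time_index-{int(variable.split('-')[-1])}"
        time = [t.decode() for t in ds[time_index][:nsteps]]
        data = ds[variable][:nsteps]
        if idx is not None:
            data = data[:, idx]   # keep only the selected sites
    return time, data.mean(axis=1)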
