From b22e97632eadb9a9e657586ae3ca65cd35ceb7f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20M=C3=A4rz?= Date: Tue, 29 Aug 2023 12:11:44 +0200 Subject: [PATCH] Update dist_select plots --- .../distributions/distribution_utils.py | 9 ++++---- xgboostlss/distributions/flow_utils.py | 9 ++++---- .../mixture_distribution_utils.py | 2 +- .../multivariate_distribution_utils.py | 23 +++---------------- 4 files changed, 12 insertions(+), 31 deletions(-) diff --git a/xgboostlss/distributions/distribution_utils.py b/xgboostlss/distributions/distribution_utils.py index 739e1ab0..790aa0cc 100644 --- a/xgboostlss/distributions/distribution_utils.py +++ b/xgboostlss/distributions/distribution_utils.py @@ -616,12 +616,11 @@ def dist_select(self, } ) dist_list.append(fit_df) - fit_df = pd.concat(dist_list).sort_values(by=self.loss_fn, ascending=True) - fit_df["rank"] = fit_df[self.loss_fn].rank().astype(int) - fit_df.set_index(fit_df["rank"], inplace=True) pbar.update(1) pbar.set_description(f"Fitting of candidate distributions completed") - + fit_df = pd.concat(dist_list).sort_values(by=self.loss_fn, ascending=True) + fit_df["rank"] = fit_df[self.loss_fn].rank().astype(int) + fit_df.set_index(fit_df["rank"], inplace=True) if plot: # Select best distribution best_dist = fit_df[fit_df["rank"] == 1].reset_index(drop=True) @@ -652,7 +651,7 @@ def dist_select(self, sns.kdeplot(target.reshape(-1, ), label="Actual") sns.kdeplot(dist_samples.reshape(-1, ), label=f"Best-Fit: {best_dist['distribution'].values[0]}") plt.legend() - plt.title("Actual vs. Best-Fit Density") + plt.title("Actual vs. Best-Fit Density", fontweight="bold", fontsize=16) plt.show() fit_df.drop(columns=["rank", "params"], inplace=True) diff --git a/xgboostlss/distributions/flow_utils.py b/xgboostlss/distributions/flow_utils.py index c7d4b8c6..cf8fafad 100644 --- a/xgboostlss/distributions/flow_utils.py +++ b/xgboostlss/distributions/flow_utils.py @@ -676,12 +676,11 @@ def flow_select(self, } ) flow_list.append(fit_df) - fit_df = pd.concat(flow_list).sort_values(by=flow_sel.loss_fn, ascending=True) - fit_df["rank"] = fit_df[flow_sel.loss_fn].rank().astype(int) - fit_df.set_index(fit_df["rank"], inplace=True) pbar.update(1) pbar.set_description(f"Fitting of candidate normalizing flows completed") - + fit_df = pd.concat(flow_list).sort_values(by=flow_sel.loss_fn, ascending=True) + fit_df["rank"] = fit_df[flow_sel.loss_fn].rank().astype(int) + fit_df.set_index(fit_df["rank"], inplace=True) if plot: # Select normalizing flow with the lowest loss best_flow = fit_df[fit_df["rank"] == 1].reset_index(drop=True) @@ -706,7 +705,7 @@ def flow_select(self, sns.kdeplot(target.reshape(-1, ), label="Actual") sns.kdeplot(flow_samples.reshape(-1, ), label=f"Best-Fit: {best_flow['NormFlow'].values[0]}") plt.legend() - plt.title("Actual vs. Best-Fit Density") + plt.title("Actual vs. Best-Fit Density", fontweight="bold", fontsize=16) plt.show() fit_df.drop(columns=["rank", "params"], inplace=True) diff --git a/xgboostlss/distributions/mixture_distribution_utils.py b/xgboostlss/distributions/mixture_distribution_utils.py index 7fd5b764..2c176959 100644 --- a/xgboostlss/distributions/mixture_distribution_utils.py +++ b/xgboostlss/distributions/mixture_distribution_utils.py @@ -672,7 +672,7 @@ def dist_select(self, sns.kdeplot(target.reshape(-1,), label="Actual") sns.kdeplot(dist_samples.reshape(-1,), label=f"Best-Fit: {best_dist['distribution'].values[0]}") plt.legend() - plt.title("Actual vs. Best-Fit Density") + plt.title("Actual vs. Best-Fit Density", fontweight="bold", fontsize=16) plt.show() fit_df.drop(columns=["rank", "params", "dist_pos", "M"], inplace=True) diff --git a/xgboostlss/distributions/multivariate_distribution_utils.py b/xgboostlss/distributions/multivariate_distribution_utils.py index 2173766a..358f5fc4 100644 --- a/xgboostlss/distributions/multivariate_distribution_utils.py +++ b/xgboostlss/distributions/multivariate_distribution_utils.py @@ -568,12 +568,11 @@ def dist_select(self, } ) dist_list.append(fit_df) - fit_df = pd.concat(dist_list).sort_values(by=dist_sel.loss_fn, ascending=True) - fit_df["rank"] = fit_df[dist_sel.loss_fn].rank().astype(int) - fit_df.set_index(fit_df["rank"], inplace=True) pbar.update(1) pbar.set_description(f"Fitting of candidate distributions completed") - + fit_df = pd.concat(dist_list).sort_values(by=dist_sel.loss_fn, ascending=True) + fit_df["rank"] = fit_df[dist_sel.loss_fn].rank().astype(int) + fit_df.set_index(fit_df["rank"], inplace=True) if plot: warnings.simplefilter(action='ignore', category=UserWarning) # Select distribution @@ -630,22 +629,6 @@ def dist_select(self, g.fig.suptitle("Actual vs. Best-Fit Density", weight="bold", fontsize=16) g.fig.tight_layout(rect=[0, 0, 1, 0.9]) - # print( - # ggplot(plot_df, - # aes(x="value", - # color="type")) + - # geom_density(alpha=0.5) + - # facet_wrap("target", - # scales="free", - # ncol=ncol) + - # theme_bw(base_size=15) + - # theme(figure_size=figure_size, - # legend_position="right", - # legend_title=element_blank(), - # plot_title=element_text(hjust=0.5)) + - # labs(title=f"Actual vs. Fitted Density") - # ) - fit_df.drop(columns=["rank", "params"], inplace=True) return fit_df