From 4100f27c78a648d82be88b48123c85db294bcc6f Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Mon, 24 Jan 2022 12:49:51 -0800
Subject: [PATCH 01/10] add output_single_cell_by_guide to normalize

---
 1.generate-profiles/2.normalize.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index ad9f664..caf93f3 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -75,6 +75,7 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
 normalize_singlecell_from_single_file = sc_config["output_one_single_cell_file_only"]
 
 normalize_args = config["options"]["profile"]["normalize"]
+output_single_cell_by_guide = normalize_args["output_single_cell_by_guide"]
 normalize_levels = normalize_args["levels"]
 normalize_by_samples = normalize_args["by_samples"]
 normalize_these_features = normalize_args["features"]
@@ -128,5 +129,22 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
             compression_options=compression,
             float_format=float_format,
         )
+
+        if data_level == "single_cell":
+            if output_single_cell_by_guide:
+                sc_by_guide_folder = os.path.join(single_cell_input_dir, "single_cell_by_guide")
+                if not os.path.isdir(sc_by_guide_folder):
+                    os.mkdir(sc_by_guide_folder)
+                df = read_csvs_with_chunksize(output_file)
+                for guide in df['Metadata_Foci_Barcode_MatchedTo_Barcode']:
+                    guide_file_name = f"{output_file.split('__')[0].split('/')[-1]}__{guide}.csv.gz"
+                    guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
+                    if not os.path.exists(guide_path):
+                        guide_df = pd.DataFrame()
+                    else:
+                        guide_df = read_csvs_with_chunksize(guide_path)
+                    guide_df = guide_df.append(df.loc[df['Metadata_Foci_Barcode_MatchedTo_Barcode'] == guide])
+                    guide_df.to_csv(guide_path, index=False)
+
 print("Finished 2.normalize.")
 logging.info("Finished 2.normalize.")

From 833a3e9a9d34d7148f5dfaeb50c189ec50c22b48 Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Mon, 24 Jan 2022 14:42:07 -0800
Subject: [PATCH 02/10] add image align to single_cell_by_guide

---
 1.generate-profiles/2.normalize.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index caf93f3..97ebbe7 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -70,6 +70,7 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
 normalize_input_files = config["files"]["aggregate_files"]
 normalize_output_files = config["files"]["normalize_files"]
 single_cell_file = config["files"]["single_file_only_output_file"]
+image_file = config["files"]["image_file"]
 
 sc_config = config["options"]["profile"]["single_cell"]
 normalize_singlecell_from_single_file = sc_config["output_one_single_cell_file_only"]
@@ -132,6 +133,14 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
 
         if data_level == "single_cell":
             if output_single_cell_by_guide:
+                image_df = pd.read_csv(image_file, sep='\t')
+                keep_columns = []
+                for col in image_df.columns:
+                    if 'Align_' in col:
+                        keep_columns.append(col)
+                keep_columns.append('Metadata_site')
+                image_df = image_df.loc[:, keep_columns]
+
                 sc_by_guide_folder = os.path.join(single_cell_input_dir, "single_cell_by_guide")
                 if not os.path.isdir(sc_by_guide_folder):
                     os.mkdir(sc_by_guide_folder)
@@ -143,7 +152,9 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
                         guide_df = pd.DataFrame()
                     else:
                         guide_df = read_csvs_with_chunksize(guide_path)
-                    guide_df = guide_df.append(df.loc[df['Metadata_Foci_Barcode_MatchedTo_Barcode'] == guide])
+                    append_df = df.loc[df['Metadata_Foci_Barcode_MatchedTo_Barcode'] == guide]
+                    append_df = append_df.merge(image_df, left_on='Metadata_Foci_site', right_on='Metadata_site', validate='1:1')
+                    guide_df = guide_df.append(append_df)
                     guide_df.to_csv(guide_path, index=False)
 
 print("Finished 2.normalize.")
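
Note on PATCH 01-02: the new block splits each normalized single-cell file into one csv.gz per guide barcode, pulls the per-site Align_ columns from the image table into every row, and appends to any per-guide file that already exists so rows accumulate across data splits. A condensed, standalone sketch of that flow follows; the paths are invented, the column names are the ones used in the patch, the recipe's read_csvs_with_chunksize helper is replaced by a plain pd.read_csv, and pd.concat stands in for the DataFrame.append call used here.

    import os
    import pandas as pd

    # Per-site image table: keep only the alignment columns plus the site key.
    image_df = pd.read_csv("image.tsv", sep="\t")  # hypothetical path
    keep_columns = [col for col in image_df.columns if "Align_" in col]
    keep_columns.append("Metadata_site")
    image_df = image_df.loc[:, keep_columns]

    # Normalized single-cell profiles for one data split (one row per cell).
    df = pd.read_csv("single_cell_normalized__site-1.csv.gz")  # hypothetical path

    out_dir = "single_cell_by_guide"
    os.makedirs(out_dir, exist_ok=True)

    for guide in set(df["Metadata_Foci_Barcode_MatchedTo_Barcode"]):
        guide_path = os.path.join(out_dir, f"single_cell__{guide}.csv.gz")
        # Start from the existing per-guide file, if any, so splits accumulate.
        old = pd.read_csv(guide_path) if os.path.exists(guide_path) else pd.DataFrame()
        new = df.loc[df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide]
        new = new.merge(image_df, left_on="Metadata_Foci_site", right_on="Metadata_site")
        pd.concat([old, new], ignore_index=True).to_csv(guide_path, index=False)
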
From fae7216b5500eb2ee842afd94f2bb9d418c3056d Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Thu, 27 Jan 2022 13:04:28 -0800
Subject: [PATCH 03/10] debug output_single_cell_by_guide

---
 1.generate-profiles/2.normalize.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index 97ebbe7..2e0bcb1 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -133,6 +133,8 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
 
         if data_level == "single_cell":
             if output_single_cell_by_guide:
+                print (f"Now outputting normalized single cell profiles by guide for split {data_split_site}")
+                logging.info(f"Now outputting normalized single cell profiles by guide for split {data_split_site}")
                 image_df = pd.read_csv(image_file, sep='\t')
                 keep_columns = []
                 for col in image_df.columns:
@@ -146,14 +148,14 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
                     os.mkdir(sc_by_guide_folder)
                 df = read_csvs_with_chunksize(output_file)
                 for guide in df['Metadata_Foci_Barcode_MatchedTo_Barcode']:
-                    guide_file_name = f"{output_file.split('__')[0].split('/')[-1]}__{guide}.csv.gz"
+                    guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}.csv.gz"
                     guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
                     if not os.path.exists(guide_path):
                         guide_df = pd.DataFrame()
                     else:
                         guide_df = read_csvs_with_chunksize(guide_path)
                     append_df = df.loc[df['Metadata_Foci_Barcode_MatchedTo_Barcode'] == guide]
-                    append_df = append_df.merge(image_df, left_on='Metadata_Foci_site', right_on='Metadata_site', validate='1:1')
+                    append_df = append_df.merge(image_df, left_on='Metadata_Foci_site', right_on='Metadata_site')
                     guide_df = guide_df.append(append_df)
                     guide_df.to_csv(guide_path, index=False)
 

From c3d2a78440c56be282398ece8cd05f8662eb5551 Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Fri, 28 Jan 2022 10:20:34 -0800
Subject: [PATCH 04/10] iterate set of guides

---
 1.generate-profiles/2.normalize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index 2e0bcb1..3d85c06 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -147,7 +147,7 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
                 if not os.path.isdir(sc_by_guide_folder):
                     os.mkdir(sc_by_guide_folder)
                 df = read_csvs_with_chunksize(output_file)
-                for guide in df['Metadata_Foci_Barcode_MatchedTo_Barcode']:
+                for guide in set(df['Metadata_Foci_Barcode_MatchedTo_Barcode']):
                     guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}.csv.gz"
                     guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
                     if not os.path.exists(guide_path):

From 5ce8d1768e1e758b0b763a8c3e76d5dd4b558c2d Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Fri, 28 Jan 2022 10:30:07 -0800
Subject: [PATCH 05/10] add gene to guide_file_name

---
 1.generate-profiles/2.normalize.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index 3d85c06..3d6f398 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -148,7 +148,8 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
                     os.mkdir(sc_by_guide_folder)
                 df = read_csvs_with_chunksize(output_file)
                 for guide in set(df['Metadata_Foci_Barcode_MatchedTo_Barcode']):
-                    guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}.csv.gz"
+                    gene = df[df['Metadata_Foci_Barcode_MatchedTo_Barcode']==guide]['Metadata_Foci_Barcode_MatchedTo_GeneCode'].tolist()[0]
+                    guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}_{gene}.csv.gz"
                     guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
                     if not os.path.exists(guide_path):
                         guide_df = pd.DataFrame()
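
Note on PATCH 03-05: str() is needed because output_file is a pathlib.Path built earlier in the script, and Path objects have no .split method; iterating over set(...) writes each per-guide file once per guide instead of once per cell row; and the matched gene code is added to the file name. Assuming the split-level output name contains a "__" separator, as the string split implies, the name is assembled roughly like this (the path, barcode, and gene below are invented for illustration):

    import pathlib

    output_file = pathlib.Path("profiles/single_cell__site-1_normalized.csv.gz")  # hypothetical
    prefix = str(output_file).split("__")[0].split("/")[-1]  # -> "single_cell"
    guide = "AAACCCGGGTTTACGT"  # hypothetical guide barcode
    gene = "TP53"  # hypothetical gene code
    guide_file_name = f"{prefix}__{guide}_{gene}.csv.gz"
    print(guide_file_name)  # single_cell__AAACCCGGGTTTACGT_TP53.csv.gz
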
From 7c4b58b23222e4bfdb58a4b77fb94d6ccd4c48a0 Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Fri, 28 Jan 2022 10:30:21 -0800
Subject: [PATCH 06/10] black

---
 1.generate-profiles/2.normalize.py | 32 ++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index 3d6f398..90b6dbe 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -133,30 +133,42 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
 
         if data_level == "single_cell":
             if output_single_cell_by_guide:
-                print (f"Now outputting normalized single cell profiles by guide for split {data_split_site}")
-                logging.info(f"Now outputting normalized single cell profiles by guide for split {data_split_site}")
-                image_df = pd.read_csv(image_file, sep='\t')
+                print(
+                    f"Now outputting normalized single cell profiles by guide for split {data_split_site}"
+                )
+                logging.info(
+                    f"Now outputting normalized single cell profiles by guide for split {data_split_site}"
+                )
+                image_df = pd.read_csv(image_file, sep="\t")
                 keep_columns = []
                 for col in image_df.columns:
-                    if 'Align_' in col:
+                    if "Align_" in col:
                         keep_columns.append(col)
-                keep_columns.append('Metadata_site')
+                keep_columns.append("Metadata_site")
                 image_df = image_df.loc[:, keep_columns]
 
-                sc_by_guide_folder = os.path.join(single_cell_input_dir, "single_cell_by_guide")
+                sc_by_guide_folder = os.path.join(
+                    single_cell_input_dir, "single_cell_by_guide"
+                )
                 if not os.path.isdir(sc_by_guide_folder):
                     os.mkdir(sc_by_guide_folder)
                 df = read_csvs_with_chunksize(output_file)
-                for guide in set(df['Metadata_Foci_Barcode_MatchedTo_Barcode']):
-                    gene = df[df['Metadata_Foci_Barcode_MatchedTo_Barcode']==guide]['Metadata_Foci_Barcode_MatchedTo_GeneCode'].tolist()[0]
+                for guide in set(df["Metadata_Foci_Barcode_MatchedTo_Barcode"]):
+                    gene = df[df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide][
+                        "Metadata_Foci_Barcode_MatchedTo_GeneCode"
+                    ].tolist()[0]
                     guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}_{gene}.csv.gz"
                     guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
                     if not os.path.exists(guide_path):
                         guide_df = pd.DataFrame()
                     else:
                         guide_df = read_csvs_with_chunksize(guide_path)
-                    append_df = df.loc[df['Metadata_Foci_Barcode_MatchedTo_Barcode'] == guide]
-                    append_df = append_df.merge(image_df, left_on='Metadata_Foci_site', right_on='Metadata_site')
+                    append_df = df.loc[
+                        df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide
+                    ]
+                    append_df = append_df.merge(
+                        image_df, left_on="Metadata_Foci_site", right_on="Metadata_site"
+                    )
                     guide_df = guide_df.append(append_df)
                     guide_df.to_csv(guide_path, index=False)
 
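
Note on PATCH 06: this is a pure black reformat of the loop above. Two asides on that final form: DataFrame.append, used to accumulate rows, was deprecated in pandas 1.4 and removed in 2.0, so pd.concat is the forward-compatible replacement; and because the loop filters the full frame once per guide, a groupby can partition it in a single pass. A sketch of the groupby variant, which assumes the accumulate-across-splits read-back is not needed and uses an invented helper name:

    import os
    import pandas as pd

    def write_per_guide(df, out_dir, prefix):
        # One pass over the frame: groupby yields each (guide, gene) group exactly once.
        os.makedirs(out_dir, exist_ok=True)
        group_cols = [
            "Metadata_Foci_Barcode_MatchedTo_Barcode",
            "Metadata_Foci_Barcode_MatchedTo_GeneCode",
        ]
        for (guide, gene), guide_df in df.groupby(group_cols):
            guide_df.to_csv(
                os.path.join(out_dir, f"{prefix}__{guide}_{gene}.csv.gz"), index=False
            )
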
From f8a1fb08b168be9ee2337b8461072b9f074ff82f Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Tue, 8 Feb 2022 10:04:15 -0800
Subject: [PATCH 07/10] parallelize single_cell_by_guide creation

---
 1.generate-profiles/2.normalize.py | 40 ++++++++++++++++--------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index 90b6dbe..d1ec3b3 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -6,6 +6,7 @@
 import logging
 import traceback
 
 import pandas as pd
+from joblib import Parallel, delayed
 
 from pycytominer import normalize
@@ -91,6 +92,25 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
     config["experiment"], sites, split_info, separator="___"
 )
 
+def append_to_guide_csv(guide, image_df):
+    append_df = df.loc[
+        df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide
+    ]
+    gene = list(append_df["Metadata_Foci_Barcode_MatchedTo_Barcode"])[0]
+    guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}_{gene}.csv.gz"
+    guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
+    if not os.path.exists(guide_path):
+        guide_df = pd.DataFrame()
+    else:
+        guide_df = read_csvs_with_chunksize(guide_path)
+
+    append_df = append_df.merge(
+        image_df, left_on="Metadata_Foci_site", right_on="Metadata_site"
+    )
+    guide_df = guide_df.append(append_df)
+    guide_df.to_csv(guide_path, index=False)
+
+
 for data_split_site in site_info_dict:
     for data_level in normalize_levels:
         if data_level == "single_cell":
@@ -139,6 +159,7 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
                 logging.info(
                     f"Now outputting normalized single cell profiles by guide for split {data_split_site}"
                 )
+                # Load image alignment information for appending to single_cell_by_guide csvs
                 image_df = pd.read_csv(image_file, sep="\t")
                 keep_columns = []
                 for col in image_df.columns:
@@ -153,24 +174,7 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
                 if not os.path.isdir(sc_by_guide_folder):
                     os.mkdir(sc_by_guide_folder)
                 df = read_csvs_with_chunksize(output_file)
-                for guide in set(df["Metadata_Foci_Barcode_MatchedTo_Barcode"]):
-                    gene = df[df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide][
-                        "Metadata_Foci_Barcode_MatchedTo_GeneCode"
-                    ].tolist()[0]
-                    guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}_{gene}.csv.gz"
-                    guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
-                    if not os.path.exists(guide_path):
-                        guide_df = pd.DataFrame()
-                    else:
-                        guide_df = read_csvs_with_chunksize(guide_path)
-                    append_df = df.loc[
-                        df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide
-                    ]
-                    append_df = append_df.merge(
-                        image_df, left_on="Metadata_Foci_site", right_on="Metadata_site"
-                    )
-                    guide_df = guide_df.append(append_df)
-                    guide_df.to_csv(guide_path, index=False)
+                Parallel(n_jobs=-2)(delayed(append_to_guide_csv)(guide, image_df) for guide in set(df["Metadata_Foci_Barcode_MatchedTo_Barcode"]))
 
 print("Finished 2.normalize.")
 logging.info("Finished 2.normalize.")
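
Note on PATCH 07: the per-guide work moves into append_to_guide_csv and is fanned out with joblib, where n_jobs=-2 means all CPUs except one. The helper reads df, output_file, and sc_by_guide_folder from module scope, so it only works once the loop body has assigned them. The bare Parallel/delayed pattern, with a toy worker standing in for the recipe's function, looks like:

    from joblib import Parallel, delayed

    def write_one_guide(guide):
        # Stand-in for the per-guide filter/merge/write step.
        return f"wrote {guide}"

    guides = ["AAACCCGGGTTT", "CCCGGGTTTAAA", "GGGTTTAAACCC"]  # hypothetical barcodes
    results = Parallel(n_jobs=-2)(delayed(write_one_guide)(g) for g in guides)
    print(results)
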
From da524ffe41743573baed7d16a38b711bf93dd995 Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Tue, 22 Feb 2022 13:13:30 -0800
Subject: [PATCH 08/10] undo parallelization

---
 1.generate-profiles/2.normalize.py | 50 ++++++++++++++----------------
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index d1ec3b3..5199826 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -6,7 +6,6 @@
 import logging
 import traceback
 
 import pandas as pd
-from joblib import Parallel, delayed
 
 from pycytominer import normalize
@@ -92,25 +91,6 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
     config["experiment"], sites, split_info, separator="___"
 )
 
-def append_to_guide_csv(guide, image_df):
-    append_df = df.loc[
-        df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide
-    ]
-    gene = list(append_df["Metadata_Foci_Barcode_MatchedTo_Barcode"])[0]
-    guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}_{gene}.csv.gz"
-    guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
-    if not os.path.exists(guide_path):
-        guide_df = pd.DataFrame()
-    else:
-        guide_df = read_csvs_with_chunksize(guide_path)
-
-    append_df = append_df.merge(
-        image_df, left_on="Metadata_Foci_site", right_on="Metadata_site"
-    )
-    guide_df = guide_df.append(append_df)
-    guide_df.to_csv(guide_path, index=False)
-
-
 for data_split_site in site_info_dict:
     for data_level in normalize_levels:
         if data_level == "single_cell":
@@ -127,18 +107,17 @@ def append_to_guide_csv(guide, image_df):
                 file_to_normalize.name.replace(".csv.gz", f"_{data_split_site}.csv.gz"),
             )
 
+        output_file = normalize_output_files[data_level]
+        output_file = pathlib.Path(
+            normalize_output_files[data_level].parents[0],
+            output_file.name.replace(".csv.gz", f"_{data_split_site}.csv.gz"),
+        )
         print(
             f"Now normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
         )
         logging.info(
             f"Normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
         )
-
-        output_file = normalize_output_files[data_level]
-        output_file = pathlib.Path(
-            normalize_output_files[data_level].parents[0],
-            output_file.name.replace(".csv.gz", f"_{data_split_site}.csv.gz"),
-        )
         df = read_csvs_with_chunksize(file_to_normalize)
 
         normalize(
@@ -174,7 +153,24 @@ def append_to_guide_csv(guide, image_df):
                 if not os.path.isdir(sc_by_guide_folder):
                     os.mkdir(sc_by_guide_folder)
                 df = read_csvs_with_chunksize(output_file)
-                Parallel(n_jobs=-2)(delayed(append_to_guide_csv)(guide, image_df) for guide in set(df["Metadata_Foci_Barcode_MatchedTo_Barcode"]))
+                for guide in set(df["Metadata_Foci_Barcode_MatchedTo_Barcode"]):
+                    gene = df[df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide][
+                        "Metadata_Foci_Barcode_MatchedTo_GeneCode"
+                    ].tolist()[0]
+                    guide_file_name = f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}_{gene}.csv.gz"
+                    guide_path = os.path.join(sc_by_guide_folder, guide_file_name)
+                    if not os.path.exists(guide_path):
+                        guide_df = pd.DataFrame()
+                    else:
+                        guide_df = read_csvs_with_chunksize(guide_path)
+                    append_df = df.loc[
+                        df["Metadata_Foci_Barcode_MatchedTo_Barcode"] == guide
+                    ]
+                    append_df = append_df.merge(
+                        image_df, left_on="Metadata_Foci_site", right_on="Metadata_site"
+                    )
+                    guide_df = guide_df.append(append_df)
+                    guide_df.to_csv(guide_path, index=False)
 
 print("Finished 2.normalize.")
 logging.info("Finished 2.normalize.")
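
Note on PATCH 08: this restores the serial per-guide loop and moves the output_file computation ahead of the progress messages. The read_csvs_with_chunksize helper used throughout is not defined in this diff; as rough orientation only, the usual pandas idiom such a helper typically wraps, reading a large CSV in chunks and concatenating, is sketched below (the function name and chunk size are invented, not the repo's implementation):

    import pandas as pd

    def read_csv_in_chunks(path, chunksize=100_000):
        # Generic chunked read: bounds peak memory per chunk, then concatenates.
        chunks = pd.read_csv(path, chunksize=chunksize)
        return pd.concat(chunks, ignore_index=True)
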
From fe3d8444bde7f4477d4764e72ae96835e86a25ca Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Tue, 22 Feb 2022 13:19:29 -0800
Subject: [PATCH 09/10] use force argument

---
 1.generate-profiles/2.normalize.py | 56 +++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index 5199826..344f6ab 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -112,23 +112,45 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
             normalize_output_files[data_level].parents[0],
             output_file.name.replace(".csv.gz", f"_{data_split_site}.csv.gz"),
         )
-        print(
-            f"Now normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
-        )
-        logging.info(
-            f"Normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
-        )
-        df = read_csvs_with_chunksize(file_to_normalize)
-
-        normalize(
-            profiles=df,
-            features=normalize_these_features,
-            samples=normalize_by_samples,
-            method=normalize_method,
-            output_file=output_file,
-            compression_options=compression,
-            float_format=float_format,
-        )
+        if os.path.exists(output_file):
+            if force:
+                print(f"Force overwriting {output_file}")
+                logging.info(f"Force overwriting {output_file}")
+                print(
+                    f"Now normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
+                )
+                logging.info(
+                    f"Normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
+                )
+                df = read_csvs_with_chunksize(file_to_normalize)
+
+                normalize(
+                    profiles=df,
+                    features=normalize_these_features,
+                    samples=normalize_by_samples,
+                    method=normalize_method,
+                    output_file=output_file,
+                    compression_options=compression,
+                    float_format=float_format,
+                )
+        else:
+            print(
+                f"Now normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
+            )
+            logging.info(
+                f"Normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
+            )
+            df = read_csvs_with_chunksize(file_to_normalize)
+
+            normalize(
+                profiles=df,
+                features=normalize_these_features,
+                samples=normalize_by_samples,
+                method=normalize_method,
+                output_file=output_file,
+                compression_options=compression,
+                float_format=float_format,
+            )
 
         if data_level == "single_cell":
             if output_single_cell_by_guide:
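
Note on PATCH 09: normalization now respects a force flag: an existing output is rewritten only when force is set, and a missing output is always produced. The two branches repeat the same normalize call; the same check with the call factored out might look like the sketch below (the function name and the df_loader callable are illustrative; the keyword arguments are the ones the script already passes through):

    import os
    import logging

    from pycytominer import normalize

    def normalize_unless_cached(df_loader, output_file, force, **normalize_kwargs):
        # Skip the expensive step when the output exists and force is off.
        if os.path.exists(output_file):
            if not force:
                logging.info(f"Skipping existing {output_file}")
                return
            logging.info(f"Force overwriting {output_file}")
        df = df_loader()
        normalize(profiles=df, output_file=output_file, **normalize_kwargs)
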
From 50455de24d02ad211e6aa81daea6e82c890c5dee Mon Sep 17 00:00:00 2001
From: ErinWeisbart
Date: Tue, 22 Feb 2022 14:58:38 -0800
Subject: [PATCH 10/10] don't normalize locations

---
 1.generate-profiles/2.normalize.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/1.generate-profiles/2.normalize.py b/1.generate-profiles/2.normalize.py
index 344f6ab..3a27cd8 100644
--- a/1.generate-profiles/2.normalize.py
+++ b/1.generate-profiles/2.normalize.py
@@ -112,6 +112,7 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
             normalize_output_files[data_level].parents[0],
             output_file.name.replace(".csv.gz", f"_{data_split_site}.csv.gz"),
         )
+
         if os.path.exists(output_file):
             if force:
                 print(f"Force overwriting {output_file}")
@@ -124,9 +125,16 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
                 )
                 df = read_csvs_with_chunksize(file_to_normalize)
 
+                # Don't normalize locations
+                meta_cols=list(df.columns[df.columns.str.contains("Metadata")])
+                remove_locs = list(filter(lambda x: "_Location_Center_X" in x or "_Location_Center_Y" in x , df.columns))
+                remove_cents = list(filter(lambda x: "AreaShape_Center_X" in x or "AreaShape_Center_Y" in x , df.columns))
+                meta_cols = meta_cols + remove_locs + remove_cents
+
                 normalize(
                     profiles=df,
                     features=normalize_these_features,
+                    meta_features=meta_cols,
                     samples=normalize_by_samples,
                     method=normalize_method,
                     output_file=output_file,
@@ -142,9 +150,16 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
             )
             df = read_csvs_with_chunksize(file_to_normalize)
 
+            # Don't normalize locations
+            meta_cols=list(df.columns[df.columns.str.contains("Metadata")])
+            remove_locs = list(filter(lambda x: "_Location_Center_X" in x or "_Location_Center_Y" in x , df.columns))
+            remove_cents = list(filter(lambda x: "AreaShape_Center_X" in x or "AreaShape_Center_Y" in x , df.columns))
+            meta_cols = meta_cols + remove_locs + remove_cents
+
             normalize(
                 profiles=df,
                 features=normalize_these_features,
+                meta_features=meta_cols,
                 samples=normalize_by_samples,
                 method=normalize_method,
                 output_file=output_file,
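
Note on PATCH 10: X/Y location and center-of-mass columns are added to the list passed as meta_features, so pycytominer treats them as metadata rather than as features to transform and they pass through to the output un-normalized. The column selection, replayed on a toy frame that follows CellProfiler naming conventions:

    import pandas as pd

    df = pd.DataFrame(
        columns=[
            "Metadata_Foci_site",
            "Nuclei_Location_Center_X",
            "Cells_AreaShape_Center_Y",
            "Cells_Intensity_MeanIntensity_DNA",
        ]
    )  # toy columns; only the last one should be normalized

    meta_cols = list(df.columns[df.columns.str.contains("Metadata")])
    remove_locs = [c for c in df.columns if "_Location_Center_X" in c or "_Location_Center_Y" in c]
    remove_cents = [c for c in df.columns if "AreaShape_Center_X" in c or "AreaShape_Center_Y" in c]
    meta_cols = meta_cols + remove_locs + remove_cents
    print(meta_cols)
    # ['Metadata_Foci_site', 'Nuclei_Location_Center_X', 'Cells_AreaShape_Center_Y']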