From 8cef2b060342cd1979ca59cb9c35845fee2dc38d Mon Sep 17 00:00:00 2001
From: Simon Warchol
Date: Thu, 27 Jun 2024 17:48:10 -0400
Subject: [PATCH] Fixes for karin

---
 minerva_analysis/server/models/data_model.py |  56 +-
 .../server/routes/import_routes.py           | 511 ++++++++++--------
 requirements.yml                             |  29 +-
 3 files changed, 356 insertions(+), 240 deletions(-)

diff --git a/minerva_analysis/server/models/data_model.py b/minerva_analysis/server/models/data_model.py
index 36addaa86..15f02e2c9 100644
--- a/minerva_analysis/server/models/data_model.py
+++ b/minerva_analysis/server/models/data_model.py
@@ -3,7 +3,9 @@
 from sklearn.mixture import GaussianMixture
 from sklearn.preprocessing import MinMaxScaler
 from numba import prange
-
+import palom.pyramid
+import palom.reader
+import ome_types
 import math
 import hdbscan
 import numpy_indexed as npi
@@ -111,9 +113,11 @@ def load_datasource(datasource_name, reload=False):
     else:
         seg_io = tf.TiffFile(config[datasource_name]['segmentation'], is_ome=False)
         seg = zarr.open(seg_io.series[0].aszarr())
-        if seg.dtype.kind in 'f':
-            seg = seg.astype('uint32')
-        print('dt',seg.dtype)
+        try:
+            if seg.dtype.kind in 'f':
+                seg = seg.astype('uint32')
+        except Exception:  # sources without a dtype are left as-is
+            pass
     channel_io = tf.TiffFile(config[datasource_name]['channelFile'], is_ome=False)
     metadata = get_ome_metadata(datasource_name)
     channels = zarr.open(channel_io.series[0].aszarr())
@@ -148,8 +152,7 @@ def load_csv(datasource_name, numpy=False):
 
     if 'celltype' in config[datasource_name]['featureData'][0]:
         df = df.rename(columns={config[datasource_name]['featureData'][0]['celltype']: 'phenotype'})
-        if np.issubdtype(df['phenotype'].dtype, np.number) is False:
-            df['phenotype'] = df['phenotype'].apply(lambda x: x.strip())
+
 
     if 'celltypeData' in config[datasource_name]['featureData'][0]:
         cellTypePath = Path(config[datasource_name]['featureData'][0]['celltypeData'])
@@ -158,6 +161,7 @@ def load_csv(datasource_name, numpy=False):
         type_string = [e[1].strip() for e in type_list]
         df['phenotype'] = df['phenotype'].replace(type_ids, type_string)
 
+    df = df.replace(-np.Inf, 0)
 
     if numpy:
         # np_df = df.compute().to_numpy()
@@ -1422,21 +1426,49 @@ def get_ome_metadata(datasource_name):
     timer = time.time()
     if config is None:
         load_datasource(datasource_name)
+
+    try:
+        channel_io = tf.TiffFile(config[datasource_name]['channelFile'], is_ome=False)
+        xml = channel_io.pages[0].tags['ImageDescription'].value
+        image_metadata = from_xml(xml).images[0].pixels
 
-    channel_io = tf.TiffFile(config[datasource_name]['channelFile'], is_ome=False)
-    xml = channel_io.pages[0].tags['ImageDescription'].value
-    image_metadata = from_xml(xml).images[0].pixels
-
-    print('Metadata Time', time.time() - timer)
+        print('Metadata Time', time.time() - timer)
+    except Exception:  # fall back to empty metadata when the OME-XML is absent or malformed
+        image_metadata = {}
     return image_metadata
 
 
+def detect_pixel_size(img_path):
+    try:
+        metadata = ome_types.from_tiff(img_path)
+        # physical_size_x is None when the OME-XML has no PhysicalSizeX; fall back to 1
+        pixel_size = metadata.images[0].pixels.physical_size_x or 1
+    except Exception as err:
+        print(err)
+        print('Pixel size detection using ome-types failed; defaulting to 1')
+        pixel_size = 1
+    return pixel_size
+
+
 def convertOmeTiff(filePath, channelFilePath=None, dataDirectory=None, isLabelImg=False):
     channel_info = {}
     channelNames = []
     if isLabelImg == False:
         channel_io = tf.TiffFile(str(filePath), is_ome=False)
         channels = zarr.open(channel_io.series[0].aszarr())
+
+        if not isinstance(channels, zarr.Group):
+            # Flat, single-resolution image: write a tiled pyramid with palom first
+            print('Converting to Pyramid')
+            pixel_size = detect_pixel_size(filePath)
+            directory = Path(dataDirectory + "/" + filePath.name)
+            readers = [palom.reader.OmePyramidReader(in_path) for in_path in [filePath]]
+            mosaics = [reader.pyramid[0] for reader in readers]
+            palom.pyramid.write_pyramid(
+                mosaics, directory, downscale_factor=2, pixel_size=pixel_size
+            )
+            filePath = directory
+
+        channel_io = tf.TiffFile(str(filePath), is_ome=False)
+        channels = zarr.open(channel_io.series[0].aszarr())
         if isinstance(channels, zarr.Array):
             channel_info['maxLevel'] = 1
             chunks = channels.chunks
@@ -1454,6 +1486,7 @@ def convertOmeTiff(filePath, channelFilePath=None, dataDirectory=None, isLabelIm
         channel_info['height'] = shape[1]
         channel_info['width'] = shape[2]
         channel_info['num_channels'] = shape[0]
+        channel_info['filePath'] = str(filePath)
         for i in range(shape[0]):
             channelName = re.sub(r'\.ome|\.tiff|\.tif|\.png', '', filePath.name) + "_" + str(i)
             channelNames.append(channelName)
@@ -1824,7 +1857,6 @@ def get_perm_data(datasource_name, matrix_paths):
     image_ball_tree = BallTree(points, metric='euclidean')
     print('P Ball Tree,', time.time() - test)
     test = time.time()
-    image_metadata = get_ome_metadata(datasource_name)
     print('P Metadata,', time.time() - test)
     test = time.time()
     if 'neighborhood_radius' in config[datasource_name]:
diff --git a/minerva_analysis/server/routes/import_routes.py b/minerva_analysis/server/routes/import_routes.py
index c7d8c0ca3..651e5ad65 100644
--- a/minerva_analysis/server/routes/import_routes.py
+++ b/minerva_analysis/server/routes/import_routes.py
@@ -1,7 +1,10 @@
 # CRUD for Datasources
 
 from minerva_analysis import app, get_config_names, config_json_path, data_path
-from minerva_analysis.server.utils import mostFrequentLongestSubstring, pre_normalization
+from minerva_analysis.server.utils import (
+    mostFrequentLongestSubstring,
+    pre_normalization,
+)
 from minerva_analysis.server.models import data_model
 
 from flask import render_template, request, Response, jsonify
@@ -15,21 +18,21 @@
 total_tasks = 100
 completed_task = 0
-current_task = ''
+current_task = ""
 
 
-@app.route('/edit_config', methods=['GET'])
+@app.route("/edit_config", methods=["GET"])
 def edit_config_with_request_object():
     config_name = request.args.get("config")
     return edit_config_with_config_name(config_name)
 
 
-@app.route('/edit_config/<config_name>')
+@app.route("/edit_config/<config_name>")
 def edit_config_with_request_name(config_name):
     return edit_config_with_config_name(config_name)
 
 
-@app.route('/delete/<config_name>')
+@app.route("/delete/<config_name>")
 def delete_with_datasource_name(config_name):
     global config_json_path
 
@@ -42,7 +45,9 @@ def delete_with_datasource_name(config_name):
         configJson.seek(0)  # <--- should reset file position to the beginning.
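+        # json.dump below rewrites the config from position 0; the following
+        # truncate() then drops any leftover bytes when the new JSON is shorter.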
json.dump(config_data, configJson, indent=4) configJson.truncate() - return render_template("index.html", data={'datasource': '', 'datasources': get_config_names()}) + return render_template( + "index.html", data={"datasource": "", "datasources": get_config_names()} + ) def edit_config_with_config_name(config_name): @@ -51,87 +56,87 @@ def edit_config_with_config_name(config_name): with open(config_json_path, "r+") as configJson: config_csv = json.load(configJson) config_data = config_csv[config_name] - data['datasetName'] = config_name + data["datasetName"] = config_name # test_data['channelFileNames'] = ['channel_01', 'channel_02'] - data['csvName'] = config_data['featureData'][0]['src'].split("/")[-1] - if 'celltypeData' in config_data['featureData'][0]: - data['celltypeData'] = config_data['featureData'][0]['celltypeData'] + data["csvName"] = config_data["featureData"][0]["src"].split("/")[-1] + if "celltypeData" in config_data["featureData"][0]: + data["celltypeData"] = config_data["featureData"][0]["celltypeData"] - if 'shapes' in config_data: - data['shapes'] = config_data['shapes'] + if "shapes" in config_data: + data["shapes"] = config_data["shapes"] - if 'activeChannel' in config_data: - data['activeChannel'] = config_data['activeChannel'] + if "activeChannel" in config_data: + data["activeChannel"] = config_data["activeChannel"] - if 'normalization' in config_data['featureData'][0]: - data['normalization'] = config_data['featureData'][0]['normalization'] + if "normalization" in config_data["featureData"][0]: + data["normalization"] = config_data["featureData"][0]["normalization"] - if 'clusterData' in config_data: - data['normCsvName'] = config_data['clusterData'] + if "clusterData" in config_data: + data["normCsvName"] = config_data["clusterData"] - if 'maxLevel' in config_data: - data['maxLevel'] = config_data['maxLevel'] - if 'height' in config_data: - data['height'] = config_data['height'] + if "maxLevel" in config_data: + data["maxLevel"] = config_data["maxLevel"] + if "height" in config_data: + data["height"] = config_data["height"] - if 'width' in config_data: - data['width'] = config_data['width'] + if "width" in config_data: + data["width"] = config_data["width"] - if 'segmentation' in config_data: - data['segmentation'] = config_data['segmentation'] + if "segmentation" in config_data: + data["segmentation"] = config_data["segmentation"] - if 'channelFile' in config_data: - data['channelFile'] = config_data['channelFile'] + if "channelFile" in config_data: + data["channelFile"] = config_data["channelFile"] - if 'num_channels' in config_data: - data['num_channels'] = config_data['num_channels'] + if "num_channels" in config_data: + data["num_channels"] = config_data["num_channels"] - if 'tileHeight' in config_data: - data['tileHeight'] = config_data['tileHeight'] + if "tileHeight" in config_data: + data["tileHeight"] = config_data["tileHeight"] - if 'tileWidth' in config_data: - data['tileWidth'] = config_data['tileWidth'] + if "tileWidth" in config_data: + data["tileWidth"] = config_data["tileWidth"] csvHeaders = [] channelFileNames = [] - if 'idField' in config_data['featureData'][0]: - data['idField'] = True + if "idField" in config_data["featureData"][0]: + data["idField"] = True elem = {} - elem['fullName'] = config_data['featureData'][0]['idField'] - elem['displayName'] = config_data['featureData'][0]['idField'] + elem["fullName"] = config_data["featureData"][0]["idField"] + elem["displayName"] = config_data["featureData"][0]["idField"] csvHeaders.append(elem) - 
channelFileNames = ['ID'] + channelFileNames = ["ID"] else: - data['idField'] = False; + data["idField"] = False # add x cord elem = {} - elem['fullName'] = config_data['featureData'][0]['xCoordinate'] - elem['displayName'] = config_data['featureData'][0]['xCoordinate'] + elem["fullName"] = config_data["featureData"][0]["xCoordinate"] + elem["displayName"] = config_data["featureData"][0]["xCoordinate"] csvHeaders.append(elem) # add y cord elem = {} - elem['fullName'] = config_data['featureData'][0]['yCoordinate'] - elem['displayName'] = config_data['featureData'][0]['yCoordinate'] + elem["fullName"] = config_data["featureData"][0]["yCoordinate"] + elem["displayName"] = config_data["featureData"][0]["yCoordinate"] csvHeaders.append(elem) # add cell type elem = {} - elem['fullName'] = config_data['featureData'][0]['celltype'] - elem['displayName'] = config_data['featureData'][0]['celltype'] + elem["fullName"] = config_data["featureData"][0]["celltype"] + elem["displayName"] = config_data["featureData"][0]["celltype"] csvHeaders.append(elem) - channelFileNames.extend(['X Position', 'Y Position', 'Cell Type']) - channelFileNames.extend(['X Position', 'Y Position']) + channelFileNames.extend(["X Position", "Y Position", "Cell Type"]) + channelFileNames.extend(["X Position", "Y Position"]) - for i in range(len(config_data['imageData'])): - elem = config_data['imageData'][i] - channelName = elem['src'].split("/")[-2] + for i in range(len(config_data["imageData"])): + elem = config_data["imageData"][i] + channelName = elem["src"].split("/")[-2] header = {} - header['fullName'] = elem['fullname'] - header['displayName'] = elem['name'] + header["fullName"] = elem["fullname"] + header["displayName"] = elem["name"] # Special handling for label channel if i == 0: - data['labelName'] = channelName - if data['idField']: + data["labelName"] = channelName + if data["idField"]: csvHeaders.insert(1, header) else: csvHeaders.insert(0, header) @@ -139,15 +144,17 @@ def edit_config_with_config_name(config_name): channelFileNames.append(channelName) csvHeaders.append(header) - data['csvHeader'] = csvHeaders - header_full_names = [elem['displayName'] for elem in csvHeaders] - data['substring'] = mostFrequentLongestSubstring.find_substring(header_full_names) - data['channelFileNames'] = channelFileNames - data['datasources'] = [key for key in config_csv.keys()] - return render_template('channel_match.html', data=data) + data["csvHeader"] = csvHeaders + header_full_names = [elem["displayName"] for elem in csvHeaders] + data["substring"] = mostFrequentLongestSubstring.find_substring( + header_full_names + ) + data["channelFileNames"] = channelFileNames + data["datasources"] = [key for key in config_csv.keys()] + return render_template("channel_match.html", data=data) -@app.route('/upload', methods=['GET', 'POST']) +@app.route("/upload", methods=["GET", "POST"]) def upload_file_page(): global total_tasks global completed_task @@ -155,26 +162,26 @@ def upload_file_page(): total_tasks = 1 completed_task = 0 csvHeader = None - if request.method == 'POST': + if request.method == "POST": try: - if request.form['action'] == 'Upload': - if request.form.get('name-1') is None or not request.form.get('name-1'): + if request.form["action"] == "Upload": + if request.form.get("name-1") is None or not request.form.get("name-1"): raise Exception("Please Name Dataset") else: combined_config_data = [] for i in range(1, int((len(request.form) - 2) / 3) + 1): current_task = "Uploading" datasetName = None - csvName = '' - celltypeName = 
'' - channelFileNames = ['ID', 'X Position', 'Y Position'] - labelName = '' - datasetName = request.form['name' + '-' + str(i)] + csvName = "" + celltypeName = "" + channelFileNames = ["ID", "X Position", "Y Position"] + labelName = "" + datasetName = request.form["name" + "-" + str(i)] file_path = str(PurePath(Path.cwd(), data_path, datasetName)) if not Path(file_path).exists(): Path(file_path).mkdir() - csvFile = request.files.getlist("csv_file" + '-' + str(i)) + csvFile = request.files.getlist("csv_file" + "-" + str(i)) if len(csvFile) > 1: raise Exception("Please only Upload Only 1 CSV") elif len(csvFile) == 0: @@ -183,10 +190,10 @@ def upload_file_page(): if len(celltypeFile) > 1: raise Exception("Please only Upload Only 1 Cell Type File") elif len(celltypeFile) == 1: - channelFileNames.extend(['Phenotype']) + channelFileNames.extend(["Phenotype"]) # labelFile = request.files.getlist("label_file") - labelFile = request.form.get('label_file' + '-' + str(i)) + labelFile = request.form.get("label_file" + "-" + str(i)) if labelFile.startswith('"'): labelFile = labelFile[1:] if labelFile.endswith('"'): @@ -195,9 +202,11 @@ def upload_file_page(): labelName = os.path.splitext(labelFile.name)[0] # labelName = labelFile.name.split('.')[0] - neighborhood_radius = int(request.form.get('neighborhood_radius')) + neighborhood_radius = int( + request.form.get("neighborhood_radius") + ) - channelFile = request.form.get('channel_file' + '-' + str(i)) + channelFile = request.form.get("channel_file" + "-" + str(i)) if channelFile.startswith('"'): channelFile = channelFile[1:] if channelFile.endswith('"'): @@ -211,7 +220,7 @@ def upload_file_page(): csvName = file.filename csvPath = str(Path(file_path) / csvName) file.save(csvPath) - with open(csvPath, 'r') as infile: + with open(csvPath, "r") as infile: reader = csv.DictReader(infile) csvHeader = reader.fieldnames @@ -225,63 +234,78 @@ def upload_file_page(): # Process Channel File - current_task = "Converting OME-TIFF Channels (This Will Take a While)" - channel_info = data_model.convertOmeTiff(channelFile, isLabelImg=False) - channelFileNames.extend(channel_info['channel_names']) + current_task = ( + "Converting OME-TIFF Channels (This Will Take a While)" + ) + channel_info = data_model.convertOmeTiff( + channelFile, + isLabelImg=False, + dataDirectory=file_path, + ) + channelFileNames.extend(channel_info["channel_names"]) completed_task += 1 current_task = "Converting Segmentation Mask" - label_info = data_model.convertOmeTiff(labelFile, channelFilePath=channelFile, - dataDirectory=file_path, - isLabelImg=True) + label_info = data_model.convertOmeTiff( + labelFile, + channelFilePath=channelFile, + dataDirectory=file_path, + isLabelImg=True, + ) completed_task += 1 current_task = total_tasks - current_task = 'Complete' + current_task = "Complete" config_data = {} full_csv_header = [] for header in csvHeader: elem = {} - elem['fullName'] = header + elem["fullName"] = header full_csv_header.append(elem) - config_data['csvHeader'] = full_csv_header - header_full_names = [elem['fullName'] for elem in full_csv_header] - config_data['substring'] = mostFrequentLongestSubstring.find_substring(header_full_names) - config_data['datasetName'] = datasetName - - config_data['maxLevel'] = channel_info['maxLevel'] - config_data['height'] = channel_info['height'] - config_data['width'] = channel_info['width'] - config_data['segmentation'] = label_info['segmentation'] - - config_data['num_channels'] = channel_info['num_channels'] - config_data['tileHeight'] = 
channel_info['tileHeight'] - config_data['tileWidth'] = channel_info['tileWidth'] - config_data['neighborhood_radius'] = neighborhood_radius - config_data['datasetName'] = datasetName - config_data['channelFileNames'] = channelFileNames - config_data['csvName'] = csvName + config_data["csvHeader"] = full_csv_header + header_full_names = [ + elem["fullName"] for elem in full_csv_header + ] + config_data["substring"] = ( + mostFrequentLongestSubstring.find_substring( + header_full_names + ) + ) + config_data["datasetName"] = datasetName + + config_data["maxLevel"] = channel_info["maxLevel"] + config_data["height"] = channel_info["height"] + config_data["width"] = channel_info["width"] + config_data["segmentation"] = label_info["segmentation"] + + config_data["num_channels"] = channel_info["num_channels"] + config_data["tileHeight"] = channel_info["tileHeight"] + config_data["tileWidth"] = channel_info["tileWidth"] + config_data["neighborhood_radius"] = neighborhood_radius + config_data["datasetName"] = datasetName + config_data["channelFileNames"] = channelFileNames + config_data["csvName"] = csvName if len(celltypeFile) == 1: - config_data['celltypeData'] = celltypeName - config_data['channelFile'] = str(channelFile) - config_data['new'] = True - config_data['labelName'] = labelName - config_data['datasources'] = get_config_names() - config_data['datasources'].append(datasetName) + config_data["celltypeData"] = celltypeName + config_data["channelFile"] = channel_info['filePath'] + config_data["new"] = True + config_data["labelName"] = labelName + config_data["datasources"] = get_config_names() + config_data["datasources"].append(datasetName) combined_config_data.append(config_data) - return render_template('channel_match.html', data=combined_config_data) + return render_template("channel_match.html", data=combined_config_data) except Exception as e: completed_task = -1 current_task = str(e) - return render_template('index.html') + return render_template("index.html") # Now Edit Config.Json With my my Data print("Finished Updating Config.json") - return render_template('index.html') + return render_template("index.html") -@app.route('/progress') +@app.route("/progress") def progress(): def generate(): global total_tasks @@ -290,71 +314,107 @@ def generate(): data = {} # Error Handling if current_task == -1: - data['percentage'] = -1 - data['currentTask'] = current_task + data["percentage"] = -1 + data["currentTask"] = current_task else: if total_tasks == 0: total_tasks = 100 percentage = int((completed_task / total_tasks) * 100) - data['percentage'] = percentage - data['currentTask'] = current_task + data["percentage"] = percentage + data["currentTask"] = current_task print("Percentage:", percentage, completed_task, total_tasks, current_task) yield "data:" + json.dumps(data) + "\n\n" - return Response(generate(), mimetype='text/event-stream') + return Response(generate(), mimetype="text/event-stream") # Matches channels to CSV Headers -@app.route('/channel-test') +@app.route("/channel-test") def channel(): test_data = {} - test_data['csvHeader'] = ['A488_background_none_1Nuclei', 'A555_background_none_2Nuclei', 'A555_Alexa555_18Nuclei', - 'A647_background_none_3Nuclei', 'CATENIN_Alexa647_19Nuclei', 'CD3_Alexa555_10Nuclei', - 'CD45RO_Alexa488_13Nuclei', 'CD45_Alexa555_30Nuclei', 'CD4_Alexa488_9Nuclei', - 'CD8_Alexa647_11Nuclei', 'DNA2_Hoechst33342_4Nuclei', 'DNA3_Hoechst33342_8Nuclei', - 'DNA4_Hoechst33342_12Nuclei', 'DNA5_Hoechst33342_16Nuclei', 'DNA6_Hoechst33342_20Nuclei', - 
'DNA7_Hoechst33342_24Nuclei', 'DNA8_Hoechst33342_28Nuclei', 'DNA9_Hoechst33342_32Nuclei', - 'DNA_Hoechst33342_0Nuclei', 'ECAD_Alexa488_17Nuclei', 'FOXP3_Alexa555_14Nuclei', - 'HES1_Alexa647_27Nuclei', 'KERATIN_Alexa555_26Nuclei', 'KI67_Alexa488_5Nuclei', - 'MITF_Alexa488_25Nuclei', 'NGFR_Alexa647_23Nuclei', 'PD1_Alexa647_15Nuclei', - 'PDL1_Alexa647_7Nuclei', 'S100_Alexa488_33Nuclei', 'SMA_Alexa647_35Nuclei', - 'VEGFR2_Alexa555_34Nuclei', 'VIMENTIN_Alexa555_6Nuclei', 'cJUN_Alexa488_21Nuclei', - 'pERKz_Alexa647_31Nuclei', 'pRB_Alexa555_22Nuclei', 'pS6_Alexa488_29Nuclei', - 'NucleusArea', 'CellPosition_X', 'CellPosition_Y'] - test_data['datasetName'] = 'channelConfigs' - test_data['substring'] = mostFrequentLongestSubstring.find_substring(test_data['csvHeader']) - test_data['channelFileNames'] = ['ID', 'X Position', 'Y Position', 'channel_01', 'channel_02'] - test_data['normCsvName'] = 'segResultsRF_norm.csv' - test_data['csvName'] = 'segResultsRF.csv' - test_data['labelName'] = 'nucleiLabelRF' - test_data['new'] = True - test_data['datasources'] = get_config_names() - - return render_template('channel_match.html', data=test_data) - - -@app.route('/save_config', methods=['POST']) + test_data["csvHeader"] = [ + "A488_background_none_1Nuclei", + "A555_background_none_2Nuclei", + "A555_Alexa555_18Nuclei", + "A647_background_none_3Nuclei", + "CATENIN_Alexa647_19Nuclei", + "CD3_Alexa555_10Nuclei", + "CD45RO_Alexa488_13Nuclei", + "CD45_Alexa555_30Nuclei", + "CD4_Alexa488_9Nuclei", + "CD8_Alexa647_11Nuclei", + "DNA2_Hoechst33342_4Nuclei", + "DNA3_Hoechst33342_8Nuclei", + "DNA4_Hoechst33342_12Nuclei", + "DNA5_Hoechst33342_16Nuclei", + "DNA6_Hoechst33342_20Nuclei", + "DNA7_Hoechst33342_24Nuclei", + "DNA8_Hoechst33342_28Nuclei", + "DNA9_Hoechst33342_32Nuclei", + "DNA_Hoechst33342_0Nuclei", + "ECAD_Alexa488_17Nuclei", + "FOXP3_Alexa555_14Nuclei", + "HES1_Alexa647_27Nuclei", + "KERATIN_Alexa555_26Nuclei", + "KI67_Alexa488_5Nuclei", + "MITF_Alexa488_25Nuclei", + "NGFR_Alexa647_23Nuclei", + "PD1_Alexa647_15Nuclei", + "PDL1_Alexa647_7Nuclei", + "S100_Alexa488_33Nuclei", + "SMA_Alexa647_35Nuclei", + "VEGFR2_Alexa555_34Nuclei", + "VIMENTIN_Alexa555_6Nuclei", + "cJUN_Alexa488_21Nuclei", + "pERKz_Alexa647_31Nuclei", + "pRB_Alexa555_22Nuclei", + "pS6_Alexa488_29Nuclei", + "NucleusArea", + "CellPosition_X", + "CellPosition_Y", + ] + test_data["datasetName"] = "channelConfigs" + test_data["substring"] = mostFrequentLongestSubstring.find_substring( + test_data["csvHeader"] + ) + test_data["channelFileNames"] = [ + "ID", + "X Position", + "Y Position", + "channel_01", + "channel_02", + ] + test_data["normCsvName"] = "segResultsRF_norm.csv" + test_data["csvName"] = "segResultsRF.csv" + test_data["labelName"] = "nucleiLabelRF" + test_data["new"] = True + test_data["datasources"] = get_config_names() + + return render_template("channel_match.html", data=test_data) + + +@app.route("/save_config", methods=["POST"]) def save_config(): global config_json_path # try: - combinedOriginalData = request.json['originalData'] - datasets = [e['datasetName'] for e in combinedOriginalData] + combinedOriginalData = request.json["originalData"] + datasets = [e["datasetName"] for e in combinedOriginalData] with open(config_json_path, "r+") as configJson: configData = json.load(configJson) for originalData in combinedOriginalData: - datasetName = originalData['datasetName'] - csvName = originalData['csvName'] - if 'celltypeData' in originalData: - celltypeName = originalData['celltypeData'] - headerList = request.json['headerList'] - 
normalizeCsv = request.json.get('normalizeCsv') + datasetName = originalData["datasetName"] + csvName = originalData["csvName"] + if "celltypeData" in originalData: + celltypeName = originalData["celltypeData"] + headerList = request.json["headerList"] + normalizeCsv = request.json.get("normalizeCsv") if normalizeCsv: print("Normalizing CSV") skip_columns = [] for i in range(int(len(headerList) / 3)): - column_name = headerList[i * 3]['value'] - normalize_column = headerList[i * 3 + 2]['value'] - if normalize_column != 'on': + column_name = headerList[i * 3]["value"] + normalize_column = headerList[i * 3 + 2]["value"] + if normalize_column != "on": skip_columns.append(column_name) name, ext = os.path.splitext(csvName) normCsvName = "{name}_norm{ext}".format(name=name, ext=ext) @@ -362,100 +422,122 @@ def save_config(): file_path = str(Path(Path.cwd(), data_path, datasetName)) csvPath = str(Path(file_path) / csvName) normPath = str(Path(file_path) / normCsvName) - pre_normalization.preNormalize(csvPath, normPath, skip_columns=skip_columns) + pre_normalization.preNormalize( + csvPath, normPath, skip_columns=skip_columns + ) print("Finished Normalizing CSV") - elif 'normalizeCsvName' in request.json: - normCsvName = request.json.get('normalizeCsvName') + elif "normalizeCsvName" in request.json: + normCsvName = request.json.get("normalizeCsvName") else: normCsvName = None headerList = [x for x in zip(headerList[1::3], headerList[0::3])] - channelList = originalData['channelFileNames'] + channelList = originalData["channelFileNames"] configData[datasetName] = {} - configData[datasetName]['shapes'] = '' + configData[datasetName]["shapes"] = "" if normCsvName: - configData[datasetName]['clusterData'] = normCsvName - configData[datasetName]['activeChannel'] = '' - configData[datasetName]['linkedDatasets'] = datasets - configData[datasetName]['featureData'] = [{}] - configData[datasetName]['featureData'][0]['normalization'] = 'none' - if 'celltypeData' in originalData: - configData[datasetName]['featureData'][0]['celltypeData'] = str( - data_path / datasets[0] / celltypeName) - configData[datasetName]['featureData'][0]['celltype'] = headerList[2][1]['value'] - configData[datasetName]['featureData'][0]['xCoordinate'] = headerList[0][1]['value'] - configData[datasetName]['featureData'][0]['yCoordinate'] = headerList[1][1]['value'] + configData[datasetName]["clusterData"] = normCsvName + configData[datasetName]["activeChannel"] = "" + configData[datasetName]["linkedDatasets"] = datasets + configData[datasetName]["featureData"] = [{}] + configData[datasetName]["featureData"][0]["normalization"] = "none" + if "celltypeData" in originalData: + configData[datasetName]["featureData"][0]["celltypeData"] = str( + data_path / datasets[0] / celltypeName + ) + configData[datasetName]["featureData"][0]["celltype"] = headerList[2][ + 1 + ]["value"] + configData[datasetName]["featureData"][0]["xCoordinate"] = headerList[0][1][ + "value" + ] + configData[datasetName]["featureData"][0]["yCoordinate"] = headerList[1][1][ + "value" + ] # If optional id field - if 'idField' in request.json: + if "idField" in request.json: channelList.pop(0) - configData[datasetName]['featureData'][0]['idField'] = request.json['idField'][1]['value'] + configData[datasetName]["featureData"][0]["idField"] = request.json[ + "idField" + ][1]["value"] - if 'shapes' in originalData: - configData[datasetName]['shapes'] = originalData['shapes'] + if "shapes" in originalData: + configData[datasetName]["shapes"] = originalData["shapes"] - if 
'height' in originalData: - configData[datasetName]['height'] = originalData['height'] + if "height" in originalData: + configData[datasetName]["height"] = originalData["height"] - if 'width' in originalData: - configData[datasetName]['width'] = originalData['width'] + if "width" in originalData: + configData[datasetName]["width"] = originalData["width"] - if 'maxLevel' in originalData: - configData[datasetName]['maxLevel'] = originalData['maxLevel'] + if "maxLevel" in originalData: + configData[datasetName]["maxLevel"] = originalData["maxLevel"] - if 'num_channels' in originalData: - configData[datasetName]['num_channels'] = originalData['num_channels'] - if 'neighborhood_radius' in originalData: - configData[datasetName]['neighborhood_radius'] = originalData['neighborhood_radius'] + if "num_channels" in originalData: + configData[datasetName]["num_channels"] = originalData["num_channels"] + if "neighborhood_radius" in originalData: + configData[datasetName]["neighborhood_radius"] = originalData[ + "neighborhood_radius" + ] - if 'tileWidth' in originalData: - configData[datasetName]['tileWidth'] = originalData['tileWidth'] + if "tileWidth" in originalData: + configData[datasetName]["tileWidth"] = originalData["tileWidth"] - if 'tileHeight' in originalData: - configData[datasetName]['tileHeight'] = originalData['tileHeight'] + if "tileHeight" in originalData: + configData[datasetName]["tileHeight"] = originalData["tileHeight"] - if 'segmentation' in originalData: - configData[datasetName]['segmentation'] = originalData['segmentation'] + if "segmentation" in originalData: + configData[datasetName]["segmentation"] = originalData["segmentation"] - if 'channelFile' in originalData: - configData[datasetName]['channelFile'] = originalData['channelFile'] + if "channelFile" in originalData: + configData[datasetName]["channelFile"] = originalData["channelFile"] - if 'activeChannel' in originalData: - configData[datasetName]['activeChannel'] = originalData['activeChannel'] + if "activeChannel" in originalData: + configData[datasetName]["activeChannel"] = originalData["activeChannel"] - if 'normalization' in originalData: - configData[datasetName]['featureData'][0]['normalization'] = originalData['normalization'] + if "normalization" in originalData: + configData[datasetName]["featureData"][0]["normalization"] = ( + originalData["normalization"] + ) - configData[datasetName]['featureData'][0][ - 'src'] = str(data_path / datasetName / csvName) + configData[datasetName]["featureData"][0]["src"] = str( + data_path / datasetName / csvName + ) # Adding the Label Channel as the First Label - configData[datasetName]['imageData'] = [{}] + configData[datasetName]["imageData"] = [{}] # - configData[datasetName]['imageData'][0]['name'] = 'Segmentation' - configData[datasetName]['imageData'][0]['fullname'] = 'Segmentation' - if 'labelName' in originalData and originalData['labelName'] != '': - configData[datasetName]['imageData'][0]['src'] = "/generated/data/" + datasetName + "/" + \ - originalData[ - 'labelName'] + "/" + configData[datasetName]["imageData"][0]["name"] = "Segmentation" + configData[datasetName]["imageData"][0]["fullname"] = "Segmentation" + if "labelName" in originalData and originalData["labelName"] != "": + configData[datasetName]["imageData"][0]["src"] = ( + "/generated/data/" + + datasetName + + "/" + + originalData["labelName"] + + "/" + ) else: - configData[datasetName]['imageData'][0]['src'] = '' + configData[datasetName]["imageData"][0]["src"] = "" - if 'celltypeData' in originalData: + 
if "celltypeData" in originalData: channelList = channelList[3:] else: channelList = channelList[2:] - if 'celltypeData' in originalData: + if "celltypeData" in originalData: channelStart = 3 else: channelStart = 2 for i in range(len(channelList)): channel = channelList[i] channelData = {} - channelData['src'] = "/generated/data/" + datasetName + "/" + channel + "/" - channelData['name'] = headerList[i + channelStart][0]['value'] - channelData['fullname'] = headerList[i + channelStart][1]['value'] - configData[datasetName]['imageData'].append(channelData) + channelData["src"] = ( + "/generated/data/" + datasetName + "/" + channel + "/" + ) + channelData["name"] = headerList[i + channelStart][0]["value"] + channelData["fullname"] = headerList[i + channelStart][1]["value"] + configData[datasetName]["imageData"].append(channelData) configJson.seek(0) # <--- should reset file position to the beginning. json.dump(configData, configJson, indent=4) configJson.truncate() @@ -473,9 +555,10 @@ def save_config(): # End of Facetto code -@app.route('/init_datasource', methods=['GET']) + +@app.route("/init_datasource", methods=["GET"]) def init_datasource(): - datasource = request.args.get('datasource') + datasource = request.args.get("datasource") data_model.init(datasource) resp = jsonify(success=True) return resp diff --git a/requirements.yml b/requirements.yml index 61fba50ed..78f5b0040 100644 --- a/requirements.yml +++ b/requirements.yml @@ -3,35 +3,36 @@ channels: - conda-forge dependencies: - python=3.9 - - appdirs + - appdirs=1.4.4 - Flask=2.3.2 - flask-sqlalchemy=3.0.2 - hdbscan=0.8.28 - llvmlite=0.39.1 - joblib=1.1.0 - - matplotlib - - numba - - numpy - - orjson - - pandas + - matplotlib=3.8.2 + - numba=0.56.4 + - numpy=1.23.5 + - orjson=3.9.10 + - pandas=2.2.0 - pillow=8.1.1 - - requests + - requests=2.31.0 - scikit-learn=1.1.2 - - scikit-image - - scipy + - scikit-image=0.19.3 + - scipy=1.12.0 - tifffile=2021.4.8 - - waitress + - waitress=2.1.2 - zarr=2.10.1 - - pyinstaller + - pyinstaller=6.3.0 - pip - pip: - elementpath==2.3.2 + - palom==2024.4.1 - pycave==3.1.3 - pydantic==1.10.2 - - numpy-indexed + - numpy-indexed==0.3.7 - ome-types==0.2.9 - - opencv-python - - umap-learn + - opencv-python==4.9.0.80 + - umap-learn==0.5.5 - xmlschema==1.8.0 - imagecodecs[all]