Mercurial > repos > bimib > cobraxy
changeset 309:38c9a958ea78 draft
Uploaded
| author | francesco_lapi | 
|---|---|
| date | Thu, 22 May 2025 16:03:37 +0000 | 
| parents | 797d0e002934 | 
| children | c9647b137ab6 | 
| files | COBRAxy/data_tutorial.zip COBRAxy/marea.py COBRAxy/marea_cluster.py COBRAxy/ras_generator.py | 
| diffstat | 4 files changed, 37 insertions(+), 26 deletions(-) [+] | 
line wrap: on
 line diff
--- a/COBRAxy/marea.py Tue May 20 16:01:21 2025 +0000 +++ b/COBRAxy/marea.py Thu May 22 16:03:37 2025 +0000 @@ -568,16 +568,22 @@ if pd.isnull(classe): continue l :List[List[float]] = [] + sample_ids: List[str] = [] + for j in range(i, len(classes)): if classes.iloc[j, 1] == classe: pat_id :str = classes.iloc[j, 0] # sample name values = dataset_values.get(pat_id, None) # the column of values for that sample if values != None: l.append(values) + sample_ids.append(pat_id) classes.iloc[j, 1] = None # TODO: problems? if l: - class_pat[classe] = list(map(list, zip(*l))) + class_pat[classe] = { + "values": list(map(list, zip(*l))), # trasposta + "samples": sample_ids + } continue utils.logWarning( @@ -957,8 +963,11 @@ values, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)") if values != None: - # TODO: add the columnNames thing, I didn't because I don't understand the whole "dataset classes" thing - class_pat = split_class(classes, values) + class_pat_with_samples_id = split_class(classes, values) + + for clas, values_and_samples_id in class_pat_with_samples_id.items(): + class_pat[clas] = values_and_samples_id["values"] + columnNames[clas] = values_and_samples_id["samples"] return ids, class_pat, columnNames #^^^ TODO: this could be a match statement over an enum, make it happen future marea dev with python 3.12! (it's why I kept the ifs) @@ -1064,4 +1073,4 @@ print('Execution succeeded') ############################################################################### if __name__ == "__main__": - main() \ No newline at end of file + main()
--- a/COBRAxy/marea_cluster.py Tue May 20 16:01:21 2025 +0000 +++ b/COBRAxy/marea_cluster.py Thu May 22 16:03:37 2025 +0000 @@ -59,14 +59,14 @@ parser.add_argument('-el', '--elbow', type = str, - default = 'false', - choices = ['true', 'false'], + default = 'False', + choices = ['True', 'False'], help = 'choose if you want to generate an elbow plot for kmeans') parser.add_argument('-si', '--silhouette', type = str, - default = 'false', - choices = ['true', 'false'], + default = 'False', + choices = ['True', 'False'], help = 'choose if you want silhouette plots') parser.add_argument('-td', '--tool_dir', @@ -75,7 +75,7 @@ help = 'your tool directory') parser.add_argument('-ms', '--min_samples', - type = float, + type = int, help = 'min samples for dbscan (optional)') parser.add_argument('-ep', '--eps', @@ -106,7 +106,7 @@ Returns: None """ - args = process_args(sys.argv) + with open(args.out_log, 'a') as log: log.write(s + "\n\n") print(s) @@ -213,8 +213,8 @@ k_min (int): The minimum number of clusters to consider. k_max (int): The maximum number of clusters to consider. dataset (pandas.DataFrame): The dataset to perform clustering on. - elbow (str): Whether to generate an elbow plot for kmeans ('true' or 'false'). - silhouette (str): Whether to generate silhouette plots ('true' or 'false'). + elbow (str): Whether to generate an elbow plot for kmeans ('True' or 'False'). + silhouette (str): Whether to generate silhouette plots ('True' or 'False'). best_cluster (str): The file path to save the output of the best cluster. Returns: @@ -224,12 +224,12 @@ os.makedirs(args.output_path) - if elbow == 'true': + if elbow == 'True': elbow = True else: elbow = False - if silhouette == 'true': + if silhouette == 'True': silhouette = True else: silhouette = False @@ -443,7 +443,7 @@ k_min (int): The minimum number of clusters to consider. k_max (int): The maximum number of clusters to consider. best_cluster (str): The file path to save the output of the best cluster. - silhouette (str): Whether to generate silhouette plots ('true' or 'false'). + silhouette (str): Whether to generate silhouette plots ('True' or 'False'). Returns: None @@ -477,7 +477,7 @@ prefix = '' if (i + k_min == best): prefix = '_BEST' - if silhouette == 'true': + if silhouette == 'True': silhouette_draw(dataset, labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png') for i in range(len(labels)):
--- a/COBRAxy/ras_generator.py Tue May 20 16:01:21 2025 +0000 +++ b/COBRAxy/ras_generator.py Thu May 22 16:03:37 2025 +0000 @@ -8,6 +8,7 @@ import utils.general_utils as utils import utils.rule_parsing as ruleUtils from typing import Union, Optional, List, Dict, Tuple, TypeVar +import os ERRORS = [] ########################## argparse ########################################## @@ -212,7 +213,7 @@ Returns: dict: A dictionary containing gene data with gene IDs as keys and corresponding values. """ - args = process_args() + for i in range(len(gene)): tmp = gene.iloc[i, 0] gene.iloc[i, 0] = tmp.strip().split('.')[0] @@ -227,16 +228,16 @@ if gene_dup: if gene_custom == None: - if str(args.rules_selector) == 'HMRcore': - gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/HMRcore_genes.p', 'rb')) + if str(ARGS.rules_selector) == 'HMRcore': + gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/HMRcore_genes.p', 'rb')) - elif str(args.rules_selector) == 'Recon': - gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/Recon_genes.p', 'rb')) + elif str(ARGS.rules_selector) == 'Recon': + gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/Recon_genes.p', 'rb')) - elif str(args.rules_selector) == 'ENGRO2': - gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/ENGRO2_genes.p', 'rb')) + elif str(ARGS.rules_selector) == 'ENGRO2': + gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/ENGRO2_genes.p', 'rb')) - utils.logWarning(f"{args.tool_dir}'/local/pickle files/ENGRO2_genes.p'", ARGS.out_log) + utils.logWarning(f"{ARGS.tool_dir}'/local/pickle files/ENGRO2_genes.p'", ARGS.out_log) gene_in_rule = gene_in_rule.get(type_gene) @@ -662,7 +663,7 @@ # get args from frontend (related xml) global ARGS ARGS = process_args(args) - print(ARGS.rules_selector) + # read dataset dataset = read_dataset(ARGS.input, "dataset") dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) @@ -672,6 +673,7 @@ # handle custom models model :utils.Model = ARGS.rules_selector + if model is utils.Model.Custom: rules = load_custom_rules() reactions = list(rules.keys()) @@ -703,4 +705,4 @@ ############################################################################### if __name__ == "__main__": - main() \ No newline at end of file + main()
