# HG changeset patch # User bimib # Date 1582127092 18000 # Node ID 3af9d394367c9fc83fa17541659510e32864ab12 # Parent 5d5d01ef1d68cf66de0c303bfe9693a51e4be842 Uploaded diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/marea.py --- a/Marea/marea.py Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/marea.py Wed Feb 19 10:44:52 2020 -0500 @@ -26,10 +26,6 @@ parser.add_argument('-cr', '--custom', type = str, help='your dataset if you want custom rules') - parser.add_argument('-na', '--names', - type = str, - nargs = '+', - help = 'input names') parser.add_argument('-n', '--none', type = str, default = 'true', @@ -37,7 +33,7 @@ help = 'compute Nan values') parser.add_argument('-pv' ,'--pValue', type = float, - default = 0.05, + default = 0.1, help = 'P-Value threshold (default: %(default)s)') parser.add_argument('-fc', '--fChange', type = float, @@ -49,14 +45,10 @@ help = 'your tool directory') parser.add_argument('-op', '--option', type = str, - choices = ['datasets', 'dataset_class', 'datasets_rasonly'], + choices = ['datasets', 'dataset_class'], help='dataset or dataset and class') parser.add_argument('-ol', '--out_log', help = "Output log") - parser.add_argument('-ids', '--input_datas', - type = str, - nargs = '+', - help = 'input datasets') parser.add_argument('-id', '--input_data', type = str, help = 'input dataset') @@ -80,15 +72,21 @@ default = 'true', choices = ['true', 'false'], help = 'generate pdf map') - parser.add_argument('-gr', '--generate_ras', + parser.add_argument('-on', '--control', + type = str) + parser.add_argument('-co', '--comparison', + type = str, + default = '1vs1', + choices = ['manyvsmany', 'onevsrest', 'onevsmany']) + parser.add_argument('-ids', '--input_datas', type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate reaction activity score') - parser.add_argument('-sr', '--single_ras_file', - type = str, - help = 'file that will contain ras') - + nargs = '+', + help = 'input datasets') + parser.add_argument('-na', '--names', + type = str, + nargs = '+', + help = 'input names') + args = parser.parse_args() return args @@ -615,7 +613,6 @@ def resolve(genes, rules, ids, resolve_none, name): resolve_rules = {} - names_array = [] not_found = [] flag = False for key, value in genes.items(): @@ -663,79 +660,157 @@ ', the class has been disregarded\n') return class_pat -############################ create_ras ####################################### - -def create_ras (resolve_rules, dataset_name, single_ras, rules, ids): - - if resolve_rules == None: - warning("Couldn't generate RAS for current dataset: " + dataset_name) - - for geni in resolve_rules.values(): - for i, valori in enumerate(geni): - if valori == None: - geni[i] = 'None' - - output_ras = pd.DataFrame.from_dict(resolve_rules) - - output_ras.insert(0, 'Reactions', ids) - output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) - - if (single_ras): - args = process_args(sys.argv) - text_file = open(args.single_ras_file, "w") - else: - text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") - - text_file.write(output_to_csv) - text_file.close() - ############################ map ############################################## -def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): +def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf, comparison, control): args = process_args(sys.argv) if (not class_pat) or (len(class_pat.keys()) < 2): sys.exit('Execution aborted: classes provided for 
comparisons are ' + 'less than two\n') - for i, j in it.combinations(class_pat.keys(), 2): - tmp = {} - count = 0 - max_F_C = 0 - for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): - try: - stat_D, p_value = st.ks_2samp(l1, l2) - avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) - if not isinstance(avg, str): - if max_F_C < abs(avg): - max_F_C = abs(avg) - tmp[ids[count]] = [float(p_value), avg] - count += 1 - except (TypeError, ZeroDivisionError): - count += 1 - tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' - tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") - tmp_csv = tmp_csv.reset_index() - header = ['ids', 'P_Value', 'Log2(fold change)'] - tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if comparison == "manyvsmany": + for i, j in it.combinations(class_pat.keys(), 2): + + tmp = {} + count = 0 + max_F_C = 0 + for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): + try: + stat_D, p_value = st.ks_2samp(l1, l2) + #sum(l1) da errore secondo me perchè ha null + avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) + if not isinstance(avg, str): + if max_F_C < abs(avg): + max_F_C = abs(avg) + tmp[ids[count]] = [float(p_value), avg] + count += 1 + except (TypeError, ZeroDivisionError): + count += 1 + tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' + tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") + tmp_csv = tmp_csv.reset_index() + header = ['ids', 'P_Value', 'Log2(fold change)'] + tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if create_svg or create_pdf: + if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' + and args.yes_no == 'yes'): + fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) + file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' + with open(file_svg, 'wb') as new_map: + new_map.write(ET.tostring(core_map)) + + + if create_pdf: + file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' + renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if not create_svg: + #Ho utilizzato il file svg per generare il pdf, + #ma l'utente non ne ha richiesto il ritorno, quindi + #lo elimino + + os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') + elif comparison == "onevsrest": + for single_cluster in class_pat.keys(): + t = [] + for k in class_pat.keys(): + if k != single_cluster: + t.append(class_pat.get(k)) + rest = [] + for i in t: + rest = rest + i + + tmp = {} + count = 0 + max_F_C = 0 + + for l1, l2 in zip(rest, class_pat.get(single_cluster)): + try: + stat_D, p_value = st.ks_2samp(l1, l2) + avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) + if not isinstance(avg, str): + if max_F_C < abs(avg): + max_F_C = abs(avg) + tmp[ids[count]] = [float(p_value), avg] + count += 1 + except (TypeError, ZeroDivisionError): + count += 1 + tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv' + tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") + tmp_csv = tmp_csv.reset_index() + header = ['ids', 'P_Value', 'Log2(fold change)'] + tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if create_svg or create_pdf: + if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' + and args.yes_no == 'yes'): + fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) + file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg' + with open(file_svg, 'wb') as new_map: + new_map.write(ET.tostring(core_map)) + + + if create_pdf: + file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf' + 
renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if not create_svg: + os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg') + + elif comparison == "onevsmany": + for i, j in it.combinations(class_pat.keys(), 2): + + if i != control and j != control: + print(str(control) + " " + str(i) + " " + str(j)) + #Se è un confronto fra elementi diversi dal controllo, skippo + continue + + print('vado') + tmp = {} + count = 0 + max_F_C = 0 + for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): + try: + stat_D, p_value = st.ks_2samp(l1, l2) + #sum(l1) da errore secondo me perchè ha null + avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) + if not isinstance(avg, str): + if max_F_C < abs(avg): + max_F_C = abs(avg) + tmp[ids[count]] = [float(p_value), avg] + count += 1 + except (TypeError, ZeroDivisionError): + count += 1 + tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' + tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") + tmp_csv = tmp_csv.reset_index() + header = ['ids', 'P_Value', 'Log2(fold change)'] + tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if create_svg or create_pdf: + if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' + and args.yes_no == 'yes'): + fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) + file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' + with open(file_svg, 'wb') as new_map: + new_map.write(ET.tostring(core_map)) + + + if create_pdf: + file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' + renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if not create_svg: + #Ho utilizzato il file svg per generare il pdf, + #ma l'utente non ne ha richiesto il ritorno, quindi + #lo elimino + + os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') - if create_svg or create_pdf: - if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' - and args.yes_no == 'yes'): - fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) - file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' - with open(file_svg, 'wb') as new_map: - new_map.write(ET.tostring(core_map)) - - - if create_pdf: - file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' - renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) - - if not create_svg: - #Ho utilizzato il file svg per generare il pdf, - #ma l'utente non ne ha richiesto il ritorno, quindi - #lo elimino - os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') - + + + return None ############################ MAIN ############################################# @@ -745,12 +820,9 @@ create_svg = check_bool(args.generate_svg) create_pdf = check_bool(args.generate_pdf) - generate_ras = check_bool(args.generate_ras) - os.makedirs('result') - - if generate_ras: - os.makedirs('ras') + if os.path.isdir('result') == False: + os.makedirs('result') if args.rules_selector == 'HMRcore': recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) @@ -758,93 +830,60 @@ recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) elif args.rules_selector == 'Custom': ids, rules, gene_in_rule = make_recon(args.custom) - - resolve_none = check_bool(args.none) - + class_pat = {} - if args.option == 'datasets_rasonly': - name = "RAS Dataset" - dataset = read_dataset(args.input_datas[0],"dataset") - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) - - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) 
- elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) - - create_ras(resolve_rules, name, True, rules, ids) - - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - - print('execution succeded') - return None - - - elif args.option == 'datasets': + if args.option == 'datasets': num = 1 for i, j in zip(args.input_datas, args.names): - name = name_dataset(j, num) - dataset = read_dataset(i, name) - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) + resolve_rules = read_dataset(i, name) + + resolve_rules.iloc[:, 0] = (resolve_rules.iloc[:, 0]).astype(str) - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + ids = pd.Series.tolist(resolve_rules.iloc[:, 0]) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) + resolve_rules = resolve_rules.drop(resolve_rules.columns[[0]], axis=1) + resolve_rules = resolve_rules.replace({'None': None}) + resolve_rules = resolve_rules.to_dict('list') - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') + #Converto i valori da str a float + to_float = lambda x: float(x) if (x != None) else None + + resolve_rules_float = {} + + for k in resolve_rules: + resolve_rules_float[k] = list(map(to_float, resolve_rules[k])); resolve_rules_float + if resolve_rules != None: - class_pat[name] = list(map(list, zip(*resolve_rules.values()))) + class_pat[name] = list(map(list, zip(*resolve_rules_float.values()))) + num += 1 - elif args.option == 'dataset_class': - name = 'RNAseq' - dataset = read_dataset(args.input_data, name) - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - type_gene = gene_type(dataset.iloc[0, 0], name) + + if args.option == 'dataset_class': + name = 'RAS' + resolve_rules = read_dataset(args.input_data, name) + resolve_rules.iloc[:, 0] = (resolve_rules.iloc[:, 0]).astype(str) + + ids = pd.Series.tolist(resolve_rules.iloc[:, 0]) + + resolve_rules = resolve_rules.drop(resolve_rules.columns[[0]], axis=1) + resolve_rules = resolve_rules.replace({'None': None}) + resolve_rules = resolve_rules.to_dict('list') + + #Converto i valori da str a float + to_float = lambda x: float(x) if (x != None) else None + + resolve_rules_float = {} + + for k in resolve_rules: + resolve_rules_float[k] = list(map(to_float, resolve_rules[k])); resolve_rules_float + classes = read_dataset(args.input_class, 'class') - if not len(classes.columns) == 2: - warning('Warning: more than 2 columns in class file. 
Extra' + - 'columns have been disregarded\n') classes = classes.astype(str) - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) - if err != None and err: - warning('Warning: gene\n'+str(err)+'\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - if resolve_rules != None: - class_pat = split_class(classes, resolve_rules) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) - + + if resolve_rules_float != None: + class_pat = split_class(classes, resolve_rules_float) if args.rules_selector == 'Custom': if args.yes_no == 'yes': @@ -857,11 +896,12 @@ else: core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') - maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) + maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control) print('Execution succeded') + + return None - return None ############################################################################### diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/marea.xml --- a/Marea/marea.xml Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/marea.xml Wed Feb 19 10:44:52 2020 -0500 @@ -1,28 +1,19 @@ - - - - marea_macros.xml - - - pandas - scipy - cobra - lxml - svglib - reportlab - - - + + marea_macros.xml + + + pandas + scipy + cobra + lxml + svglib + reportlab + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + - - - - - - + + + + + + + + + + + + + - - - - - - - - - + + + + - - - - + + + + + - - - - - - - - - + + - - - - - - - - - - - - - cond['type_selector'] == "datasets_rasonly" - - - cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class" - - - - cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas'] - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - + + - - - + +undefined diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/ras_generator.py --- a/Marea/ras_generator.py Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/ras_generator.py Wed Feb 19 10:44:52 2020 -0500 @@ -1,16 +1,10 @@ from __future__ import division import sys import pandas as pd -import itertools as it -import scipy.stats as st import collections -import lxml.etree as ET import pickle as pk import math -import os import argparse -from svglib.svglib import svg2rlg -from reportlab.graphics import renderPDF ########################## argparse ########################################## @@ -26,69 +20,25 @@ parser.add_argument('-cr', '--custom', type = str, help='your dataset if you want custom rules') - parser.add_argument('-na', '--names', - type = str, - nargs = '+', - help = 'input names') parser.add_argument('-n', '--none', type = str, default = 'true', choices = ['true', 'false'], help = 'compute Nan values') - parser.add_argument('-pv' ,'--pValue', - type = float, - default = 0.05, - help = 'P-Value threshold (default: %(default)s)') - parser.add_argument('-fc', '--fChange', - type = float, - default = 1.5, - help = 'Fold-Change threshold (default: %(default)s)') parser.add_argument('-td', '--tool_dir', type = str, required = True, help = 'your tool directory') - parser.add_argument('-op', 
'--option', - type = str, - choices = ['datasets', 'dataset_class', 'datasets_rasonly'], - help='dataset or dataset and class') parser.add_argument('-ol', '--out_log', help = "Output log") - parser.add_argument('-ids', '--input_datas', - type = str, - nargs = '+', - help = 'input datasets') - parser.add_argument('-id', '--input_data', + parser.add_argument('-id', '--input', type = str, help = 'input dataset') - parser.add_argument('-ic', '--input_class', - type = str, - help = 'sample group specification') - parser.add_argument('-cm', '--custom_map', - type = str, - help = 'custom map') - parser.add_argument('-yn', '--yes_no', - type = str, - choices = ['yes', 'no'], - help = 'if make or not custom map') - parser.add_argument('-gs', '--generate_svg', + parser.add_argument('-ra', '--ras_output', type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate svg map') - parser.add_argument('-gp', '--generate_pdf', - type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate pdf map') - parser.add_argument('-gr', '--generate_ras', - type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate reaction activity score') - parser.add_argument('-sr', '--single_ras_file', - type = str, - help = 'file that will contain ras') - + required = True, + help = 'ras output') + args = parser.parse_args() return args @@ -297,79 +247,6 @@ return False return ris -############################ map_methods ###################################### - -def fold_change(avg1, avg2): - if avg1 == 0 and avg2 == 0: - return 0 - elif avg1 == 0: - return '-INF' - elif avg2 == 0: - return 'INF' - else: - return math.log(avg1 / avg2, 2) - -def fix_style(l, col, width, dash): - tmp = l.split(';') - flag_col = False - flag_width = False - flag_dash = False - for i in range(len(tmp)): - if tmp[i].startswith('stroke:'): - tmp[i] = 'stroke:' + col - flag_col = True - if tmp[i].startswith('stroke-width:'): - tmp[i] = 'stroke-width:' + width - flag_width = True - if tmp[i].startswith('stroke-dasharray:'): - tmp[i] = 'stroke-dasharray:' + dash - flag_dash = True - if not flag_col: - tmp.append('stroke:' + col) - if not flag_width: - tmp.append('stroke-width:' + width) - if not flag_dash: - tmp.append('stroke-dasharray:' + dash) - return ';'.join(tmp) - -def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C): - maxT = 12 - minT = 2 - grey = '#BEBEBE' - blue = '#0000FF' - red = '#E41A1C' - for el in core_map.iter(): - el_id = str(el.get('id')) - if el_id.startswith('R_'): - tmp = d.get(el_id[2:]) - if tmp != None: - p_val = tmp[0] - f_c = tmp[1] - if p_val < threshold_P_V: - if not isinstance(f_c, str): - if abs(f_c) < math.log(threshold_F_C, 2): - col = grey - width = str(minT) - else: - if f_c < 0: - col = blue - elif f_c > 0: - col = red - width = str(max((abs(f_c) * maxT) / max_F_C, minT)) - else: - if f_c == '-INF': - col = blue - elif f_c == 'INF': - col = red - width = str(maxT) - dash = 'none' - else: - dash = '5,5' - col = grey - width = str(minT) - el.set('style', fix_style(el.get('style'), col, width, dash)) - return core_map - ############################ make recon ####################################### def check_and_doWord(l): @@ -615,7 +492,6 @@ def resolve(genes, rules, ids, resolve_none, name): resolve_rules = {} - names_array = [] not_found = [] flag = False for key, value in genes.items(): @@ -652,7 +528,6 @@ for j in range(i, len(classes)): if classes.iloc[j, 1] == classe: pat_id = classes.iloc[j, 0] - tmp = resolve_rules.get(pat_id, None) if 
tmp != None: l.append(tmp) classes.iloc[j, 1] = None @@ -665,7 +540,7 @@ ############################ create_ras ####################################### -def create_ras (resolve_rules, dataset_name, single_ras, rules, ids): +def create_ras (resolve_rules, dataset_name, rules, ids, file): if resolve_rules == None: warning("Couldn't generate RAS for current dataset: " + dataset_name) @@ -680,78 +555,16 @@ output_ras.insert(0, 'Reactions', ids) output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) - if (single_ras): - args = process_args(sys.argv) - text_file = open(args.single_ras_file, "w") - else: - text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") + text_file = open(file, "w") text_file.write(output_to_csv) text_file.close() -############################ map ############################################## - -def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): - args = process_args(sys.argv) - if (not class_pat) or (len(class_pat.keys()) < 2): - sys.exit('Execution aborted: classes provided for comparisons are ' + - 'less than two\n') - for i, j in it.combinations(class_pat.keys(), 2): - tmp = {} - count = 0 - max_F_C = 0 - for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): - try: - stat_D, p_value = st.ks_2samp(l1, l2) - avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) - if not isinstance(avg, str): - if max_F_C < abs(avg): - max_F_C = abs(avg) - tmp[ids[count]] = [float(p_value), avg] - count += 1 - except (TypeError, ZeroDivisionError): - count += 1 - tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' - tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") - tmp_csv = tmp_csv.reset_index() - header = ['ids', 'P_Value', 'Log2(fold change)'] - tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) - - if create_svg or create_pdf: - if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' - and args.yes_no == 'yes'): - fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) - file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' - with open(file_svg, 'wb') as new_map: - new_map.write(ET.tostring(core_map)) - - - if create_pdf: - file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' - renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) - - if not create_svg: - #Ho utilizzato il file svg per generare il pdf, - #ma l'utente non ne ha richiesto il ritorno, quindi - #lo elimino - os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') - - return None - ############################ MAIN ############################################# def main(): args = process_args(sys.argv) - - create_svg = check_bool(args.generate_svg) - create_pdf = check_bool(args.generate_pdf) - generate_ras = check_bool(args.generate_ras) - os.makedirs('result') - - if generate_ras: - os.makedirs('ras') - if args.rules_selector == 'HMRcore': recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) elif args.rules_selector == 'Recon': @@ -761,104 +574,30 @@ resolve_none = check_bool(args.none) - class_pat = {} - if args.option == 'datasets_rasonly': - name = "RAS Dataset" - dataset = read_dataset(args.input_datas[0],"dataset") - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) - - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, 
gene_in_rule) - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + name = "RAS Dataset" + dataset = read_dataset(args.input, "dataset") - create_ras(resolve_rules, name, True, rules, ids) - - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - - print('execution succeded') - return None - - - elif args.option == 'datasets': - num = 1 - for i, j in zip(args.input_datas, args.names): - - name = name_dataset(j, num) - dataset = read_dataset(i, name) - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) - - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) - - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - if resolve_rules != None: - class_pat[name] = list(map(list, zip(*resolve_rules.values()))) - num += 1 - elif args.option == 'dataset_class': - name = 'RNAseq' - dataset = read_dataset(args.input_data, name) - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - type_gene = gene_type(dataset.iloc[0, 0], name) - classes = read_dataset(args.input_class, 'class') - if not len(classes.columns) == 2: - warning('Warning: more than 2 columns in class file. 
Extra' + - 'columns have been disregarded\n') - classes = classes.astype(str) - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) - if err != None and err: - warning('Warning: gene\n'+str(err)+'\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - if resolve_rules != None: - class_pat = split_class(classes, resolve_rules) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) + type_gene = gene_type(dataset.iloc[0, 0], name) + + if args.rules_selector != 'Custom': + genes = data_gene(dataset, type_gene, name, None) + ids, rules = load_id_rules(recon.get(type_gene)) + elif args.rules_selector == 'Custom': + genes = data_gene(dataset, type_gene, name, gene_in_rule) - - if args.rules_selector == 'Custom': - if args.yes_no == 'yes': - try: - core_map = ET.parse(args.custom_map) - except (ET.XMLSyntaxError, ET.XMLSchemaParseError): - sys.exit('Execution aborted: custom map in wrong format') - elif args.yes_no == 'no': - core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') - else: - core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') - - maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) - + resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + + create_ras(resolve_rules, name, rules, ids, args.ras_output) + + if err != None and err: + warning('Warning: gene\n' + str(err) + '\nnot found in class ' + + name + ', the expression level for this gene ' + + 'will be considered NaN\n') + + print('Execution succeded') return None diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/ras_generator.xml --- a/Marea/ras_generator.xml Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/ras_generator.xml Wed Feb 19 10:44:52 2020 -0500 @@ -1,5 +1,5 @@ - - + + - Reaction Activity Scores computation marea_macros.xml @@ -13,18 +13,15 @@ - @@ -47,124 +44,21 @@ - - - - - - - - - + + + - - - cond['type_selector'] == "datasets_rasonly" - - - cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class" - - - - cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas'] - - - + + - - - - - - +
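
The main functional change in `Marea/marea.py` is the new `--comparison`/`--control` pair and the three branches inside `maps()`: `manyvsmany`, `onevsrest`, and `onevsmany` all run the same per-reaction Kolmogorov-Smirnov test and log2 fold change, differing only in which class pairs they build. The following is a minimal sketch of that shared core, not the patch itself: `compare_two` and `pairs_to_compare` are hypothetical helper names, and the `onevsrest` branch pools sample values per reaction across the other classes, which is one reasonable reading of the "rest" grouping rather than a line-for-line copy of the patch.

```python
# Sketch of the per-reaction statistics behind the three comparison modes.
# class_pat maps class name -> list of per-reaction value lists, aligned with `ids`.
import itertools as it
import math

import scipy.stats as st


def fold_change(avg1, avg2):
    # Same convention as the tool: log2 ratio, with string sentinels for zero averages.
    if avg1 == 0 and avg2 == 0:
        return 0
    elif avg1 == 0:
        return '-INF'
    elif avg2 == 0:
        return 'INF'
    return math.log(avg1 / avg2, 2)


def compare_two(rows_a, rows_b, ids):
    """KS p-value and log2 fold change for each reaction id."""
    result = {}
    for count, (l1, l2) in enumerate(zip(rows_a, rows_b)):
        try:
            _stat, p_value = st.ks_2samp(l1, l2)
            avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
            result[ids[count]] = [float(p_value), avg]
        except (TypeError, ZeroDivisionError):
            continue  # skip reactions with missing or empty values
    return result


def pairs_to_compare(class_pat, comparison, control=None):
    """Yield (name_a, name_b, rows_a, rows_b) for each comparison the mode requires."""
    if comparison == 'manyvsmany':
        for i, j in it.combinations(class_pat, 2):
            yield i, j, class_pat[i], class_pat[j]
    elif comparison == 'onevsrest':
        for single in class_pat:
            others = [rows for name, rows in class_pat.items() if name != single]
            # pool the sample values of every other class, reaction by reaction
            rest = [sum(per_reaction, []) for per_reaction in zip(*others)]
            yield single, 'rest', class_pat[single], rest
    elif comparison == 'onevsmany':
        for i, j in it.combinations(class_pat, 2):
            if control in (i, j):  # keep only pairs that involve the control class
                yield i, j, class_pat[i], class_pat[j]
```

Each yielded pair would then be fed to `compare_two`, written out as the "(Tabular Result).tsv" file, and optionally rendered to the SVG/PDF map, exactly as the unchanged tail of each branch does in the patch.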
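
With RAS computation moved out of `marea.py`, the `datasets` and `dataset_class` branches of `main()` now read precomputed RAS tables and coerce their string values to floats before building `class_pat`. Below is a condensed sketch of that conversion, assuming the tab-separated layout written by `ras_generator.py` (a first `Reactions` column, one column per sample, missing scores stored as the literal string `None`); `load_ras_table` is an illustrative name, and the tool itself goes through its own `read_dataset` helper rather than `pandas.read_csv` directly.

```python
# Sketch: turn a RAS TSV into the (ids, per-reaction rows) pair that maps() consumes.
import pandas as pd


def load_ras_table(path):
    ras = pd.read_csv(path, sep='\t')
    ras.iloc[:, 0] = ras.iloc[:, 0].astype(str)

    ids = ras.iloc[:, 0].tolist()                       # reaction identifiers
    values = ras.drop(ras.columns[[0]], axis=1)         # per-sample RAS columns
    values = values.replace({'None': None}).to_dict('list')

    # convert string scores to float, preserving missing values
    per_sample = {
        sample: [float(v) if v is not None else None for v in column]
        for sample, column in values.items()
    }

    # transpose: one list of sample values per reaction, aligned with `ids`
    per_reaction = list(map(list, zip(*per_sample.values())))
    return ids, per_reaction
```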
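
On the generator side, `create_ras()` in `ras_generator.py` now writes to an explicit output path (the new required `--ras_output` argument) instead of choosing between a single file and a `ras/` directory. The sketch below shows the equivalent serialisation step; `write_ras` is a hypothetical name, and a raised exception stands in for the tool's `warning()` logging when no scores are available.

```python
# Sketch: serialise per-sample RAS values to the TSV path given via --ras_output.
# `resolve_rules` maps sample name -> list of scores aligned with the reaction ids.
import pandas as pd


def write_ras(resolve_rules, ids, out_path):
    if resolve_rules is None:
        raise ValueError('no RAS values to write')

    # represent missing scores as the literal string 'None', as the tool does
    cleaned = {
        sample: ['None' if v is None else v for v in scores]
        for sample, scores in resolve_rules.items()
    }

    table = pd.DataFrame.from_dict(cleaned)
    table.insert(0, 'Reactions', ids)
    table.to_csv(out_path, sep='\t', index=False)
```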