Mercurial > repos > bimib > marea
diff Marea/marea.py @ 16:c71ac0bb12de draft
Uploaded
author | bimib |
---|---|
date | Tue, 01 Oct 2019 06:05:13 -0400 |
parents | e96f3b85e5a0 |
children | e6831924df01 |
line wrap: on
line diff
--- a/Marea/marea.py Tue Oct 01 06:03:12 2019 -0400 +++ b/Marea/marea.py Tue Oct 01 06:05:13 2019 -0400 @@ -1,4 +1,3 @@ - from __future__ import division import sys import pandas as pd @@ -6,6 +5,7 @@ import scipy.stats as st import collections import lxml.etree as ET +import shutil import pickle as pk import math import os @@ -13,7 +13,7 @@ from svglib.svglib import svg2rlg from reportlab.graphics import renderPDF -########################## argparse ########################################### +########################## argparse ########################################## def process_args(args): parser = argparse.ArgumentParser(usage = '%(prog)s [options]', @@ -71,6 +71,21 @@ type = str, choices = ['yes', 'no'], help = 'if make or not custom map') + parser.add_argument('-gs', '--generate_svg', + type = str, + default = 'true', + choices = ['true', 'false'], + help = 'generate svg map') + parser.add_argument('-gp', '--generate_pdf', + type = str, + default = 'true', + choices = ['true', 'false'], + help = 'generate pdf map') + parser.add_argument('-gr', '--generate_ras', + type = str, + default = 'true', + choices = ['true', 'false'], + help = 'generate reaction activity score') args = parser.parse_args() return args @@ -85,7 +100,7 @@ def read_dataset(data, name): try: - dataset = pd.read_csv(data, sep = '\t', header = 0) + dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python') except pd.errors.EmptyDataError: sys.exit('Execution aborted: wrong format of ' + name + '\n') if len(dataset.columns) < 2: @@ -536,7 +551,7 @@ ids = [react[i].id for i in range(len(react))] except cb.io.sbml3.CobraSBMLError: try: - data = (pd.read_csv(data, sep = '\t', dtype = str)).fillna('') + data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') if len(data.columns) < 2: sys.exit('Execution aborted: wrong format of '+ 'custom datarules\n') @@ -641,9 +656,28 @@ ', the class has been disregarded\n') return class_pat +############################ create_ras ####################################### + +def create_ras (resolve_rules, dataset_name): + + if resolve_rules == None: + warning("Couldn't generate RAS for current dataset: " + dataset_name) + + for geni in resolve_rules.values(): + for i, valori in enumerate(geni): + if valori == None: + geni[i] = 'None' + + output_ras = pd.DataFrame.from_dict(resolve_rules) + output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) + + text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") + text_file.write(output_to_csv) + text_file.close() + ############################ map ############################################## -def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C): +def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): args = process_args(sys.argv) if (not class_pat) or (len(class_pat.keys()) < 2): sys.exit('Execution aborted: classes provided for comparisons are ' + @@ -663,52 +697,81 @@ count += 1 except (TypeError, ZeroDivisionError): count += 1 - tab = 'table_out/' + i + '_vs_' + j + '.tsv' + tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") tmp_csv = tmp_csv.reset_index() header = ['ids', 'P_Value', 'Average'] tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) - if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' - and args.yes_no == 'yes'): - fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) - file_svg = 'map_svg/' + i + '_vs_' + j + '.svg' - with open(file_svg, 'wb') as new_map: - new_map.write(ET.tostring(core_map, encoding='UTF-8', - method='xml')) - file_pdf = 'map_pdf/' + i + '_vs_' + j + '.pdf' - renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if create_svg or create_pdf: + if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' + and args.yes_no == 'yes'): + fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) + file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' + with open(file_svg, 'wb') as new_map: + new_map.write(ET.tostring(core_map)) + + + if create_pdf: + file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' + renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if not create_svg: + #Ho utilizzato il file svg per generare il pdf, + #ma l'utente non ne ha richiesto il ritorno, quindi + #lo elimino + os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') + return None ############################ MAIN ############################################# def main(): args = process_args(sys.argv) - os.makedirs('table_out') - if args.rules_selector == 'HMRcore': - os.makedirs('map_svg') - os.makedirs('map_pdf') + + create_svg = check_bool(args.generate_svg) + create_pdf = check_bool(args.generate_pdf) + generate_ras = check_bool(args.generate_ras) + + os.makedirs('result') + + if generate_ras: + os.makedirs('ras') + + if args.rules_selector == 'HMRcore': recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) elif args.rules_selector == 'Recon': recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) elif args.rules_selector == 'Custom': ids, rules, gene_in_rule = make_recon(args.custom) + resolve_none = check_bool(args.none) + class_pat = {} + if args.option == 'datasets': num = 1 - #if len(args.names) != len(set(args.names)): - # sys.exit('Execution aborted: datasets name duplicated') for i, j in zip(args.input_datas, args.names): + name = name_dataset(j, num) dataset = read_dataset(i, name) + dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - type_gene = gene_type(dataset.iloc[0, 0], name) + + type_gene = gene_type(dataset.iloc[0, 0], name) + if args.rules_selector != 'Custom': genes = data_gene(dataset, type_gene, name, None) ids, rules = load_id_rules(recon.get(type_gene)) elif args.rules_selector == 'Custom': genes = data_gene(dataset, type_gene, name, gene_in_rule) + resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + + if generate_ras: + create_ras(resolve_rules, name) + + if err != None and err: warning('Warning: gene\n' + str(err) + '\nnot found in class ' + name + ', the expression level for this gene ' + @@ -738,10 +801,9 @@ 'will be considered NaN\n') if resolve_rules != None: class_pat = split_class(classes, resolve_rules) + if args.rules_selector == 'Custom': if args.yes_no == 'yes': - os.makedirs('map_svg') - os.makedirs('map_pdf') try: core_map = ET.parse(args.custom_map) except (ET.XMLSyntaxError, ET.XMLSchemaParseError): @@ -750,8 +812,11 @@ core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') else: core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') - maps(core_map, class_pat, ids, args.pValue, args.fChange) - warning('Execution succeeded') + + maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) + + print('Execution succeded') + return None ###############################################################################