# HG changeset patch
# User bimib
# Date 1582127092 18000
# Node ID 3af9d394367c9fc83fa17541659510e32864ab12
# Parent 5d5d01ef1d68cf66de0c303bfe9693a51e4be842
Uploaded
diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/marea.py
--- a/Marea/marea.py Wed Jan 22 11:50:54 2020 -0500
+++ b/Marea/marea.py Wed Feb 19 10:44:52 2020 -0500
@@ -26,10 +26,6 @@
parser.add_argument('-cr', '--custom',
type = str,
help='your dataset if you want custom rules')
- parser.add_argument('-na', '--names',
- type = str,
- nargs = '+',
- help = 'input names')
parser.add_argument('-n', '--none',
type = str,
default = 'true',
@@ -37,7 +33,7 @@
help = 'compute Nan values')
parser.add_argument('-pv' ,'--pValue',
type = float,
- default = 0.05,
+ default = 0.1,
help = 'P-Value threshold (default: %(default)s)')
parser.add_argument('-fc', '--fChange',
type = float,
@@ -49,14 +45,10 @@
help = 'your tool directory')
parser.add_argument('-op', '--option',
type = str,
- choices = ['datasets', 'dataset_class', 'datasets_rasonly'],
+ choices = ['datasets', 'dataset_class'],
help='dataset or dataset and class')
parser.add_argument('-ol', '--out_log',
help = "Output log")
- parser.add_argument('-ids', '--input_datas',
- type = str,
- nargs = '+',
- help = 'input datasets')
parser.add_argument('-id', '--input_data',
type = str,
help = 'input dataset')
@@ -80,15 +72,21 @@
default = 'true',
choices = ['true', 'false'],
help = 'generate pdf map')
- parser.add_argument('-gr', '--generate_ras',
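+    # --control names the reference class used by the 'onevsmany' comparison mode;
+    # --comparison selects how classes are paired when building maps and tables.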
+ parser.add_argument('-on', '--control',
+ type = str)
+ parser.add_argument('-co', '--comparison',
+ type = str,
+                        default = 'manyvsmany',
+ choices = ['manyvsmany', 'onevsrest', 'onevsmany'])
+ parser.add_argument('-ids', '--input_datas',
type = str,
- default = 'true',
- choices = ['true', 'false'],
- help = 'generate reaction activity score')
- parser.add_argument('-sr', '--single_ras_file',
- type = str,
- help = 'file that will contain ras')
-
+ nargs = '+',
+ help = 'input datasets')
+ parser.add_argument('-na', '--names',
+ type = str,
+ nargs = '+',
+ help = 'input names')
+
args = parser.parse_args()
return args
@@ -615,7 +613,6 @@
def resolve(genes, rules, ids, resolve_none, name):
resolve_rules = {}
- names_array = []
not_found = []
flag = False
for key, value in genes.items():
@@ -663,79 +660,157 @@
', the class has been disregarded\n')
return class_pat
-############################ create_ras #######################################
-
-def create_ras (resolve_rules, dataset_name, single_ras, rules, ids):
-
- if resolve_rules == None:
- warning("Couldn't generate RAS for current dataset: " + dataset_name)
-
- for geni in resolve_rules.values():
- for i, valori in enumerate(geni):
- if valori == None:
- geni[i] = 'None'
-
- output_ras = pd.DataFrame.from_dict(resolve_rules)
-
- output_ras.insert(0, 'Reactions', ids)
- output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)
-
- if (single_ras):
- args = process_args(sys.argv)
- text_file = open(args.single_ras_file, "w")
- else:
- text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w")
-
- text_file.write(output_to_csv)
- text_file.close()
-
############################ map ##############################################
-def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf):
+def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf, comparison, control):
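+    # For every comparison of two groups, each reaction gets a two-sample KS test
+    # p-value and a log2 fold change; results go to a tabular file and, on request,
+    # to an SVG and/or PDF styled metabolic map.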
args = process_args(sys.argv)
if (not class_pat) or (len(class_pat.keys()) < 2):
sys.exit('Execution aborted: classes provided for comparisons are ' +
'less than two\n')
- for i, j in it.combinations(class_pat.keys(), 2):
- tmp = {}
- count = 0
- max_F_C = 0
- for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
- try:
- stat_D, p_value = st.ks_2samp(l1, l2)
- avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
- if not isinstance(avg, str):
- if max_F_C < abs(avg):
- max_F_C = abs(avg)
- tmp[ids[count]] = [float(p_value), avg]
- count += 1
- except (TypeError, ZeroDivisionError):
- count += 1
- tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
- tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
- tmp_csv = tmp_csv.reset_index()
- header = ['ids', 'P_Value', 'Log2(fold change)']
- tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
+
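+    # manyvsmany: compare every unordered pair of classes.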
+ if comparison == "manyvsmany":
+ for i, j in it.combinations(class_pat.keys(), 2):
+
+ tmp = {}
+ count = 0
+ max_F_C = 0
+ for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
+ try:
+ stat_D, p_value = st.ks_2samp(l1, l2)
+                    # sum(l1) may fail here, presumably because the list contains None values
+ avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
+ if not isinstance(avg, str):
+ if max_F_C < abs(avg):
+ max_F_C = abs(avg)
+ tmp[ids[count]] = [float(p_value), avg]
+ count += 1
+ except (TypeError, ZeroDivisionError):
+ count += 1
+ tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
+ tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
+ tmp_csv = tmp_csv.reset_index()
+ header = ['ids', 'P_Value', 'Log2(fold change)']
+ tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
+
+ if create_svg or create_pdf:
+ if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
+ and args.yes_no == 'yes'):
+ fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
+ file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
+ with open(file_svg, 'wb') as new_map:
+ new_map.write(ET.tostring(core_map))
+
+
+ if create_pdf:
+ file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
+ renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
+
+ if not create_svg:
+                    # The SVG file was only generated to produce the PDF and the user
+                    # did not ask for it, so remove it.
+
+ os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
+ elif comparison == "onevsrest":
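+        # onevsrest: compare each class against the pooled samples of all remaining classes.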
+ for single_cluster in class_pat.keys():
+ t = []
+ for k in class_pat.keys():
+ if k != single_cluster:
+ t.append(class_pat.get(k))
+ rest = []
+ for i in t:
+ rest = rest + i
+
+ tmp = {}
+ count = 0
+ max_F_C = 0
+
+ for l1, l2 in zip(rest, class_pat.get(single_cluster)):
+ try:
+ stat_D, p_value = st.ks_2samp(l1, l2)
+ avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
+ if not isinstance(avg, str):
+ if max_F_C < abs(avg):
+ max_F_C = abs(avg)
+ tmp[ids[count]] = [float(p_value), avg]
+ count += 1
+ except (TypeError, ZeroDivisionError):
+ count += 1
+ tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv'
+ tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
+ tmp_csv = tmp_csv.reset_index()
+ header = ['ids', 'P_Value', 'Log2(fold change)']
+ tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
+
+ if create_svg or create_pdf:
+ if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
+ and args.yes_no == 'yes'):
+ fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
+                    file_svg = 'result/' + single_cluster + '_vs_rest (SVG Map).svg'
+ with open(file_svg, 'wb') as new_map:
+ new_map.write(ET.tostring(core_map))
+
+
+ if create_pdf:
+                        file_pdf = 'result/' + single_cluster + '_vs_rest (PDF Map).pdf'
+ renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
+
+ if not create_svg:
+                            os.remove('result/' + single_cluster + '_vs_rest (SVG Map).svg')
+
+ elif comparison == "onevsmany":
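+        # onevsmany: compare the control class against every other class.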
+ for i, j in it.combinations(class_pat.keys(), 2):
+
+            if i != control and j != control:
+                # Skip pairs that do not involve the control class.
+                continue
+
+ tmp = {}
+ count = 0
+ max_F_C = 0
+ for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
+ try:
+ stat_D, p_value = st.ks_2samp(l1, l2)
+                    # sum(l1) may fail here, presumably because the list contains None values
+ avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
+ if not isinstance(avg, str):
+ if max_F_C < abs(avg):
+ max_F_C = abs(avg)
+ tmp[ids[count]] = [float(p_value), avg]
+ count += 1
+ except (TypeError, ZeroDivisionError):
+ count += 1
+ tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
+ tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
+ tmp_csv = tmp_csv.reset_index()
+ header = ['ids', 'P_Value', 'Log2(fold change)']
+ tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
+
+ if create_svg or create_pdf:
+ if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
+ and args.yes_no == 'yes'):
+ fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
+ file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
+ with open(file_svg, 'wb') as new_map:
+ new_map.write(ET.tostring(core_map))
+
+
+ if create_pdf:
+ file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
+ renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
+
+ if not create_svg:
+                            # The SVG file was only generated to produce the PDF and the user
+                            # did not ask for it, so remove it.
+
+ os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
- if create_svg or create_pdf:
- if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
- and args.yes_no == 'yes'):
- fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
- file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
- with open(file_svg, 'wb') as new_map:
- new_map.write(ET.tostring(core_map))
-
-
- if create_pdf:
- file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
- renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
-
- if not create_svg:
- #Ho utilizzato il file svg per generare il pdf,
- #ma l'utente non ne ha richiesto il ritorno, quindi
- #lo elimino
- os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
-
+
+
+
return None
############################ MAIN #############################################
@@ -745,12 +820,9 @@
create_svg = check_bool(args.generate_svg)
create_pdf = check_bool(args.generate_pdf)
- generate_ras = check_bool(args.generate_ras)
- os.makedirs('result')
-
- if generate_ras:
- os.makedirs('ras')
+    if not os.path.isdir('result'):
+ os.makedirs('result')
if args.rules_selector == 'HMRcore':
recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
@@ -758,93 +830,60 @@
recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
elif args.rules_selector == 'Custom':
ids, rules, gene_in_rule = make_recon(args.custom)
-
- resolve_none = check_bool(args.none)
-
+
class_pat = {}
- if args.option == 'datasets_rasonly':
- name = "RAS Dataset"
- dataset = read_dataset(args.input_datas[0],"dataset")
-
- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
-
- type_gene = gene_type(dataset.iloc[0, 0], name)
-
- if args.rules_selector != 'Custom':
- genes = data_gene(dataset, type_gene, name, None)
- ids, rules = load_id_rules(recon.get(type_gene))
- elif args.rules_selector == 'Custom':
- genes = data_gene(dataset, type_gene, name, gene_in_rule)
-
- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
-
- create_ras(resolve_rules, name, True, rules, ids)
-
- if err != None and err:
- warning('Warning: gene\n' + str(err) + '\nnot found in class '
- + name + ', the expression level for this gene ' +
- 'will be considered NaN\n')
-
- print('execution succeded')
- return None
-
-
- elif args.option == 'datasets':
+ if args.option == 'datasets':
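+        # 'datasets': each input file is expected to already contain Reaction Activity
+        # Scores, with reaction ids in the first column and one column per sample.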
num = 1
for i, j in zip(args.input_datas, args.names):
-
name = name_dataset(j, num)
- dataset = read_dataset(i, name)
-
- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
-
- type_gene = gene_type(dataset.iloc[0, 0], name)
+ resolve_rules = read_dataset(i, name)
+
+ resolve_rules.iloc[:, 0] = (resolve_rules.iloc[:, 0]).astype(str)
- if args.rules_selector != 'Custom':
- genes = data_gene(dataset, type_gene, name, None)
- ids, rules = load_id_rules(recon.get(type_gene))
- elif args.rules_selector == 'Custom':
- genes = data_gene(dataset, type_gene, name, gene_in_rule)
-
-
- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
+ ids = pd.Series.tolist(resolve_rules.iloc[:, 0])
- if generate_ras:
- create_ras(resolve_rules, name, False, rules, ids)
+ resolve_rules = resolve_rules.drop(resolve_rules.columns[[0]], axis=1)
+ resolve_rules = resolve_rules.replace({'None': None})
+ resolve_rules = resolve_rules.to_dict('list')
- if err != None and err:
- warning('Warning: gene\n' + str(err) + '\nnot found in class '
- + name + ', the expression level for this gene ' +
- 'will be considered NaN\n')
+            # Convert the values from str to float (missing values stay None).
+            to_float = lambda x: float(x) if x is not None else None
+
+ resolve_rules_float = {}
+
+ for k in resolve_rules:
+                resolve_rules_float[k] = list(map(to_float, resolve_rules[k]))
+
if resolve_rules != None:
- class_pat[name] = list(map(list, zip(*resolve_rules.values())))
+ class_pat[name] = list(map(list, zip(*resolve_rules_float.values())))
+
num += 1
- elif args.option == 'dataset_class':
- name = 'RNAseq'
- dataset = read_dataset(args.input_data, name)
- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
- type_gene = gene_type(dataset.iloc[0, 0], name)
+
+ if args.option == 'dataset_class':
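+        # 'dataset_class': a single RAS dataset plus a class file assigning each sample to a class.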
+ name = 'RAS'
+ resolve_rules = read_dataset(args.input_data, name)
+ resolve_rules.iloc[:, 0] = (resolve_rules.iloc[:, 0]).astype(str)
+
+ ids = pd.Series.tolist(resolve_rules.iloc[:, 0])
+
+ resolve_rules = resolve_rules.drop(resolve_rules.columns[[0]], axis=1)
+ resolve_rules = resolve_rules.replace({'None': None})
+ resolve_rules = resolve_rules.to_dict('list')
+
+        # Convert the values from str to float (missing values stay None).
+        to_float = lambda x: float(x) if x is not None else None
+
+ resolve_rules_float = {}
+
+ for k in resolve_rules:
+            resolve_rules_float[k] = list(map(to_float, resolve_rules[k]))
+
classes = read_dataset(args.input_class, 'class')
- if not len(classes.columns) == 2:
- warning('Warning: more than 2 columns in class file. Extra' +
- 'columns have been disregarded\n')
classes = classes.astype(str)
- if args.rules_selector != 'Custom':
- genes = data_gene(dataset, type_gene, name, None)
- ids, rules = load_id_rules(recon.get(type_gene))
- elif args.rules_selector == 'Custom':
- genes = data_gene(dataset, type_gene, name, gene_in_rule)
- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
- if err != None and err:
- warning('Warning: gene\n'+str(err)+'\nnot found in class '
- + name + ', the expression level for this gene ' +
- 'will be considered NaN\n')
- if resolve_rules != None:
- class_pat = split_class(classes, resolve_rules)
- if generate_ras:
- create_ras(resolve_rules, name, False, rules, ids)
-
+
+    if resolve_rules_float is not None:
+ class_pat = split_class(classes, resolve_rules_float)
if args.rules_selector == 'Custom':
if args.yes_no == 'yes':
@@ -857,11 +896,12 @@
else:
core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')
- maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)
+ maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control)
print('Execution succeded')
+
+ return None
- return None
###############################################################################
diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/marea.xml
--- a/Marea/marea.xml Wed Jan 22 11:50:54 2020 -0500
+++ b/Marea/marea.xml Wed Feb 19 10:44:52 2020 -0500
@@ -1,28 +1,19 @@
[Hunk body garbled: the XML tool-wrapper markup was stripped and only element text survives.]
Present in both the removed and the added lines:
  macro import: marea_macros.xml
  requirements: pandas, scipy, cobra, lxml, svglib, reportlab
Present only in removed lines (conditional filters):
  cond['type_selector'] == "datasets_rasonly"
  cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class"
  cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas']
diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/ras_generator.py
--- a/Marea/ras_generator.py Wed Jan 22 11:50:54 2020 -0500
+++ b/Marea/ras_generator.py Wed Feb 19 10:44:52 2020 -0500
@@ -1,16 +1,10 @@
from __future__ import division
import sys
import pandas as pd
-import itertools as it
-import scipy.stats as st
import collections
-import lxml.etree as ET
import pickle as pk
import math
-import os
import argparse
-from svglib.svglib import svg2rlg
-from reportlab.graphics import renderPDF
########################## argparse ##########################################
@@ -26,69 +20,25 @@
parser.add_argument('-cr', '--custom',
type = str,
help='your dataset if you want custom rules')
- parser.add_argument('-na', '--names',
- type = str,
- nargs = '+',
- help = 'input names')
parser.add_argument('-n', '--none',
type = str,
default = 'true',
choices = ['true', 'false'],
help = 'compute Nan values')
- parser.add_argument('-pv' ,'--pValue',
- type = float,
- default = 0.05,
- help = 'P-Value threshold (default: %(default)s)')
- parser.add_argument('-fc', '--fChange',
- type = float,
- default = 1.5,
- help = 'Fold-Change threshold (default: %(default)s)')
parser.add_argument('-td', '--tool_dir',
type = str,
required = True,
help = 'your tool directory')
- parser.add_argument('-op', '--option',
- type = str,
- choices = ['datasets', 'dataset_class', 'datasets_rasonly'],
- help='dataset or dataset and class')
parser.add_argument('-ol', '--out_log',
help = "Output log")
- parser.add_argument('-ids', '--input_datas',
- type = str,
- nargs = '+',
- help = 'input datasets')
- parser.add_argument('-id', '--input_data',
+ parser.add_argument('-id', '--input',
type = str,
help = 'input dataset')
- parser.add_argument('-ic', '--input_class',
- type = str,
- help = 'sample group specification')
- parser.add_argument('-cm', '--custom_map',
- type = str,
- help = 'custom map')
- parser.add_argument('-yn', '--yes_no',
- type = str,
- choices = ['yes', 'no'],
- help = 'if make or not custom map')
- parser.add_argument('-gs', '--generate_svg',
+ parser.add_argument('-ra', '--ras_output',
type = str,
- default = 'true',
- choices = ['true', 'false'],
- help = 'generate svg map')
- parser.add_argument('-gp', '--generate_pdf',
- type = str,
- default = 'true',
- choices = ['true', 'false'],
- help = 'generate pdf map')
- parser.add_argument('-gr', '--generate_ras',
- type = str,
- default = 'true',
- choices = ['true', 'false'],
- help = 'generate reaction activity score')
- parser.add_argument('-sr', '--single_ras_file',
- type = str,
- help = 'file that will contain ras')
-
+ required = True,
+ help = 'ras output')
+
args = parser.parse_args()
return args
@@ -297,79 +247,6 @@
return False
return ris
-############################ map_methods ######################################
-
-def fold_change(avg1, avg2):
- if avg1 == 0 and avg2 == 0:
- return 0
- elif avg1 == 0:
- return '-INF'
- elif avg2 == 0:
- return 'INF'
- else:
- return math.log(avg1 / avg2, 2)
-
-def fix_style(l, col, width, dash):
- tmp = l.split(';')
- flag_col = False
- flag_width = False
- flag_dash = False
- for i in range(len(tmp)):
- if tmp[i].startswith('stroke:'):
- tmp[i] = 'stroke:' + col
- flag_col = True
- if tmp[i].startswith('stroke-width:'):
- tmp[i] = 'stroke-width:' + width
- flag_width = True
- if tmp[i].startswith('stroke-dasharray:'):
- tmp[i] = 'stroke-dasharray:' + dash
- flag_dash = True
- if not flag_col:
- tmp.append('stroke:' + col)
- if not flag_width:
- tmp.append('stroke-width:' + width)
- if not flag_dash:
- tmp.append('stroke-dasharray:' + dash)
- return ';'.join(tmp)
-
-def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C):
- maxT = 12
- minT = 2
- grey = '#BEBEBE'
- blue = '#0000FF'
- red = '#E41A1C'
- for el in core_map.iter():
- el_id = str(el.get('id'))
- if el_id.startswith('R_'):
- tmp = d.get(el_id[2:])
- if tmp != None:
- p_val = tmp[0]
- f_c = tmp[1]
- if p_val < threshold_P_V:
- if not isinstance(f_c, str):
- if abs(f_c) < math.log(threshold_F_C, 2):
- col = grey
- width = str(minT)
- else:
- if f_c < 0:
- col = blue
- elif f_c > 0:
- col = red
- width = str(max((abs(f_c) * maxT) / max_F_C, minT))
- else:
- if f_c == '-INF':
- col = blue
- elif f_c == 'INF':
- col = red
- width = str(maxT)
- dash = 'none'
- else:
- dash = '5,5'
- col = grey
- width = str(minT)
- el.set('style', fix_style(el.get('style'), col, width, dash))
- return core_map
-
############################ make recon #######################################
def check_and_doWord(l):
@@ -615,7 +492,6 @@
def resolve(genes, rules, ids, resolve_none, name):
resolve_rules = {}
- names_array = []
not_found = []
flag = False
for key, value in genes.items():
@@ -652,7 +528,6 @@
for j in range(i, len(classes)):
if classes.iloc[j, 1] == classe:
pat_id = classes.iloc[j, 0]
- tmp = resolve_rules.get(pat_id, None)
if tmp != None:
l.append(tmp)
classes.iloc[j, 1] = None
@@ -665,7 +540,7 @@
############################ create_ras #######################################
-def create_ras (resolve_rules, dataset_name, single_ras, rules, ids):
+def create_ras (resolve_rules, dataset_name, rules, ids, file):
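+    # Write the RAS table (one row per reaction, one column per sample) as TSV to 'file'.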
if resolve_rules == None:
warning("Couldn't generate RAS for current dataset: " + dataset_name)
@@ -680,78 +555,16 @@
output_ras.insert(0, 'Reactions', ids)
output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)
- if (single_ras):
- args = process_args(sys.argv)
- text_file = open(args.single_ras_file, "w")
- else:
- text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w")
+ text_file = open(file, "w")
text_file.write(output_to_csv)
text_file.close()
-############################ map ##############################################
-
-def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf):
- args = process_args(sys.argv)
- if (not class_pat) or (len(class_pat.keys()) < 2):
- sys.exit('Execution aborted: classes provided for comparisons are ' +
- 'less than two\n')
- for i, j in it.combinations(class_pat.keys(), 2):
- tmp = {}
- count = 0
- max_F_C = 0
- for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
- try:
- stat_D, p_value = st.ks_2samp(l1, l2)
- avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
- if not isinstance(avg, str):
- if max_F_C < abs(avg):
- max_F_C = abs(avg)
- tmp[ids[count]] = [float(p_value), avg]
- count += 1
- except (TypeError, ZeroDivisionError):
- count += 1
- tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
- tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
- tmp_csv = tmp_csv.reset_index()
- header = ['ids', 'P_Value', 'Log2(fold change)']
- tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
-
- if create_svg or create_pdf:
- if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
- and args.yes_no == 'yes'):
- fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
- file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
- with open(file_svg, 'wb') as new_map:
- new_map.write(ET.tostring(core_map))
-
-
- if create_pdf:
- file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
- renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
-
- if not create_svg:
- #Ho utilizzato il file svg per generare il pdf,
- #ma l'utente non ne ha richiesto il ritorno, quindi
- #lo elimino
- os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
-
- return None
-
############################ MAIN #############################################
def main():
args = process_args(sys.argv)
-
- create_svg = check_bool(args.generate_svg)
- create_pdf = check_bool(args.generate_pdf)
- generate_ras = check_bool(args.generate_ras)
- os.makedirs('result')
-
- if generate_ras:
- os.makedirs('ras')
-
if args.rules_selector == 'HMRcore':
recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
elif args.rules_selector == 'Recon':
@@ -761,104 +574,30 @@
resolve_none = check_bool(args.none)
- class_pat = {}
- if args.option == 'datasets_rasonly':
- name = "RAS Dataset"
- dataset = read_dataset(args.input_datas[0],"dataset")
-
- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
-
- type_gene = gene_type(dataset.iloc[0, 0], name)
-
- if args.rules_selector != 'Custom':
- genes = data_gene(dataset, type_gene, name, None)
- ids, rules = load_id_rules(recon.get(type_gene))
- elif args.rules_selector == 'Custom':
- genes = data_gene(dataset, type_gene, name, gene_in_rule)
-
- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
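+    # ras_generator handles a single expression dataset and writes one RAS table
+    # to the path given by --ras_output.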
+ name = "RAS Dataset"
+ dataset = read_dataset(args.input, "dataset")
- create_ras(resolve_rules, name, True, rules, ids)
-
- if err != None and err:
- warning('Warning: gene\n' + str(err) + '\nnot found in class '
- + name + ', the expression level for this gene ' +
- 'will be considered NaN\n')
-
- print('execution succeded')
- return None
-
-
- elif args.option == 'datasets':
- num = 1
- for i, j in zip(args.input_datas, args.names):
-
- name = name_dataset(j, num)
- dataset = read_dataset(i, name)
-
- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
-
- type_gene = gene_type(dataset.iloc[0, 0], name)
-
- if args.rules_selector != 'Custom':
- genes = data_gene(dataset, type_gene, name, None)
- ids, rules = load_id_rules(recon.get(type_gene))
- elif args.rules_selector == 'Custom':
- genes = data_gene(dataset, type_gene, name, gene_in_rule)
-
-
- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
+ dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
- if generate_ras:
- create_ras(resolve_rules, name, False, rules, ids)
-
- if err != None and err:
- warning('Warning: gene\n' + str(err) + '\nnot found in class '
- + name + ', the expression level for this gene ' +
- 'will be considered NaN\n')
- if resolve_rules != None:
- class_pat[name] = list(map(list, zip(*resolve_rules.values())))
- num += 1
- elif args.option == 'dataset_class':
- name = 'RNAseq'
- dataset = read_dataset(args.input_data, name)
- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
- type_gene = gene_type(dataset.iloc[0, 0], name)
- classes = read_dataset(args.input_class, 'class')
- if not len(classes.columns) == 2:
- warning('Warning: more than 2 columns in class file. Extra' +
- 'columns have been disregarded\n')
- classes = classes.astype(str)
- if args.rules_selector != 'Custom':
- genes = data_gene(dataset, type_gene, name, None)
- ids, rules = load_id_rules(recon.get(type_gene))
- elif args.rules_selector == 'Custom':
- genes = data_gene(dataset, type_gene, name, gene_in_rule)
- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
- if err != None and err:
- warning('Warning: gene\n'+str(err)+'\nnot found in class '
- + name + ', the expression level for this gene ' +
- 'will be considered NaN\n')
- if resolve_rules != None:
- class_pat = split_class(classes, resolve_rules)
- if generate_ras:
- create_ras(resolve_rules, name, False, rules, ids)
+ type_gene = gene_type(dataset.iloc[0, 0], name)
+
+ if args.rules_selector != 'Custom':
+ genes = data_gene(dataset, type_gene, name, None)
+ ids, rules = load_id_rules(recon.get(type_gene))
+ elif args.rules_selector == 'Custom':
+ genes = data_gene(dataset, type_gene, name, gene_in_rule)
-
- if args.rules_selector == 'Custom':
- if args.yes_no == 'yes':
- try:
- core_map = ET.parse(args.custom_map)
- except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
- sys.exit('Execution aborted: custom map in wrong format')
- elif args.yes_no == 'no':
- core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')
- else:
- core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')
-
- maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)
-
+ resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
+
+ create_ras(resolve_rules, name, rules, ids, args.ras_output)
+
+ if err != None and err:
+ warning('Warning: gene\n' + str(err) + '\nnot found in class '
+ + name + ', the expression level for this gene ' +
+ 'will be considered NaN\n')
+
+
print('Execution succeded')
return None
diff -r 5d5d01ef1d68 -r 3af9d394367c Marea/ras_generator.xml
--- a/Marea/ras_generator.xml Wed Jan 22 11:50:54 2020 -0500
+++ b/Marea/ras_generator.xml Wed Feb 19 10:44:52 2020 -0500
@@ -1,5 +1,5 @@
[Hunk bodies garbled: the XML tool-wrapper markup was stripped and only element text survives;
 the hunks at @@ -13,18 +13,15 @@ and @@ -47,124 +44,21 @@ are likewise unreadable.]
Added tool description text: "- Reaction Activity Scores computation"
Unchanged fragment: macro import marea_macros.xml
Present only in removed lines (conditional filters):
  cond['type_selector'] == "datasets_rasonly"
  cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class"
  cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas']