comparison Desktop/Marea/marea.py @ 30:e88efefbd015 draft

fix changes
author bimib
date Tue, 15 Oct 2019 12:21:16 -0400
parents
children
comparison
equal deleted inserted replaced
29:9fcb0e8d6d47 30:e88efefbd015
1 from __future__ import division
2 import sys
3 import pandas as pd
4 import itertools as it
5 import scipy.stats as st
6 import collections
7 import lxml.etree as ET
8 import shutil
9 import pickle as pk
10 import math
11 import os
12 import argparse
13 from svglib.svglib import svg2rlg
14 from reportlab.graphics import renderPDF
15
16 ########################## argparse ##########################################
17
def process_args(args):
    """Parse the command-line options of the tool.

    Args:
        args: Argument vector in ``sys.argv`` form, i.e. with the program
            name as first element. ``None`` falls back to ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` holding every option declared below.
    """
    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
                                     description = 'process some value\'s'+
                                     ' genes to create a comparison\'s map.')
    parser.add_argument('-rs', '--rules_selector',
                        type = str,
                        default = 'HMRcore',
                        choices = ['HMRcore', 'Recon', 'Custom'],
                        help = 'chose which type of dataset you want use')
    parser.add_argument('-cr', '--custom',
                        type = str,
                        help='your dataset if you want custom rules')
    parser.add_argument('-na', '--names',
                        type = str,
                        nargs = '+',
                        help = 'input names')
    parser.add_argument('-n', '--none',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'],
                        help = 'compute Nan values')
    parser.add_argument('-pv' ,'--pValue',
                        type = float,
                        default = 0.05,
                        help = 'P-Value threshold (default: %(default)s)')
    parser.add_argument('-fc', '--fChange',
                        type = float,
                        default = 1.5,
                        help = 'Fold-Change threshold (default: %(default)s)')
    parser.add_argument('-td', '--tool_dir',
                        type = str,
                        required = True,
                        help = 'your tool directory')
    parser.add_argument('-op', '--option',
                        type = str,
                        choices = ['datasets', 'dataset_class', 'datasets_rasonly'],
                        help='dataset or dataset and class')
    parser.add_argument('-ol', '--out_log',
                        help = "Output log")
    parser.add_argument('-ids', '--input_datas',
                        type = str,
                        nargs = '+',
                        help = 'input datasets')
    parser.add_argument('-id', '--input_data',
                        type = str,
                        help = 'input dataset')
    parser.add_argument('-ic', '--input_class',
                        type = str,
                        help = 'sample group specification')
    parser.add_argument('-cm', '--custom_map',
                        type = str,
                        help = 'custom map')
    parser.add_argument('-yn', '--yes_no',
                        type = str,
                        choices = ['yes', 'no'],
                        help = 'if make or not custom map')
    parser.add_argument('-gs', '--generate_svg',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'],
                        help = 'generate svg map')
    parser.add_argument('-gp', '--generate_pdf',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'],
                        help = 'generate pdf map')
    parser.add_argument('-gr', '--generate_ras',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'],
                        help = 'generate reaction activity score')
    parser.add_argument('-sr', '--single_ras_file',
                        type = str,
                        help = 'file that will contain ras')

    # Honour the vector that was passed in instead of silently re-reading
    # the global sys.argv; the leading program name is dropped exactly as
    # argparse does by default, so process_args(sys.argv) is unchanged.
    argv = None if args is None else list(args)[1:]
    return parser.parse_args(argv)
95
96 ########################### warning ###########################################
97
def warning(s):
    """Append the message ``s`` to the log file named by ``--out_log``."""
    log_path = process_args(sys.argv).out_log
    with open(log_path, 'a') as handle:
        handle.write(s)
102
103 ############################ dataset input ####################################
104
def read_dataset(data, name):
    """Load a tab-separated table and make sure it has at least two columns.

    ``data`` is handed to ``pandas.read_csv``; ``name`` is only used in the
    abort message. The process exits when the input is empty or has fewer
    than two columns.
    """
    try:
        table = pd.read_csv(data, sep = '\t', header = 0, engine='python')
    except pd.errors.EmptyDataError:
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    if table.shape[1] >= 2:
        return table
    sys.exit('Execution aborted: wrong format of ' + name + '\n')
113
114 ############################ dataset name #####################################
115
def name_dataset(name_data, count):
    """Return the dataset label, numbering it when it is the generic 'Dataset'."""
    label = str(name_data)
    if label != 'Dataset':
        return label
    return label + '_' + str(count)
121
############################ load ids and rules ###############################
123
def load_id_rules(reactions):
    """Split a mapping into two parallel lists: its keys and its values."""
    ids = list(reactions.keys())
    rules = list(reactions.values())
    return (ids, rules)
130
131 ############################ check_methods ####################################
132
def gene_type(l, name):
    """Classify a gene identifier by trying each supported format in turn.

    Args:
        l: The identifier string to classify.
        name: Dataset label, used only in the abort message.

    Returns:
        One of 'hugo_id', 'ensembl_gene_id', 'symbol' or 'entrez_id'.
        The checks run in that order, so the first matching format wins.

    Exits the process when no format matches.
    """
    if check_hgnc(l):
        return 'hugo_id'
    elif check_ensembl(l):
        return 'ensembl_gene_id'
    elif check_symbol(l):
        return 'symbol'
    elif check_entrez(l):
        return 'entrez_id'
    else:
        # The two literal halves used to be joined without a space
        # ("Supported IDtypes are"); the separator is restored here.
        sys.exit('Execution aborted:\n' +
                 'gene ID type in ' + name + ' not supported. Supported ID ' +
                 'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
146
def check_hgnc(l):
    """True when ``l`` is 'HGNC:' (any case) followed only by digits."""
    return len(l) > 5 and l.upper().startswith('HGNC:') and l[5:].isdigit()
155
def check_ensembl(l):
    """True when ``l`` has 15 characters, starts with 'ENS' (any case) and
    everything after the fourth character is a digit."""
    return len(l) == 15 and l.upper().startswith('ENS') and l[4:].isdigit()
164
def check_symbol(l):
    """True when ``l`` starts with a letter and the remainder is a non-empty
    alphanumeric run (so one-character strings are rejected)."""
    return len(l) > 0 and l[0].isalpha() and l[1:].isalnum()
173
def check_entrez(l):
    """True when ``l`` is a non-empty string made only of digits."""
    return len(l) > 0 and l.isdigit()
179
def check_bool(b):
    """Map the strings 'true'/'false' to booleans; anything else gives None."""
    if b == 'true':
        return True
    return False if b == 'false' else None
185
186 ############################ resolve_methods ##################################
187
def replace_gene_value(l, d):
    """Replace every gene token of a (possibly nested) rule with its value.

    Nested lists are processed recursively and keep their nesting. Returns
    ``(resolved, missing)``, where ``missing`` collects the tokens for which
    ``replace_gene`` produced ``None``.
    """
    resolved, missing = [], []
    for item in l:
        if isinstance(item, list):
            sub_resolved, sub_missing = replace_gene_value(item, d)
            resolved.append(sub_resolved)
            missing.extend(sub_missing)
            continue
        value = replace_gene(item, d)
        resolved.append(value)
        if value == None:
            missing.append(item)
    return (resolved, missing)
203
def replace_gene(l, d):
    """Look up the value of one rule token.

    Args:
        l: A rule token, either an operator ('and'/'or') or a gene id.
        d: Mapping from gene id to its value.

    Returns:
        The operator unchanged, the numeric value stored for the gene, or
        ``None`` when the gene is not present in ``d``.

    Exits the process when the stored value is neither ``None`` nor an
    ``int``/``float``.
    """
    if l == 'and' or l == 'or':
        return l
    value = d.get(l, None)
    if not (value is None or isinstance(value, (int, float))):
        # str() is required: the offending value is by definition not a
        # number and need not be a string, so plain concatenation would
        # raise TypeError instead of producing the abort message.
        sys.exit('Execution aborted: ' + str(value) + ' value not valid\n')
    return value
212
def computes(val1, op, val2, cn):
    """Combine two operand values under one logical operator.

    With both operands present, 'and' yields the minimum and any other
    operator the sum. With an operand missing (``None``), the one present
    is returned, except for 'and' when ``cn`` is not ``True``, which
    yields ``None``. Two missing operands always give ``None``.
    """
    have1 = val1 != None
    have2 = val2 != None
    if have1 and have2:
        return min(val1, val2) if op == 'and' else val1 + val2
    if op == 'and' and cn is not True:
        return None
    if have1:
        return val1
    if have2:
        return val2
    return None
236
def control(ris, l, cn):
    """Evaluate a resolved rule.

    A one-element rule evaluates to that element when it is a number or
    ``None``, or to the evaluation of the nested list it contains. Rules
    with more than two elements are delegated to ``control_list``. Every
    other shape yields ``False``.
    """
    size = len(l)
    if size > 2:
        return control_list(ris, l, cn)
    if size != 1:
        return False
    head = l[0]
    if isinstance(head, (float, int)) or head == None:
        return head
    if isinstance(head, list):
        return control(None, head, cn)
    return False
249
def control_list(ris, l, cn):
    """Evaluate a resolved rule of more than one element, left to right.

    ``l`` is consumed in chunks. Each chunk is folded through ``computes``
    and the running result is kept in ``ris``. Nested lists are evaluated
    through ``control``. Returns the final ``ris``, or ``False`` as soon as
    the sequence does not match any of the recognised shapes.
    """
    while l:
        if len(l) == 1:
            # A lone trailing element cannot be combined with anything.
            return False
        elif (isinstance(l[0], (float, int)) or
              l[0] == None) and l[1] in ['and', 'or']:
            # Shape: value, operator, (value | nested list)
            if isinstance(l[2], (float, int)) or l[2] == None:
                ris = computes(l[0], l[1], l[2], cn)
            elif isinstance(l[2], list):
                tmp = control(None, l[2], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(l[0], l[1], tmp, cn)
            else:
                return False
            l = l[3:]
        elif l[0] in ['and', 'or']:
            # Shape: operator, (value | nested list); the left operand is
            # the result accumulated so far.
            if isinstance(l[1], (float, int)) or l[1] == None:
                ris = computes(ris, l[0], l[1], cn)
            elif isinstance(l[1], list):
                tmp = control(None,l[1], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(ris, l[0], tmp, cn)
            else:
                return False
            l = l[2:]
        elif isinstance(l[0], list) and l[1] in ['and', 'or']:
            # Shape: nested list, operator, (value | nested list)
            if isinstance(l[2], (float, int)) or l[2] == None:
                tmp = control(None, l[0], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(tmp, l[1], l[2], cn)
            elif isinstance(l[2], list):
                tmp = control(None, l[0], cn)
                tmp2 = control(None, l[2], cn)
                if tmp is False or tmp2 is False:
                    return False
                else:
                    ris = computes(tmp, l[1], tmp2, cn)
            else:
                return False
            l = l[3:]
        else:
            return False
    return ris
299
300 ############################ map_methods ######################################
301
def fold_change(avg1, avg2):
    """Return log2(avg1 / avg2), with sentinels for zero averages.

    Both zero gives 0; only ``avg1`` zero gives '-INF'; only ``avg2`` zero
    gives 'INF'.
    """
    first_is_zero = avg1 == 0
    second_is_zero = avg2 == 0
    if first_is_zero:
        return 0 if second_is_zero else '-INF'
    if second_is_zero:
        return 'INF'
    return math.log(avg1 / avg2, 2)
311
def fix_style(l, col, width, dash):
    """Set the stroke colour, width and dash pattern in a style string.

    Args:
        l: Semicolon-separated style declarations, or ``None`` when the
            element carries no style attribute.
        col: Value for ``stroke``.
        width: Value for ``stroke-width`` (string).
        dash: Value for ``stroke-dasharray``.

    Returns:
        The style string with the three properties overwritten where they
        already exist and appended (in the order stroke, stroke-width,
        stroke-dasharray) where they do not. Other declarations are kept.
    """
    # A missing attribute arrives as None; treat it as "no declarations"
    # instead of failing on None.split.
    tmp = l.split(';') if l is not None else []
    overrides = (('stroke:', col),
                 ('stroke-width:', width),
                 ('stroke-dasharray:', dash))
    seen = set()
    for i, declaration in enumerate(tmp):
        for prefix, value in overrides:
            # The three prefixes are mutually exclusive, so at most one
            # can match a given declaration.
            if declaration.startswith(prefix):
                tmp[i] = prefix + value
                seen.add(prefix)
                break
    for prefix, value in overrides:
        if prefix not in seen:
            tmp.append(prefix + value)
    return ';'.join(tmp)
334
def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C):
    """Restyle the reaction elements of a map according to the statistics.

    Args:
        d: Mapping from reaction id to a sequence whose first item is the
            p-value and second item the log2 fold change (a number, or
            the strings '-INF' / 'INF').
        core_map: Element tree; every element whose ``id`` starts with
            'R_' is looked up in ``d`` by the remainder of the id.
        threshold_P_V: p-values below this are drawn solid, the others
            dashed grey.
        threshold_F_C: Fold-change threshold; compared on the log2 scale.
        max_F_C: Largest absolute fold change, used to scale line width.

    Returns:
        ``core_map``, modified in place.

    Raises:
        ValueError: if ``threshold_F_C`` is not positive (from ``math.log``).
    """
    maxT = 12
    minT = 2
    grey = '#BEBEBE'
    blue = '#0000FF'
    red = '#E41A1C'
    # Loop-invariant, so computed once rather than per element.
    fc_threshold = math.log(threshold_F_C, 2)
    for el in core_map.iter():
        el_id = str(el.get('id'))
        if not el_id.startswith('R_'):
            continue
        tmp = d.get(el_id[2:])
        if tmp is None:
            continue
        p_val = tmp[0]
        f_c = tmp[1]
        if p_val < threshold_P_V:
            dash = 'none'
            if isinstance(f_c, str):
                # Infinite fold change: full width, colour by direction.
                # Any string other than '-INF' is drawn like 'INF'.
                col = blue if f_c == '-INF' else red
                width = str(maxT)
            elif f_c != 0 and abs(f_c) >= fc_threshold:
                col = blue if f_c < 0 else red
                # Guard the scaling against a zero maximum.
                scaled = (abs(f_c) * maxT) / max_F_C if max_F_C else maxT
                width = str(max(scaled, minT))
            else:
                # Below threshold. An exact zero or a NaN also lands here;
                # previously those left `col` unassigned (or carrying the
                # colour of the previous element) when the threshold test
                # did not catch them.
                col = grey
                width = str(minT)
        else:
            dash = '5,5'
            col = grey
            width = str(minT)
        el.set('style', fix_style(el.get('style'), col, width, dash))
    return core_map
372
373 ############################ make recon #######################################
374
def check_and_doWord(l):
    """Tokenise a rule given as a sequence of characters.

    Parentheses become single tokens, spaces separate words, and every
    other run of characters becomes one word token.

    Args:
        l: Sequence of single characters (e.g. ``list(rule_string)``).
            It is only read, never modified.

    Returns:
        ``(tokens, genes)`` where ``genes`` lists the word tokens other
        than 'and'/'or', or ``False`` when a ')' appears before its
        matching '(' or the parentheses do not balance.
    """
    tokens = []
    genes = []
    depth = 0
    pos = 0
    size = len(l)
    # Index-based scan: avoids the repeated pop(0) (quadratic) and leaves
    # the caller's list intact.
    while pos < size:
        if depth < 0:
            return False
        ch = l[pos]
        if ch == '(':
            depth += 1
            tokens.append(ch)
            pos += 1
        elif ch == ')':
            depth -= 1
            tokens.append(ch)
            pos += 1
        elif ch == ' ':
            pos += 1
        else:
            start = pos
            while pos < size and l[pos] not in [' ', '(', ')']:
                pos += 1
            word = ''.join(l[start:pos])
            tokens.append(word)
            if word not in ['or', 'and']:
                genes.append(word)
    if depth == 0:
        return (tokens, genes)
    return False
409
def brackets_to_list(l):
    """Turn a flat token list into nested lists, one per parenthesised group.

    ``l`` is consumed in place. Each '(' hands the remaining tokens to
    ``resolve_brackets``; every other token is copied as-is.
    """
    nested = []
    while l:
        token = l.pop(0)
        if token == '(':
            nested.append(resolve_brackets(l))
        else:
            nested.append(token)
    return nested
420
def resolve_brackets(l):
    """Collect tokens up to the matching ')' into a list, recursing on '('.

    ``l`` is consumed in place, including the closing ')'.
    """
    group = []
    while l[0] != ')':
        token = l.pop(0)
        if token == '(':
            group.append(resolve_brackets(l))
        else:
            group.append(token)
    l.pop(0)
    return group
432
def priorityAND(l):
    """Regroup a nested rule so that 'and' chains bind tighter than 'or'.

    Walks ``l`` left to right. A run of operands joined by 'and' is
    collected in ``tmpAnd``. It is appended to the result as one nested
    list once an 'or' has been seen, otherwise its items are added flat.
    Nested lists are regrouped recursively.

    NOTE(review): a token in operator position that is neither 'and' nor
    'or' matches no branch and would leave ``l`` unchanged. Presumably
    callers validate the rule first (do_rules calls checkRule before this).
    Confirm.
    """
    tmp = []
    # True until an 'or' is met.
    flag = True
    while l:
        if len(l) == 1:
            if isinstance(l[0], list):
                tmp.append(priorityAND(l[0]))
            else:
                tmp.append(l[0])
            l = l[1:]
        elif l[0] == 'or':
            tmp.append(l[0])
            flag = False
            l = l[1:]
        elif l[1] == 'or':
            if isinstance(l[0], list):
                tmp.append(priorityAND(l[0]))
            else:
                tmp.append(l[0])
            tmp.append(l[1])
            flag = False
            l = l[2:]
        elif l[1] == 'and':
            # Start of an 'and' chain: take "operand and operand" ...
            tmpAnd = []
            if isinstance(l[0], list):
                tmpAnd.append(priorityAND(l[0]))
            else:
                tmpAnd.append(l[0])
            tmpAnd.append(l[1])
            if isinstance(l[2], list):
                tmpAnd.append(priorityAND(l[2]))
            else:
                tmpAnd.append(l[2])
            l = l[3:]
            # ... then keep absorbing "and operand" pairs until an 'or'.
            while l:
                if l[0] == 'and':
                    tmpAnd.append(l[0])
                    if isinstance(l[1], list):
                        tmpAnd.append(priorityAND(l[1]))
                    else:
                        tmpAnd.append(l[1])
                    l = l[2:]
                elif l[0] == 'or':
                    flag = False
                    break
            if flag == True: # when there are only ANDs in the list
                tmp.extend(tmpAnd)
            elif flag == False:
                tmp.append(tmpAnd)
    return tmp
483
def checkRule(l):
    """Validate the shape of a nested rule.

    A one-element rule is valid unless that element is a list that is
    itself invalid. A rule with more than two elements is valid when
    ``checkRule2`` accepts it. Empty and two-element rules are invalid.
    """
    size = len(l)
    if size == 1:
        only = l[0]
        return not (isinstance(only, list) and checkRule(only) is False)
    if size > 2:
        return checkRule2(l) is not False
    return False
495
def checkRule2(l):
    """Validate a rule of several elements as alternating operands/operators.

    The list is consumed in chunks of "operand operator operand" or
    "operator operand". Nested lists are validated through ``checkRule``.

    Returns:
        ``True`` when the whole list matches, ``False`` otherwise,
        including when it ends with a dangling "operand operator" pair.
    """
    operators = ['and', 'or']
    while l:
        if len(l) == 1:
            return False
        elif l[1] in operators:
            # "operand operator" needs a right-hand operand; without this
            # check a trailing pair raised IndexError on l[2].
            if len(l) < 3:
                return False
            if isinstance(l[0], list) and checkRule(l[0]) is False:
                return False
            if isinstance(l[2], list) and checkRule(l[2]) is False:
                return False
            l = l[3:]
        elif l[0] in operators:
            if isinstance(l[1], list) and checkRule(l[1]) is False:
                return False
            l = l[2:]
        else:
            return False
    return True
520
def do_rules(rules):
    """Parse rule strings into nested, 'and'-prioritised token lists.

    Args:
        rules: Iterable of rule strings.

    Returns:
        ``(split_rules, genes)``: one parsed rule per input (an empty list
        for empty or malformed rules) and the de-duplicated list of gene
        tokens met while tokenising.

    Malformed rules are reported through a single ``warning`` call.
    """
    split_rules = []
    err_rules = []
    tmp_gene_in_rule = []
    for rule in rules:
        chars = list(rule)
        if not chars:
            split_rules.append([])
            continue
        parsed = check_and_doWord(chars)
        if parsed is False:
            # check_and_doWord signals failure with a bare False, which
            # must be tested before unpacking: unpacking it directly
            # raised TypeError.
            split_rules.append([])
            err_rules.append(rule)
            continue
        tokens, genes = parsed
        tmp_gene_in_rule.extend(genes)
        nested = brackets_to_list(tokens)
        if checkRule(nested):
            split_rules.append(priorityAND(nested))
        else:
            split_rules.append([])
            err_rules.append(rule)
    if err_rules:
        warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
    return (split_rules, list(set(tmp_gene_in_rule)))
545
def make_recon(data):
    """Load custom reaction rules from ``data``.

    The input is first read as an SBML model through cobra. When cobra
    raises ``CobraSBMLError`` it is read as a tab-separated table whose
    first two columns are the reaction ids and the rules.

    Returns ``(ids, split_rules, gene_in_rule)``, where ``gene_in_rule``
    maps every gene found in the rules to 'ok'.
    """
    try:
        import cobra as cb
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            recon = cb.io.read_sbml_model(data)
        react = recon.reactions
        n_react = len(react)
        rules = [react[k].gene_reaction_rule for k in range(n_react)]
        ids = [react[k].id for k in range(n_react)]
    except cb.io.sbml3.CobraSBMLError:
        try:
            table = pd.read_csv(data, sep = '\t', dtype = str, engine='python')
            data = table.fillna('')
            n_cols = len(data.columns)
            if n_cols < 2:
                sys.exit('Execution aborted: wrong format of '+
                         'custom datarules\n')
            if n_cols != 2:
                warning('Warning: more than 2 columns in custom datarules.\n' +
                        'Extra columns have been disregarded\n')
            ids = list(data.iloc[:, 0])
            rules = list(data.iloc[:, 1])
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            sys.exit('Execution aborted: wrong format of custom datarules\n')
    split_rules, tmp_genes = do_rules(rules)
    gene_in_rule = dict.fromkeys(tmp_genes, 'ok')
    return (ids, split_rules, gene_in_rule)
576
577 ############################ gene #############################################
578
def data_gene(gene, type_gene, name, gene_custom):
    """Clean the gene column, reject relevant duplicates, return a dict.

    Args:
        gene: DataFrame whose first column holds gene ids (strings).
            Ids with a leading or trailing space are stripped in place.
        type_gene: Id type, used to select the gene set from the pickle.
        name: Dataset label for messages.
        gene_custom: Mapping gene -> 'ok' for custom rules, or ``None`` to
            load the gene set of the selected built-in rules.

    Returns:
        ``gene`` indexed by its first column, converted with ``to_dict()``.

    Exits the process when a duplicated gene id is one the rules use.
    Duplicated column labels only produce a warning.

    NOTE(review): when ``gene_custom`` is None and the rules selector is
    neither 'HMRcore' nor 'Recon', ``gene_in_rule`` is never assigned.
    The callers in this file do not appear to hit that combination.
    Confirm.
    """
    args = process_args(sys.argv)
    for i in range(len(gene)):
        tmp = gene.iloc[i, 0]
        if tmp.startswith(' ') or tmp.endswith(' '):
            gene.iloc[i, 0] = (tmp.lstrip()).rstrip()
    gene_dup = [item for item, count in
                collections.Counter(gene[gene.columns[0]]).items() if count > 1]
    pat_dup = [item for item, count in
               collections.Counter(list(gene.columns)).items() if count > 1]
    if gene_dup:
        if gene_custom is None:
            # The pickles ship with the tool (tool_dir) and are treated as
            # trusted. They are opened with `with` so the handles are
            # closed deterministically.
            if args.rules_selector == 'HMRcore':
                with open(args.tool_dir +
                          '/local/HMRcore_genes.p', 'rb') as handle:
                    gene_in_rule = pk.load(handle)
            elif args.rules_selector == 'Recon':
                with open(args.tool_dir +
                          '/local/Recon_genes.p', 'rb') as handle:
                    gene_in_rule = pk.load(handle)
            gene_in_rule = gene_in_rule.get(type_gene)
        else:
            gene_in_rule = gene_custom
        # Only duplicates that the rules actually reference are fatal.
        tmp = [i for i in gene_dup if gene_in_rule.get(i) == 'ok']
        if tmp:
            sys.exit('Execution aborted because gene ID '
                     +str(tmp)+' in '+name+' is duplicated\n')
    if pat_dup:
        warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name +
                '\n')
    return (gene.set_index(gene.columns[0])).to_dict()
611
612 ############################ resolve ##########################################
613
def resolve(genes, rules, ids, resolve_none, name):
    """Score every rule for every sample.

    ``genes`` maps a sample to its gene -> value mapping. For each sample
    the result holds one entry per rule: the value returned by ``control``,
    or ``None`` when the rule is empty or cannot be computed.

    Returns ``(scores_by_sample, missing_genes)``. When no score at all
    could be computed a warning is logged and ``(None, None)`` is returned.
    """
    resolve_rules = {}
    not_found = []
    any_score = False
    for sample, expression in genes.items():
        scores = []
        for rule in rules:
            if not rule:
                scores.append(None)
                continue
            values, missing = replace_gene_value(rule, expression)
            if missing:
                not_found.extend(missing)
            ris = control(None, values, resolve_none)
            if ris is False or ris == None:
                scores.append(None)
            else:
                scores.append(ris)
                any_score = True
        resolve_rules[sample] = scores
    if not any_score:
        warning('Warning: no computable score (due to missing gene values)' +
                'for class ' + name + ', the class has been disregarded\n')
        return (None, None)
    return (resolve_rules, list(set(not_found)))
640
641 ############################ split class ######################################
642
def split_class(classes, resolve_rules):
    """Group the per-sample scores by class label.

    ``classes`` is a table with the sample id in column 0 and the class
    label in column 1. Labels are blanked (set to None) in place as their
    rows are consumed. For each class the scores of its samples are
    transposed, giving one list per rule. Classes without any scored
    sample are reported through ``warning`` and left out.
    """
    class_pat = {}
    n_rows = len(classes)
    for start in range(n_rows):
        label = classes.iloc[start, 1]
        if pd.isnull(label):
            continue
        members = []
        for row in range(start, n_rows):
            if classes.iloc[row, 1] == label:
                scores = resolve_rules.get(classes.iloc[row, 0], None)
                if scores != None:
                    members.append(scores)
                classes.iloc[row, 1] = None
        if members:
            class_pat[label] = [list(column) for column in zip(*members)]
        else:
            warning('Warning: no sample found in class ' + label +
                    ', the class has been disregarded\n')
    return class_pat
662
663 ############################ create_ras #######################################
664
def create_ras (resolve_rules, dataset_name, single_ras):
    """Write the reaction activity scores of one dataset as a TSV file.

    Args:
        resolve_rules: Mapping sample -> list of scores, or ``None`` when
            no score could be computed for the dataset.
        dataset_name: Label used in the output file name and messages.
        single_ras: When true, write to the path given by
            ``--single_ras_file``; otherwise write to
            ``ras/Reaction_Activity_Score_Of_<dataset_name>.tsv``.

    Missing scores are written as the text 'None'. The replacement is
    done in place on the lists of ``resolve_rules``, as before.
    """
    if resolve_rules is None:
        warning("Couldn't generate RAS for current dataset: " + dataset_name + '\n')
        # Nothing to write; continuing would dereference None.
        return

    for geni in resolve_rules.values():
        for i, valori in enumerate(geni):
            if valori == None:
                geni[i] = 'None'

    output_ras = pd.DataFrame.from_dict(resolve_rules)
    output_to_csv = output_ras.to_csv(sep = '\t', index = False)

    if single_ras:
        out_path = process_args(sys.argv).single_ras_file
    else:
        out_path = "ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv"

    with open(out_path, "w") as text_file:
        text_file.write(output_to_csv)
686
687 ############################ map ##############################################
688
def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf):
    """Compare every pair of classes and write the tabular/SVG/PDF results.

    For each pair of keys of ``class_pat`` the per-reaction score lists are
    compared with ``st.ks_2samp`` and ``fold_change`` of their means. The
    results are written to ``result/<i>_vs_<j> (Tabular Result).tsv``.
    When a map is requested and the rules selector allows it, ``core_map``
    is restyled with ``fix_map`` and saved as SVG and/or rendered to PDF.

    Exits the process when fewer than two classes are provided.
    """
    args = process_args(sys.argv)
    if (not class_pat) or (len(class_pat.keys()) < 2):
        sys.exit('Execution aborted: classes provided for comparisons are ' +
                 'less than two\n')
    for i, j in it.combinations(class_pat.keys(), 2):
        tmp = {}
        # Position of the current reaction in `ids`; advanced on both the
        # success and the failure path so it stays aligned with the zip.
        count = 0
        max_F_C = 0
        for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
            try:
                stat_D, p_value = st.ks_2samp(l1, l2)
                avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
                if not isinstance(avg, str):
                    if max_F_C < abs(avg):
                        max_F_C = abs(avg)
                tmp[ids[count]] = [float(p_value), avg]
                count += 1
            except (TypeError, ZeroDivisionError):
                # Reactions whose scores cannot be compared are skipped.
                count += 1
        tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
        tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
        tmp_csv = tmp_csv.reset_index()
        header = ['ids', 'P_Value', 'Log2(fold change)']
        tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)

        if create_svg or create_pdf:
            if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
                                                    and args.yes_no == 'yes'):
                fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
                file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
                with open(file_svg, 'wb') as new_map:
                    new_map.write(ET.tostring(core_map))

                # NOTE(review): the captured source lost its indentation.
                # The two blocks below are placed inside the selector
                # branch because they use file_svg, which is only assigned
                # there. Confirm against the repository.
                if create_pdf:
                    file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
                    renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)

                if not create_svg:
                    # The SVG file was only needed to generate the PDF;
                    # the user did not ask for it, so it is deleted.
                    os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')

    return None
735
736 ############################ MAIN #############################################
737
def main():
    """Entry point: parse options, compute scores, write the comparisons.

    Depending on ``--option`` the tool either writes a single RAS file and
    stops ('datasets_rasonly'), treats each input dataset as one class
    ('datasets'), or splits one dataset into classes using a class file
    ('dataset_class'). In the last two modes the classes are compared
    pairwise through ``maps``.
    """
    args = process_args(sys.argv)

    create_svg = check_bool(args.generate_svg)
    create_pdf = check_bool(args.generate_pdf)
    generate_ras = check_bool(args.generate_ras)

    os.makedirs('result')

    if generate_ras:
        os.makedirs('ras')

    # The rule pickles ship with the tool (tool_dir) and are treated as
    # trusted. They are opened with `with` so the handles are closed.
    if args.rules_selector == 'HMRcore':
        with open(args.tool_dir + '/local/HMRcore_rules.p', 'rb') as handle:
            recon = pk.load(handle)
    elif args.rules_selector == 'Recon':
        with open(args.tool_dir + '/local/Recon_rules.p', 'rb') as handle:
            recon = pk.load(handle)
    elif args.rules_selector == 'Custom':
        ids, rules, gene_in_rule = make_recon(args.custom)

    resolve_none = check_bool(args.none)

    class_pat = {}

    if args.option == 'datasets_rasonly':
        name = "RAS Dataset"
        dataset = read_dataset(args.input_datas[0],"dataset")

        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)

        type_gene = gene_type(dataset.iloc[0, 0], name)

        if args.rules_selector != 'Custom':
            genes = data_gene(dataset, type_gene, name, None)
            ids, rules = load_id_rules(recon.get(type_gene))
        elif args.rules_selector == 'Custom':
            genes = data_gene(dataset, type_gene, name, gene_in_rule)

        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)

        create_ras(resolve_rules, name, True)

        if err != None and err:
            warning('Warning: gene\n' + str(err) + '\nnot found in class '
                    + name + ', the expression level for this gene ' +
                    'will be considered NaN\n')

        print('execution succeded')
        # RAS-only mode stops here: no comparison and no map.
        return None


    elif args.option == 'datasets':
        num = 1
        for i, j in zip(args.input_datas, args.names):

            name = name_dataset(j, num)
            dataset = read_dataset(i, name)

            dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)

            type_gene = gene_type(dataset.iloc[0, 0], name)

            if args.rules_selector != 'Custom':
                genes = data_gene(dataset, type_gene, name, None)
                ids, rules = load_id_rules(recon.get(type_gene))
            elif args.rules_selector == 'Custom':
                genes = data_gene(dataset, type_gene, name, gene_in_rule)

            resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)

            if generate_ras:
                create_ras(resolve_rules, name, False)

            if err != None and err:
                warning('Warning: gene\n' + str(err) + '\nnot found in class '
                        + name + ', the expression level for this gene ' +
                        'will be considered NaN\n')
            if resolve_rules != None:
                # Transpose: one list of sample scores per reaction.
                class_pat[name] = list(map(list, zip(*resolve_rules.values())))
            num += 1
    elif args.option == 'dataset_class':
        name = 'RNAseq'
        dataset = read_dataset(args.input_data, name)
        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
        type_gene = gene_type(dataset.iloc[0, 0], name)
        classes = read_dataset(args.input_class, 'class')
        if not len(classes.columns) == 2:
            warning('Warning: more than 2 columns in class file. Extra' +
                    'columns have been disregarded\n')
        classes = classes.astype(str)
        if args.rules_selector != 'Custom':
            genes = data_gene(dataset, type_gene, name, None)
            ids, rules = load_id_rules(recon.get(type_gene))
        elif args.rules_selector == 'Custom':
            genes = data_gene(dataset, type_gene, name, gene_in_rule)
        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
        if err != None and err:
            warning('Warning: gene\n'+str(err)+'\nnot found in class '
                    + name + ', the expression level for this gene ' +
                    'will be considered NaN\n')
        if resolve_rules != None:
            class_pat = split_class(classes, resolve_rules)


    # Pick the map to restyle: the user's map for custom rules when
    # requested, the bundled HMRcore map otherwise.
    if args.rules_selector == 'Custom':
        if args.yes_no == 'yes':
            try:
                core_map = ET.parse(args.custom_map)
            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
                sys.exit('Execution aborted: custom map in wrong format')
        elif args.yes_no == 'no':
            core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')
    else:
        core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')

    maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)

    print('Execution succeded')

    return None
857
858 ###############################################################################
859
# Run the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()