comparison Marea/marea.py @ 16:c71ac0bb12de draft

Uploaded
author bimib
date Tue, 01 Oct 2019 06:05:13 -0400
parents e96f3b85e5a0
children e6831924df01
comparison
equal deleted inserted replaced
15:d0e7f14b773f 16:c71ac0bb12de
1
2 from __future__ import division 1 from __future__ import division
3 import sys 2 import sys
4 import pandas as pd 3 import pandas as pd
5 import itertools as it 4 import itertools as it
6 import scipy.stats as st 5 import scipy.stats as st
7 import collections 6 import collections
8 import lxml.etree as ET 7 import lxml.etree as ET
8 import shutil
9 import pickle as pk 9 import pickle as pk
10 import math 10 import math
11 import os 11 import os
12 import argparse 12 import argparse
13 from svglib.svglib import svg2rlg 13 from svglib.svglib import svg2rlg
14 from reportlab.graphics import renderPDF 14 from reportlab.graphics import renderPDF
15 15
16 ########################## argparse ########################################### 16 ########################## argparse ##########################################
17 17
18 def process_args(args): 18 def process_args(args):
19 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', 19 parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
20 description = 'process some value\'s'+ 20 description = 'process some value\'s'+
21 ' genes to create a comparison\'s map.') 21 ' genes to create a comparison\'s map.')
69 help = 'custom map') 69 help = 'custom map')
70 parser.add_argument('-yn', '--yes_no', 70 parser.add_argument('-yn', '--yes_no',
71 type = str, 71 type = str,
72 choices = ['yes', 'no'], 72 choices = ['yes', 'no'],
73 help = 'if make or not custom map') 73 help = 'if make or not custom map')
74 parser.add_argument('-gs', '--generate_svg',
75 type = str,
76 default = 'true',
77 choices = ['true', 'false'],
78 help = 'generate svg map')
79 parser.add_argument('-gp', '--generate_pdf',
80 type = str,
81 default = 'true',
82 choices = ['true', 'false'],
83 help = 'generate pdf map')
84 parser.add_argument('-gr', '--generate_ras',
85 type = str,
86 default = 'true',
87 choices = ['true', 'false'],
88 help = 'generate reaction activity score')
74 args = parser.parse_args() 89 args = parser.parse_args()
75 return args 90 return args
76 91
77 ########################### warning ########################################### 92 ########################### warning ###########################################
78 93
83 98
84 ############################ dataset input #################################### 99 ############################ dataset input ####################################
85 100
86 def read_dataset(data, name): 101 def read_dataset(data, name):
87 try: 102 try:
88 dataset = pd.read_csv(data, sep = '\t', header = 0) 103 dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python')
89 except pd.errors.EmptyDataError: 104 except pd.errors.EmptyDataError:
90 sys.exit('Execution aborted: wrong format of ' + name + '\n') 105 sys.exit('Execution aborted: wrong format of ' + name + '\n')
91 if len(dataset.columns) < 2: 106 if len(dataset.columns) < 2:
92 sys.exit('Execution aborted: wrong format of ' + name + '\n') 107 sys.exit('Execution aborted: wrong format of ' + name + '\n')
93 return dataset 108 return dataset
534 react = recon.reactions 549 react = recon.reactions
535 rules = [react[i].gene_reaction_rule for i in range(len(react))] 550 rules = [react[i].gene_reaction_rule for i in range(len(react))]
536 ids = [react[i].id for i in range(len(react))] 551 ids = [react[i].id for i in range(len(react))]
537 except cb.io.sbml3.CobraSBMLError: 552 except cb.io.sbml3.CobraSBMLError:
538 try: 553 try:
539 data = (pd.read_csv(data, sep = '\t', dtype = str)).fillna('') 554 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('')
540 if len(data.columns) < 2: 555 if len(data.columns) < 2:
541 sys.exit('Execution aborted: wrong format of '+ 556 sys.exit('Execution aborted: wrong format of '+
542 'custom datarules\n') 557 'custom datarules\n')
543 if not len(data.columns) == 2: 558 if not len(data.columns) == 2:
544 warning('Warning: more than 2 columns in custom datarules.\n' + 559 warning('Warning: more than 2 columns in custom datarules.\n' +
639 else: 654 else:
640 warning('Warning: no sample found in class ' + classe + 655 warning('Warning: no sample found in class ' + classe +
641 ', the class has been disregarded\n') 656 ', the class has been disregarded\n')
642 return class_pat 657 return class_pat
643 658
659 ############################ create_ras #######################################
660
661 def create_ras (resolve_rules, dataset_name):
662
663 if resolve_rules == None:
664 warning("Couldn't generate RAS for current dataset: " + dataset_name)
665
666 for geni in resolve_rules.values():
667 for i, valori in enumerate(geni):
668 if valori == None:
669 geni[i] = 'None'
670
671 output_ras = pd.DataFrame.from_dict(resolve_rules)
672 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)
673
674 text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w")
675 text_file.write(output_to_csv)
676 text_file.close()
677
644 ############################ map ############################################## 678 ############################ map ##############################################
645 679
646 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C): 680 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf):
647 args = process_args(sys.argv) 681 args = process_args(sys.argv)
648 if (not class_pat) or (len(class_pat.keys()) < 2): 682 if (not class_pat) or (len(class_pat.keys()) < 2):
649 sys.exit('Execution aborted: classes provided for comparisons are ' + 683 sys.exit('Execution aborted: classes provided for comparisons are ' +
650 'less than two\n') 684 'less than two\n')
651 for i, j in it.combinations(class_pat.keys(), 2): 685 for i, j in it.combinations(class_pat.keys(), 2):
661 max_F_C = abs(avg) 695 max_F_C = abs(avg)
662 tmp[ids[count]] = [float(p_value), avg] 696 tmp[ids[count]] = [float(p_value), avg]
663 count += 1 697 count += 1
664 except (TypeError, ZeroDivisionError): 698 except (TypeError, ZeroDivisionError):
665 count += 1 699 count += 1
666 tab = 'table_out/' + i + '_vs_' + j + '.tsv' 700 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
667 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") 701 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
668 tmp_csv = tmp_csv.reset_index() 702 tmp_csv = tmp_csv.reset_index()
669 header = ['ids', 'P_Value', 'Average'] 703 header = ['ids', 'P_Value', 'Average']
670 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) 704 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
671 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' 705
672 and args.yes_no == 'yes'): 706 if create_svg or create_pdf:
673 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) 707 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
674 file_svg = 'map_svg/' + i + '_vs_' + j + '.svg' 708 and args.yes_no == 'yes'):
675 with open(file_svg, 'wb') as new_map: 709 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
676 new_map.write(ET.tostring(core_map, encoding='UTF-8', 710 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
677 method='xml')) 711 with open(file_svg, 'wb') as new_map:
678 file_pdf = 'map_pdf/' + i + '_vs_' + j + '.pdf' 712 new_map.write(ET.tostring(core_map))
679 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) 713
714
715 if create_pdf:
716 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
717 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
718
719 if not create_svg:
720 #Ho utilizzato il file svg per generare il pdf,
721 #ma l'utente non ne ha richiesto il ritorno, quindi
722 #lo elimino
723 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
724
680 return None 725 return None
681 726
682 ############################ MAIN ############################################# 727 ############################ MAIN #############################################
683 728
684 def main(): 729 def main():
685 args = process_args(sys.argv) 730 args = process_args(sys.argv)
686 os.makedirs('table_out') 731
687 if args.rules_selector == 'HMRcore': 732 create_svg = check_bool(args.generate_svg)
688 os.makedirs('map_svg') 733 create_pdf = check_bool(args.generate_pdf)
689 os.makedirs('map_pdf') 734 generate_ras = check_bool(args.generate_ras)
735
736 os.makedirs('result')
737
738 if generate_ras:
739 os.makedirs('ras')
740
741 if args.rules_selector == 'HMRcore':
690 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) 742 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
691 elif args.rules_selector == 'Recon': 743 elif args.rules_selector == 'Recon':
692 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) 744 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
693 elif args.rules_selector == 'Custom': 745 elif args.rules_selector == 'Custom':
694 ids, rules, gene_in_rule = make_recon(args.custom) 746 ids, rules, gene_in_rule = make_recon(args.custom)
747
695 resolve_none = check_bool(args.none) 748 resolve_none = check_bool(args.none)
749
696 class_pat = {} 750 class_pat = {}
751
697 if args.option == 'datasets': 752 if args.option == 'datasets':
698 num = 1 753 num = 1
699 #if len(args.names) != len(set(args.names)):
700 # sys.exit('Execution aborted: datasets name duplicated')
701 for i, j in zip(args.input_datas, args.names): 754 for i, j in zip(args.input_datas, args.names):
755
702 name = name_dataset(j, num) 756 name = name_dataset(j, num)
703 dataset = read_dataset(i, name) 757 dataset = read_dataset(i, name)
758
704 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) 759 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
705 type_gene = gene_type(dataset.iloc[0, 0], name) 760
761 type_gene = gene_type(dataset.iloc[0, 0], name)
762
706 if args.rules_selector != 'Custom': 763 if args.rules_selector != 'Custom':
707 genes = data_gene(dataset, type_gene, name, None) 764 genes = data_gene(dataset, type_gene, name, None)
708 ids, rules = load_id_rules(recon.get(type_gene)) 765 ids, rules = load_id_rules(recon.get(type_gene))
709 elif args.rules_selector == 'Custom': 766 elif args.rules_selector == 'Custom':
710 genes = data_gene(dataset, type_gene, name, gene_in_rule) 767 genes = data_gene(dataset, type_gene, name, gene_in_rule)
768
711 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) 769 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
770
771 if generate_ras:
772 create_ras(resolve_rules, name)
773
774
712 if err != None and err: 775 if err != None and err:
713 warning('Warning: gene\n' + str(err) + '\nnot found in class ' 776 warning('Warning: gene\n' + str(err) + '\nnot found in class '
714 + name + ', the expression level for this gene ' + 777 + name + ', the expression level for this gene ' +
715 'will be considered NaN\n') 778 'will be considered NaN\n')
716 if resolve_rules != None: 779 if resolve_rules != None:
736 warning('Warning: gene\n'+str(err)+'\nnot found in class ' 799 warning('Warning: gene\n'+str(err)+'\nnot found in class '
737 + name + ', the expression level for this gene ' + 800 + name + ', the expression level for this gene ' +
738 'will be considered NaN\n') 801 'will be considered NaN\n')
739 if resolve_rules != None: 802 if resolve_rules != None:
740 class_pat = split_class(classes, resolve_rules) 803 class_pat = split_class(classes, resolve_rules)
804
741 if args.rules_selector == 'Custom': 805 if args.rules_selector == 'Custom':
742 if args.yes_no == 'yes': 806 if args.yes_no == 'yes':
743 os.makedirs('map_svg')
744 os.makedirs('map_pdf')
745 try: 807 try:
746 core_map = ET.parse(args.custom_map) 808 core_map = ET.parse(args.custom_map)
747 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): 809 except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
748 sys.exit('Execution aborted: custom map in wrong format') 810 sys.exit('Execution aborted: custom map in wrong format')
749 elif args.yes_no == 'no': 811 elif args.yes_no == 'no':
750 core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') 812 core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')
751 else: 813 else:
752 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') 814 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')
753 maps(core_map, class_pat, ids, args.pValue, args.fChange) 815
754 warning('Execution succeeded') 816 maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)
817
818 print('Execution succeded')
819
755 return None 820 return None
756 821
757 ############################################################################### 822 ###############################################################################
758 823
759 if __name__ == "__main__": 824 if __name__ == "__main__":