Mercurial > repos > bimib > marea
comparison Marea/marea.py @ 16:c71ac0bb12de draft
Uploaded
author | bimib |
---|---|
date | Tue, 01 Oct 2019 06:05:13 -0400 |
parents | e96f3b85e5a0 |
children | e6831924df01 |
comparison
equal
deleted
inserted
replaced
15:d0e7f14b773f | 16:c71ac0bb12de |
---|---|
1 | |
2 from __future__ import division | 1 from __future__ import division |
3 import sys | 2 import sys |
4 import pandas as pd | 3 import pandas as pd |
5 import itertools as it | 4 import itertools as it |
6 import scipy.stats as st | 5 import scipy.stats as st |
7 import collections | 6 import collections |
8 import lxml.etree as ET | 7 import lxml.etree as ET |
8 import shutil | |
9 import pickle as pk | 9 import pickle as pk |
10 import math | 10 import math |
11 import os | 11 import os |
12 import argparse | 12 import argparse |
13 from svglib.svglib import svg2rlg | 13 from svglib.svglib import svg2rlg |
14 from reportlab.graphics import renderPDF | 14 from reportlab.graphics import renderPDF |
15 | 15 |
16 ########################## argparse ########################################### | 16 ########################## argparse ########################################## |
17 | 17 |
18 def process_args(args): | 18 def process_args(args): |
19 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 19 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', |
20 description = 'process some value\'s'+ | 20 description = 'process some value\'s'+ |
21 ' genes to create a comparison\'s map.') | 21 ' genes to create a comparison\'s map.') |
69 help = 'custom map') | 69 help = 'custom map') |
70 parser.add_argument('-yn', '--yes_no', | 70 parser.add_argument('-yn', '--yes_no', |
71 type = str, | 71 type = str, |
72 choices = ['yes', 'no'], | 72 choices = ['yes', 'no'], |
73 help = 'if make or not custom map') | 73 help = 'if make or not custom map') |
74 parser.add_argument('-gs', '--generate_svg', | |
75 type = str, | |
76 default = 'true', | |
77 choices = ['true', 'false'], | |
78 help = 'generate svg map') | |
79 parser.add_argument('-gp', '--generate_pdf', | |
80 type = str, | |
81 default = 'true', | |
82 choices = ['true', 'false'], | |
83 help = 'generate pdf map') | |
84 parser.add_argument('-gr', '--generate_ras', | |
85 type = str, | |
86 default = 'true', | |
87 choices = ['true', 'false'], | |
88 help = 'generate reaction activity score') | |
74 args = parser.parse_args() | 89 args = parser.parse_args() |
75 return args | 90 return args |
76 | 91 |
77 ########################### warning ########################################### | 92 ########################### warning ########################################### |
78 | 93 |
83 | 98 |
84 ############################ dataset input #################################### | 99 ############################ dataset input #################################### |
85 | 100 |
86 def read_dataset(data, name): | 101 def read_dataset(data, name): |
87 try: | 102 try: |
88 dataset = pd.read_csv(data, sep = '\t', header = 0) | 103 dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python') |
89 except pd.errors.EmptyDataError: | 104 except pd.errors.EmptyDataError: |
90 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 105 sys.exit('Execution aborted: wrong format of ' + name + '\n') |
91 if len(dataset.columns) < 2: | 106 if len(dataset.columns) < 2: |
92 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 107 sys.exit('Execution aborted: wrong format of ' + name + '\n') |
93 return dataset | 108 return dataset |
534 react = recon.reactions | 549 react = recon.reactions |
535 rules = [react[i].gene_reaction_rule for i in range(len(react))] | 550 rules = [react[i].gene_reaction_rule for i in range(len(react))] |
536 ids = [react[i].id for i in range(len(react))] | 551 ids = [react[i].id for i in range(len(react))] |
537 except cb.io.sbml3.CobraSBMLError: | 552 except cb.io.sbml3.CobraSBMLError: |
538 try: | 553 try: |
539 data = (pd.read_csv(data, sep = '\t', dtype = str)).fillna('') | 554 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') |
540 if len(data.columns) < 2: | 555 if len(data.columns) < 2: |
541 sys.exit('Execution aborted: wrong format of '+ | 556 sys.exit('Execution aborted: wrong format of '+ |
542 'custom datarules\n') | 557 'custom datarules\n') |
543 if not len(data.columns) == 2: | 558 if not len(data.columns) == 2: |
544 warning('Warning: more than 2 columns in custom datarules.\n' + | 559 warning('Warning: more than 2 columns in custom datarules.\n' + |
639 else: | 654 else: |
640 warning('Warning: no sample found in class ' + classe + | 655 warning('Warning: no sample found in class ' + classe + |
641 ', the class has been disregarded\n') | 656 ', the class has been disregarded\n') |
642 return class_pat | 657 return class_pat |
643 | 658 |
659 ############################ create_ras ####################################### | |
660 | |
661 def create_ras (resolve_rules, dataset_name): | |
662 | |
663 if resolve_rules == None: | |
664 warning("Couldn't generate RAS for current dataset: " + dataset_name) | |
665 | |
666 for geni in resolve_rules.values(): | |
667 for i, valori in enumerate(geni): | |
668 if valori == None: | |
669 geni[i] = 'None' | |
670 | |
671 output_ras = pd.DataFrame.from_dict(resolve_rules) | |
672 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) | |
673 | |
674 text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") | |
675 text_file.write(output_to_csv) | |
676 text_file.close() | |
677 | |
644 ############################ map ############################################## | 678 ############################ map ############################################## |
645 | 679 |
646 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C): | 680 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): |
647 args = process_args(sys.argv) | 681 args = process_args(sys.argv) |
648 if (not class_pat) or (len(class_pat.keys()) < 2): | 682 if (not class_pat) or (len(class_pat.keys()) < 2): |
649 sys.exit('Execution aborted: classes provided for comparisons are ' + | 683 sys.exit('Execution aborted: classes provided for comparisons are ' + |
650 'less than two\n') | 684 'less than two\n') |
651 for i, j in it.combinations(class_pat.keys(), 2): | 685 for i, j in it.combinations(class_pat.keys(), 2): |
661 max_F_C = abs(avg) | 695 max_F_C = abs(avg) |
662 tmp[ids[count]] = [float(p_value), avg] | 696 tmp[ids[count]] = [float(p_value), avg] |
663 count += 1 | 697 count += 1 |
664 except (TypeError, ZeroDivisionError): | 698 except (TypeError, ZeroDivisionError): |
665 count += 1 | 699 count += 1 |
666 tab = 'table_out/' + i + '_vs_' + j + '.tsv' | 700 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' |
667 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | 701 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") |
668 tmp_csv = tmp_csv.reset_index() | 702 tmp_csv = tmp_csv.reset_index() |
669 header = ['ids', 'P_Value', 'Average'] | 703 header = ['ids', 'P_Value', 'Average'] |
670 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | 704 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) |
671 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | 705 |
672 and args.yes_no == 'yes'): | 706 if create_svg or create_pdf: |
673 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | 707 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' |
674 file_svg = 'map_svg/' + i + '_vs_' + j + '.svg' | 708 and args.yes_no == 'yes'): |
675 with open(file_svg, 'wb') as new_map: | 709 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) |
676 new_map.write(ET.tostring(core_map, encoding='UTF-8', | 710 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' |
677 method='xml')) | 711 with open(file_svg, 'wb') as new_map: |
678 file_pdf = 'map_pdf/' + i + '_vs_' + j + '.pdf' | 712 new_map.write(ET.tostring(core_map)) |
679 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | 713 |
714 | |
715 if create_pdf: | |
716 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | |
717 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
718 | |
719 if not create_svg: | |
720 #Ho utilizzato il file svg per generare il pdf, | |
721 #ma l'utente non ne ha richiesto il ritorno, quindi | |
722 #lo elimino | |
723 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | |
724 | |
680 return None | 725 return None |
681 | 726 |
682 ############################ MAIN ############################################# | 727 ############################ MAIN ############################################# |
683 | 728 |
684 def main(): | 729 def main(): |
685 args = process_args(sys.argv) | 730 args = process_args(sys.argv) |
686 os.makedirs('table_out') | 731 |
687 if args.rules_selector == 'HMRcore': | 732 create_svg = check_bool(args.generate_svg) |
688 os.makedirs('map_svg') | 733 create_pdf = check_bool(args.generate_pdf) |
689 os.makedirs('map_pdf') | 734 generate_ras = check_bool(args.generate_ras) |
735 | |
736 os.makedirs('result') | |
737 | |
738 if generate_ras: | |
739 os.makedirs('ras') | |
740 | |
741 if args.rules_selector == 'HMRcore': | |
690 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) | 742 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) |
691 elif args.rules_selector == 'Recon': | 743 elif args.rules_selector == 'Recon': |
692 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) | 744 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) |
693 elif args.rules_selector == 'Custom': | 745 elif args.rules_selector == 'Custom': |
694 ids, rules, gene_in_rule = make_recon(args.custom) | 746 ids, rules, gene_in_rule = make_recon(args.custom) |
747 | |
695 resolve_none = check_bool(args.none) | 748 resolve_none = check_bool(args.none) |
749 | |
696 class_pat = {} | 750 class_pat = {} |
751 | |
697 if args.option == 'datasets': | 752 if args.option == 'datasets': |
698 num = 1 | 753 num = 1 |
699 #if len(args.names) != len(set(args.names)): | |
700 # sys.exit('Execution aborted: datasets name duplicated') | |
701 for i, j in zip(args.input_datas, args.names): | 754 for i, j in zip(args.input_datas, args.names): |
755 | |
702 name = name_dataset(j, num) | 756 name = name_dataset(j, num) |
703 dataset = read_dataset(i, name) | 757 dataset = read_dataset(i, name) |
758 | |
704 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 759 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) |
705 type_gene = gene_type(dataset.iloc[0, 0], name) | 760 |
761 type_gene = gene_type(dataset.iloc[0, 0], name) | |
762 | |
706 if args.rules_selector != 'Custom': | 763 if args.rules_selector != 'Custom': |
707 genes = data_gene(dataset, type_gene, name, None) | 764 genes = data_gene(dataset, type_gene, name, None) |
708 ids, rules = load_id_rules(recon.get(type_gene)) | 765 ids, rules = load_id_rules(recon.get(type_gene)) |
709 elif args.rules_selector == 'Custom': | 766 elif args.rules_selector == 'Custom': |
710 genes = data_gene(dataset, type_gene, name, gene_in_rule) | 767 genes = data_gene(dataset, type_gene, name, gene_in_rule) |
768 | |
711 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 769 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) |
770 | |
771 if generate_ras: | |
772 create_ras(resolve_rules, name) | |
773 | |
774 | |
712 if err != None and err: | 775 if err != None and err: |
713 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | 776 warning('Warning: gene\n' + str(err) + '\nnot found in class ' |
714 + name + ', the expression level for this gene ' + | 777 + name + ', the expression level for this gene ' + |
715 'will be considered NaN\n') | 778 'will be considered NaN\n') |
716 if resolve_rules != None: | 779 if resolve_rules != None: |
736 warning('Warning: gene\n'+str(err)+'\nnot found in class ' | 799 warning('Warning: gene\n'+str(err)+'\nnot found in class ' |
737 + name + ', the expression level for this gene ' + | 800 + name + ', the expression level for this gene ' + |
738 'will be considered NaN\n') | 801 'will be considered NaN\n') |
739 if resolve_rules != None: | 802 if resolve_rules != None: |
740 class_pat = split_class(classes, resolve_rules) | 803 class_pat = split_class(classes, resolve_rules) |
804 | |
741 if args.rules_selector == 'Custom': | 805 if args.rules_selector == 'Custom': |
742 if args.yes_no == 'yes': | 806 if args.yes_no == 'yes': |
743 os.makedirs('map_svg') | |
744 os.makedirs('map_pdf') | |
745 try: | 807 try: |
746 core_map = ET.parse(args.custom_map) | 808 core_map = ET.parse(args.custom_map) |
747 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): | 809 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): |
748 sys.exit('Execution aborted: custom map in wrong format') | 810 sys.exit('Execution aborted: custom map in wrong format') |
749 elif args.yes_no == 'no': | 811 elif args.yes_no == 'no': |
750 core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') | 812 core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') |
751 else: | 813 else: |
752 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') | 814 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') |
753 maps(core_map, class_pat, ids, args.pValue, args.fChange) | 815 |
754 warning('Execution succeeded') | 816 maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) |
817 | |
818 print('Execution succeded') | |
819 | |
755 return None | 820 return None |
756 | 821 |
757 ############################################################################### | 822 ############################################################################### |
758 | 823 |
759 if __name__ == "__main__": | 824 if __name__ == "__main__": |