Mercurial > repos > bimib > marea
comparison Marea/marea.py @ 16:c71ac0bb12de draft
Uploaded
| author | bimib | 
|---|---|
| date | Tue, 01 Oct 2019 06:05:13 -0400 | 
| parents | e96f3b85e5a0 | 
| children | e6831924df01 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 15:d0e7f14b773f | 16:c71ac0bb12de | 
|---|---|
| 1 | |
| 2 from __future__ import division | 1 from __future__ import division | 
| 3 import sys | 2 import sys | 
| 4 import pandas as pd | 3 import pandas as pd | 
| 5 import itertools as it | 4 import itertools as it | 
| 6 import scipy.stats as st | 5 import scipy.stats as st | 
| 7 import collections | 6 import collections | 
| 8 import lxml.etree as ET | 7 import lxml.etree as ET | 
| 8 import shutil | |
| 9 import pickle as pk | 9 import pickle as pk | 
| 10 import math | 10 import math | 
| 11 import os | 11 import os | 
| 12 import argparse | 12 import argparse | 
| 13 from svglib.svglib import svg2rlg | 13 from svglib.svglib import svg2rlg | 
| 14 from reportlab.graphics import renderPDF | 14 from reportlab.graphics import renderPDF | 
| 15 | 15 | 
| 16 ########################## argparse ########################################### | 16 ########################## argparse ########################################## | 
| 17 | 17 | 
| 18 def process_args(args): | 18 def process_args(args): | 
| 19 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 19 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 
| 20 description = 'process some value\'s'+ | 20 description = 'process some value\'s'+ | 
| 21 ' genes to create a comparison\'s map.') | 21 ' genes to create a comparison\'s map.') | 
| 69 help = 'custom map') | 69 help = 'custom map') | 
| 70 parser.add_argument('-yn', '--yes_no', | 70 parser.add_argument('-yn', '--yes_no', | 
| 71 type = str, | 71 type = str, | 
| 72 choices = ['yes', 'no'], | 72 choices = ['yes', 'no'], | 
| 73 help = 'if make or not custom map') | 73 help = 'if make or not custom map') | 
| 74 parser.add_argument('-gs', '--generate_svg', | |
| 75 type = str, | |
| 76 default = 'true', | |
| 77 choices = ['true', 'false'], | |
| 78 help = 'generate svg map') | |
| 79 parser.add_argument('-gp', '--generate_pdf', | |
| 80 type = str, | |
| 81 default = 'true', | |
| 82 choices = ['true', 'false'], | |
| 83 help = 'generate pdf map') | |
| 84 parser.add_argument('-gr', '--generate_ras', | |
| 85 type = str, | |
| 86 default = 'true', | |
| 87 choices = ['true', 'false'], | |
| 88 help = 'generate reaction activity score') | |
| 74 args = parser.parse_args() | 89 args = parser.parse_args() | 
| 75 return args | 90 return args | 
| 76 | 91 | 
| 77 ########################### warning ########################################### | 92 ########################### warning ########################################### | 
| 78 | 93 | 
| 83 | 98 | 
| 84 ############################ dataset input #################################### | 99 ############################ dataset input #################################### | 
| 85 | 100 | 
| 86 def read_dataset(data, name): | 101 def read_dataset(data, name): | 
| 87 try: | 102 try: | 
| 88 dataset = pd.read_csv(data, sep = '\t', header = 0) | 103 dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python') | 
| 89 except pd.errors.EmptyDataError: | 104 except pd.errors.EmptyDataError: | 
| 90 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 105 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 
| 91 if len(dataset.columns) < 2: | 106 if len(dataset.columns) < 2: | 
| 92 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 107 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 
| 93 return dataset | 108 return dataset | 
| 534 react = recon.reactions | 549 react = recon.reactions | 
| 535 rules = [react[i].gene_reaction_rule for i in range(len(react))] | 550 rules = [react[i].gene_reaction_rule for i in range(len(react))] | 
| 536 ids = [react[i].id for i in range(len(react))] | 551 ids = [react[i].id for i in range(len(react))] | 
| 537 except cb.io.sbml3.CobraSBMLError: | 552 except cb.io.sbml3.CobraSBMLError: | 
| 538 try: | 553 try: | 
| 539 data = (pd.read_csv(data, sep = '\t', dtype = str)).fillna('') | 554 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') | 
| 540 if len(data.columns) < 2: | 555 if len(data.columns) < 2: | 
| 541 sys.exit('Execution aborted: wrong format of '+ | 556 sys.exit('Execution aborted: wrong format of '+ | 
| 542 'custom datarules\n') | 557 'custom datarules\n') | 
| 543 if not len(data.columns) == 2: | 558 if not len(data.columns) == 2: | 
| 544 warning('Warning: more than 2 columns in custom datarules.\n' + | 559 warning('Warning: more than 2 columns in custom datarules.\n' + | 
| 639 else: | 654 else: | 
| 640 warning('Warning: no sample found in class ' + classe + | 655 warning('Warning: no sample found in class ' + classe + | 
| 641 ', the class has been disregarded\n') | 656 ', the class has been disregarded\n') | 
| 642 return class_pat | 657 return class_pat | 
| 643 | 658 | 
| 659 ############################ create_ras ####################################### | |
| 660 | |
| 661 def create_ras (resolve_rules, dataset_name): | |
| 662 | |
| 663 if resolve_rules == None: | |
| 664 warning("Couldn't generate RAS for current dataset: " + dataset_name) | |
| 665 | |
| 666 for geni in resolve_rules.values(): | |
| 667 for i, valori in enumerate(geni): | |
| 668 if valori == None: | |
| 669 geni[i] = 'None' | |
| 670 | |
| 671 output_ras = pd.DataFrame.from_dict(resolve_rules) | |
| 672 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) | |
| 673 | |
| 674 text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") | |
| 675 text_file.write(output_to_csv) | |
| 676 text_file.close() | |
| 677 | |
| 644 ############################ map ############################################## | 678 ############################ map ############################################## | 
| 645 | 679 | 
| 646 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C): | 680 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): | 
| 647 args = process_args(sys.argv) | 681 args = process_args(sys.argv) | 
| 648 if (not class_pat) or (len(class_pat.keys()) < 2): | 682 if (not class_pat) or (len(class_pat.keys()) < 2): | 
| 649 sys.exit('Execution aborted: classes provided for comparisons are ' + | 683 sys.exit('Execution aborted: classes provided for comparisons are ' + | 
| 650 'less than two\n') | 684 'less than two\n') | 
| 651 for i, j in it.combinations(class_pat.keys(), 2): | 685 for i, j in it.combinations(class_pat.keys(), 2): | 
| 661 max_F_C = abs(avg) | 695 max_F_C = abs(avg) | 
| 662 tmp[ids[count]] = [float(p_value), avg] | 696 tmp[ids[count]] = [float(p_value), avg] | 
| 663 count += 1 | 697 count += 1 | 
| 664 except (TypeError, ZeroDivisionError): | 698 except (TypeError, ZeroDivisionError): | 
| 665 count += 1 | 699 count += 1 | 
| 666 tab = 'table_out/' + i + '_vs_' + j + '.tsv' | 700 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' | 
| 667 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | 701 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | 
| 668 tmp_csv = tmp_csv.reset_index() | 702 tmp_csv = tmp_csv.reset_index() | 
| 669 header = ['ids', 'P_Value', 'Average'] | 703 header = ['ids', 'P_Value', 'Average'] | 
| 670 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | 704 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | 
| 671 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | 705 | 
| 672 and args.yes_no == 'yes'): | 706 if create_svg or create_pdf: | 
| 673 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | 707 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | 
| 674 file_svg = 'map_svg/' + i + '_vs_' + j + '.svg' | 708 and args.yes_no == 'yes'): | 
| 675 with open(file_svg, 'wb') as new_map: | 709 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | 
| 676 new_map.write(ET.tostring(core_map, encoding='UTF-8', | 710 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | 
| 677 method='xml')) | 711 with open(file_svg, 'wb') as new_map: | 
| 678 file_pdf = 'map_pdf/' + i + '_vs_' + j + '.pdf' | 712 new_map.write(ET.tostring(core_map)) | 
| 679 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | 713 | 
| 714 | |
| 715 if create_pdf: | |
| 716 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | |
| 717 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
| 718 | |
| 719 if not create_svg: | |
| 720 #Ho utilizzato il file svg per generare il pdf, | |
| 721 #ma l'utente non ne ha richiesto il ritorno, quindi | |
| 722 #lo elimino | |
| 723 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | |
| 724 | |
| 680 return None | 725 return None | 
| 681 | 726 | 
| 682 ############################ MAIN ############################################# | 727 ############################ MAIN ############################################# | 
| 683 | 728 | 
| 684 def main(): | 729 def main(): | 
| 685 args = process_args(sys.argv) | 730 args = process_args(sys.argv) | 
| 686 os.makedirs('table_out') | 731 | 
| 687 if args.rules_selector == 'HMRcore': | 732 create_svg = check_bool(args.generate_svg) | 
| 688 os.makedirs('map_svg') | 733 create_pdf = check_bool(args.generate_pdf) | 
| 689 os.makedirs('map_pdf') | 734 generate_ras = check_bool(args.generate_ras) | 
| 735 | |
| 736 os.makedirs('result') | |
| 737 | |
| 738 if generate_ras: | |
| 739 os.makedirs('ras') | |
| 740 | |
| 741 if args.rules_selector == 'HMRcore': | |
| 690 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) | 742 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) | 
| 691 elif args.rules_selector == 'Recon': | 743 elif args.rules_selector == 'Recon': | 
| 692 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) | 744 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) | 
| 693 elif args.rules_selector == 'Custom': | 745 elif args.rules_selector == 'Custom': | 
| 694 ids, rules, gene_in_rule = make_recon(args.custom) | 746 ids, rules, gene_in_rule = make_recon(args.custom) | 
| 747 | |
| 695 resolve_none = check_bool(args.none) | 748 resolve_none = check_bool(args.none) | 
| 749 | |
| 696 class_pat = {} | 750 class_pat = {} | 
| 751 | |
| 697 if args.option == 'datasets': | 752 if args.option == 'datasets': | 
| 698 num = 1 | 753 num = 1 | 
| 699 #if len(args.names) != len(set(args.names)): | |
| 700 # sys.exit('Execution aborted: datasets name duplicated') | |
| 701 for i, j in zip(args.input_datas, args.names): | 754 for i, j in zip(args.input_datas, args.names): | 
| 755 | |
| 702 name = name_dataset(j, num) | 756 name = name_dataset(j, num) | 
| 703 dataset = read_dataset(i, name) | 757 dataset = read_dataset(i, name) | 
| 758 | |
| 704 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 759 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 
| 705 type_gene = gene_type(dataset.iloc[0, 0], name) | 760 | 
| 761 type_gene = gene_type(dataset.iloc[0, 0], name) | |
| 762 | |
| 706 if args.rules_selector != 'Custom': | 763 if args.rules_selector != 'Custom': | 
| 707 genes = data_gene(dataset, type_gene, name, None) | 764 genes = data_gene(dataset, type_gene, name, None) | 
| 708 ids, rules = load_id_rules(recon.get(type_gene)) | 765 ids, rules = load_id_rules(recon.get(type_gene)) | 
| 709 elif args.rules_selector == 'Custom': | 766 elif args.rules_selector == 'Custom': | 
| 710 genes = data_gene(dataset, type_gene, name, gene_in_rule) | 767 genes = data_gene(dataset, type_gene, name, gene_in_rule) | 
| 768 | |
| 711 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 769 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 
| 770 | |
| 771 if generate_ras: | |
| 772 create_ras(resolve_rules, name) | |
| 773 | |
| 774 | |
| 712 if err != None and err: | 775 if err != None and err: | 
| 713 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | 776 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | 
| 714 + name + ', the expression level for this gene ' + | 777 + name + ', the expression level for this gene ' + | 
| 715 'will be considered NaN\n') | 778 'will be considered NaN\n') | 
| 716 if resolve_rules != None: | 779 if resolve_rules != None: | 
| 736 warning('Warning: gene\n'+str(err)+'\nnot found in class ' | 799 warning('Warning: gene\n'+str(err)+'\nnot found in class ' | 
| 737 + name + ', the expression level for this gene ' + | 800 + name + ', the expression level for this gene ' + | 
| 738 'will be considered NaN\n') | 801 'will be considered NaN\n') | 
| 739 if resolve_rules != None: | 802 if resolve_rules != None: | 
| 740 class_pat = split_class(classes, resolve_rules) | 803 class_pat = split_class(classes, resolve_rules) | 
| 804 | |
| 741 if args.rules_selector == 'Custom': | 805 if args.rules_selector == 'Custom': | 
| 742 if args.yes_no == 'yes': | 806 if args.yes_no == 'yes': | 
| 743 os.makedirs('map_svg') | |
| 744 os.makedirs('map_pdf') | |
| 745 try: | 807 try: | 
| 746 core_map = ET.parse(args.custom_map) | 808 core_map = ET.parse(args.custom_map) | 
| 747 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): | 809 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): | 
| 748 sys.exit('Execution aborted: custom map in wrong format') | 810 sys.exit('Execution aborted: custom map in wrong format') | 
| 749 elif args.yes_no == 'no': | 811 elif args.yes_no == 'no': | 
| 750 core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') | 812 core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') | 
| 751 else: | 813 else: | 
| 752 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') | 814 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') | 
| 753 maps(core_map, class_pat, ids, args.pValue, args.fChange) | 815 | 
| 754 warning('Execution succeeded') | 816 maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) | 
| 817 | |
| 818 print('Execution succeded') | |
| 819 | |
| 755 return None | 820 return None | 
| 756 | 821 | 
| 757 ############################################################################### | 822 ############################################################################### | 
| 758 | 823 | 
| 759 if __name__ == "__main__": | 824 if __name__ == "__main__": | 
