comparison Marea/marea.py @ 65:a61733753aec draft

Uploaded
author bimib
date Mon, 16 Mar 2020 08:21:29 -0400
parents 53b4fca5bdf9
children 19d704e24977
comparison
equal deleted inserted replaced
64:53b4fca5bdf9 65:a61733753aec
535 split_rules.append([]) 535 split_rules.append([])
536 if err_rules: 536 if err_rules:
537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n') 537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
538 return (split_rules, list(set(tmp_gene_in_rule))) 538 return (split_rules, list(set(tmp_gene_in_rule)))
539 539
540 def make_recon(data):
541 try:
542 import cobra as cb
543 import warnings
544 with warnings.catch_warnings():
545 warnings.simplefilter('ignore')
546 recon = cb.io.read_sbml_model(data)
547 react = recon.reactions
548 rules = [react[i].gene_reaction_rule for i in range(len(react))]
549 ids = [react[i].id for i in range(len(react))]
550 except cb.io.sbml.CobraSBMLError:
551 try:
552 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('')
553 if len(data.columns) < 2:
554 sys.exit('Execution aborted: wrong format of '+
555 'custom datarules\n')
556 if not len(data.columns) == 2:
557 warning('Warning: more than 2 columns in custom datarules.\n' +
558 'Extra columns have been disregarded\n')
559 ids = list(data.iloc[:, 0])
560 rules = list(data.iloc[:, 1])
561 except pd.errors.EmptyDataError:
562 sys.exit('Execution aborted: wrong format of custom datarules\n')
563 except pd.errors.ParserError:
564 sys.exit('Execution aborted: wrong format of custom datarules\n')
565 split_rules, tmp_genes = do_rules(rules)
566 gene_in_rule = {}
567 for i in tmp_genes:
568 gene_in_rule[i] = 'ok'
569 return (ids, split_rules, gene_in_rule)
570 540
571 ############################ gene ############################################# 541 ############################ gene #############################################
572 542
573 def data_gene(gene, type_gene, name, gene_custom): 543 def data_gene(gene, type_gene, name, gene_custom):
574 args = process_args(sys.argv) 544 args = process_args(sys.argv)
663 if (not class_pat) or (len(class_pat.keys()) < 2): 633 if (not class_pat) or (len(class_pat.keys()) < 2):
664 sys.exit('Execution aborted: classes provided for comparisons are ' + 634 sys.exit('Execution aborted: classes provided for comparisons are ' +
665 'less than two\n') 635 'less than two\n')
666 636
667 if comparison == "manyvsmany": 637 if comparison == "manyvsmany":
668 for i, j in it.combinations(class_pat.keys(), 2):
669
670 tmp = {}
671 count = 0
672 max_F_C = 0
673 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
674 try:
675 stat_D, p_value = st.ks_2samp(l1, l2)
676 #sum(l1) da errore secondo me perchè ha null
677 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
678 if not isinstance(avg, str):
679 if max_F_C < abs(avg):
680 max_F_C = abs(avg)
681 tmp[ids[count]] = [float(p_value), avg]
682 count += 1
683 except (TypeError, ZeroDivisionError):
684 count += 1
685 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
686 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
687 tmp_csv = tmp_csv.reset_index()
688 header = ['ids', 'P_Value', 'Log2(fold change)']
689 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
690
691 if create_svg or create_pdf:
692 if args.custom_rules == 'false' or (args.custom_rules == 'true'
693 and args.custom_map != ''):
694 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
695 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
696 with open(file_svg, 'wb') as new_map:
697 new_map.write(ET.tostring(core_map))
698
699
700 if create_pdf:
701 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
702 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
703
704 if not create_svg:
705 #Ho utilizzato il file svg per generare il pdf,
706 #ma l'utente non ne ha richiesto il ritorno, quindi
707 #lo elimino
708
709 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
710 elif comparison == "onevsrest":
711 for single_cluster in class_pat.keys():
712 t = []
713 for k in class_pat.keys():
714 if k != single_cluster:
715 t.append(class_pat.get(k))
716 rest = []
717 for i in t:
718 rest = rest + i
719
720 tmp = {}
721 count = 0
722 max_F_C = 0
723
724 for l1, l2 in zip(rest, class_pat.get(single_cluster)):
725 try:
726 stat_D, p_value = st.ks_2samp(l1, l2)
727 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
728 if not isinstance(avg, str):
729 if max_F_C < abs(avg):
730 max_F_C = abs(avg)
731 tmp[ids[count]] = [float(p_value), avg]
732 count += 1
733 except (TypeError, ZeroDivisionError):
734 count += 1
735 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv'
736 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
737 tmp_csv = tmp_csv.reset_index()
738 header = ['ids', 'P_Value', 'Log2(fold change)']
739 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
740
741 if create_svg or create_pdf:
742 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
743 and args.yes_no == 'yes'):
744 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
745 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg'
746 with open(file_svg, 'wb') as new_map:
747 new_map.write(ET.tostring(core_map))
748
749
750 if create_pdf:
751 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf'
752 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
753
754 if not create_svg:
755 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg')
756
757 elif comparison == "onevsmany":
758 for i, j in it.combinations(class_pat.keys(), 2): 638 for i, j in it.combinations(class_pat.keys(), 2):
759 tmp = {} 639 tmp = {}
760 count = 0 640 count = 0
761 max_F_C = 0 641 max_F_C = 0
762 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): 642 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
776 tmp_csv = tmp_csv.reset_index() 656 tmp_csv = tmp_csv.reset_index()
777 header = ['ids', 'P_Value', 'Log2(fold change)'] 657 header = ['ids', 'P_Value', 'Log2(fold change)']
778 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) 658 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
779 659
780 if create_svg or create_pdf: 660 if create_svg or create_pdf:
781 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' 661 if args.custom_rules == 'false' or (args.custom_rules == 'true'
782 and args.yes_no == 'yes'): 662 and args.custom_map != ''):
783 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) 663 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
784 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' 664 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
785 with open(file_svg, 'wb') as new_map: 665 with open(file_svg, 'wb') as new_map:
786 new_map.write(ET.tostring(core_map)) 666 new_map.write(ET.tostring(core_map))
787 667
789 if create_pdf: 669 if create_pdf:
790 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' 670 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
791 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) 671 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
792 672
793 if not create_svg: 673 if not create_svg:
794 #Ho utilizzato il file svg per generare il pdf, 674 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
795 #ma l'utente non ne ha richiesto il ritorno, quindi 675 elif comparison == "onevsrest":
796 #lo elimino 676 for single_cluster in class_pat.keys():
677 t = []
678 for k in class_pat.keys():
679 if k != single_cluster:
680 t.append(class_pat.get(k))
681 rest = []
682 for i in t:
683 rest = rest + i
684
685 tmp = {}
686 count = 0
687 max_F_C = 0
688
689 for l1, l2 in zip(rest, class_pat.get(single_cluster)):
690 try:
691 stat_D, p_value = st.ks_2samp(l1, l2)
692 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
693 if not isinstance(avg, str):
694 if max_F_C < abs(avg):
695 max_F_C = abs(avg)
696 tmp[ids[count]] = [float(p_value), avg]
697 count += 1
698 except (TypeError, ZeroDivisionError):
699 count += 1
700 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv'
701 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
702 tmp_csv = tmp_csv.reset_index()
703 header = ['ids', 'P_Value', 'Log2(fold change)']
704 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
705
706 if create_svg or create_pdf:
707 if args.custom_rules == 'false' or (args.custom_rules == 'true'
708 and args.custom_map != ''):
709 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
710 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg'
711 with open(file_svg, 'wb') as new_map:
712 new_map.write(ET.tostring(core_map))
797 713
714
715 if create_pdf:
716 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf'
717 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
718
719 if not create_svg:
720 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg')
721
722 elif comparison == "onevsmany":
723 for i, j in it.combinations(class_pat.keys(), 2):
724 if i != control and j != control:
725 continue
726 if i == control and j == control:
727 continue
728 tmp = {}
729 count = 0
730 max_F_C = 0
731 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
732 try:
733 stat_D, p_value = st.ks_2samp(l1, l2)
734 #sum(l1) da errore secondo me perchè ha null
735 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
736 if not isinstance(avg, str):
737 if max_F_C < abs(avg):
738 max_F_C = abs(avg)
739 tmp[ids[count]] = [float(p_value), avg]
740 count += 1
741 except (TypeError, ZeroDivisionError):
742 count += 1
743 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
744 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
745 tmp_csv = tmp_csv.reset_index()
746 header = ['ids', 'P_Value', 'Log2(fold change)']
747 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
748
749 if create_svg or create_pdf:
750 if args.custom_rules == 'false' or (args.custom_rules == 'true'
751 and args.custom_map != ''):
752 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
753 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
754 with open(file_svg, 'wb') as new_map:
755 new_map.write(ET.tostring(core_map))
756
757
758 if create_pdf:
759 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
760 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
761
762 if not create_svg:
798 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') 763 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
799 764
800 765
801 766
802 767
810 create_svg = check_bool(args.generate_svg) 775 create_svg = check_bool(args.generate_svg)
811 create_pdf = check_bool(args.generate_pdf) 776 create_pdf = check_bool(args.generate_pdf)
812 777
813 if os.path.isdir('result') == False: 778 if os.path.isdir('result') == False:
814 os.makedirs('result') 779 os.makedirs('result')
815
816 if args.custom_rules == 'true':
817 ids, rules, gene_in_rule = make_recon(args.custom_rule)
818 780
819 class_pat = {} 781 class_pat = {}
820 782
821 if args.option == 'datasets': 783 if args.option == 'datasets':
822 num = 1 784 num = 1
877 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): 839 except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
878 sys.exit('Execution aborted: custom map in wrong format') 840 sys.exit('Execution aborted: custom map in wrong format')
879 else: 841 else:
880 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') 842 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')
881 843
882 maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control) 844 class_pat_trim = {}
845
846 for key in class_pat.keys():
847 class_pat_trim[key.strip()] = class_pat[key]
848
849
850 maps(core_map, class_pat_trim, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control)
883 851
884 print('Execution succeded') 852 print('Execution succeded')
885 853
886 return None 854 return None
887 855