comparison Marea/marea.py @ 64:53b4fca5bdf9 draft

Uploaded
author bimib
date Mon, 16 Mar 2020 07:47:12 -0400
parents 69dfed656e0b
children a61733753aec
comparison
equal deleted inserted replaced
63:b8cbc2122d55 64:53b4fca5bdf9
535 split_rules.append([]) 535 split_rules.append([])
536 if err_rules: 536 if err_rules:
537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n') 537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
538 return (split_rules, list(set(tmp_gene_in_rule))) 538 return (split_rules, list(set(tmp_gene_in_rule)))
539 539
540 def make_recon(data):
541 try:
542 import cobra as cb
543 import warnings
544 with warnings.catch_warnings():
545 warnings.simplefilter('ignore')
546 recon = cb.io.read_sbml_model(data)
547 react = recon.reactions
548 rules = [react[i].gene_reaction_rule for i in range(len(react))]
549 ids = [react[i].id for i in range(len(react))]
550 except cb.io.sbml.CobraSBMLError:
551 try:
552 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('')
553 if len(data.columns) < 2:
554 sys.exit('Execution aborted: wrong format of '+
555 'custom datarules\n')
556 if not len(data.columns) == 2:
557 warning('Warning: more than 2 columns in custom datarules.\n' +
558 'Extra columns have been disregarded\n')
559 ids = list(data.iloc[:, 0])
560 rules = list(data.iloc[:, 1])
561 except pd.errors.EmptyDataError:
562 sys.exit('Execution aborted: wrong format of custom datarules\n')
563 except pd.errors.ParserError:
564 sys.exit('Execution aborted: wrong format of custom datarules\n')
565 split_rules, tmp_genes = do_rules(rules)
566 gene_in_rule = {}
567 for i in tmp_genes:
568 gene_in_rule[i] = 'ok'
569 return (ids, split_rules, gene_in_rule)
540 570
541 ############################ gene ############################################# 571 ############################ gene #############################################
542 572
543 def data_gene(gene, type_gene, name, gene_custom): 573 def data_gene(gene, type_gene, name, gene_custom):
544 args = process_args(sys.argv) 574 args = process_args(sys.argv)
633 if (not class_pat) or (len(class_pat.keys()) < 2): 663 if (not class_pat) or (len(class_pat.keys()) < 2):
634 sys.exit('Execution aborted: classes provided for comparisons are ' + 664 sys.exit('Execution aborted: classes provided for comparisons are ' +
635 'less than two\n') 665 'less than two\n')
636 666
637 if comparison == "manyvsmany": 667 if comparison == "manyvsmany":
668 for i, j in it.combinations(class_pat.keys(), 2):
669
670 tmp = {}
671 count = 0
672 max_F_C = 0
673 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
674 try:
675 stat_D, p_value = st.ks_2samp(l1, l2)
676 #sum(l1) da errore secondo me perchè ha null
677 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
678 if not isinstance(avg, str):
679 if max_F_C < abs(avg):
680 max_F_C = abs(avg)
681 tmp[ids[count]] = [float(p_value), avg]
682 count += 1
683 except (TypeError, ZeroDivisionError):
684 count += 1
685 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
686 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
687 tmp_csv = tmp_csv.reset_index()
688 header = ['ids', 'P_Value', 'Log2(fold change)']
689 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
690
691 if create_svg or create_pdf:
692 if args.custom_rules == 'false' or (args.custom_rules == 'true'
693 and args.custom_map != ''):
694 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
695 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
696 with open(file_svg, 'wb') as new_map:
697 new_map.write(ET.tostring(core_map))
698
699
700 if create_pdf:
701 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
702 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
703
704 if not create_svg:
705 #Ho utilizzato il file svg per generare il pdf,
706 #ma l'utente non ne ha richiesto il ritorno, quindi
707 #lo elimino
708
709 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
710 elif comparison == "onevsrest":
711 for single_cluster in class_pat.keys():
712 t = []
713 for k in class_pat.keys():
714 if k != single_cluster:
715 t.append(class_pat.get(k))
716 rest = []
717 for i in t:
718 rest = rest + i
719
720 tmp = {}
721 count = 0
722 max_F_C = 0
723
724 for l1, l2 in zip(rest, class_pat.get(single_cluster)):
725 try:
726 stat_D, p_value = st.ks_2samp(l1, l2)
727 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
728 if not isinstance(avg, str):
729 if max_F_C < abs(avg):
730 max_F_C = abs(avg)
731 tmp[ids[count]] = [float(p_value), avg]
732 count += 1
733 except (TypeError, ZeroDivisionError):
734 count += 1
735 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv'
736 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
737 tmp_csv = tmp_csv.reset_index()
738 header = ['ids', 'P_Value', 'Log2(fold change)']
739 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
740
741 if create_svg or create_pdf:
742 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
743 and args.yes_no == 'yes'):
744 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
745 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg'
746 with open(file_svg, 'wb') as new_map:
747 new_map.write(ET.tostring(core_map))
748
749
750 if create_pdf:
751 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf'
752 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
753
754 if not create_svg:
755 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg')
756
757 elif comparison == "onevsmany":
638 for i, j in it.combinations(class_pat.keys(), 2): 758 for i, j in it.combinations(class_pat.keys(), 2):
639 tmp = {} 759 tmp = {}
640 count = 0 760 count = 0
641 max_F_C = 0 761 max_F_C = 0
642 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): 762 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
656 tmp_csv = tmp_csv.reset_index() 776 tmp_csv = tmp_csv.reset_index()
657 header = ['ids', 'P_Value', 'Log2(fold change)'] 777 header = ['ids', 'P_Value', 'Log2(fold change)']
658 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) 778 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
659 779
660 if create_svg or create_pdf: 780 if create_svg or create_pdf:
661 if args.custom_rules == 'false' or (args.custom_rules == 'true' 781 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
662 and args.custom_map != ''): 782 and args.yes_no == 'yes'):
663 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) 783 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
664 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' 784 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
665 with open(file_svg, 'wb') as new_map: 785 with open(file_svg, 'wb') as new_map:
666 new_map.write(ET.tostring(core_map)) 786 new_map.write(ET.tostring(core_map))
667 787
669 if create_pdf: 789 if create_pdf:
670 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' 790 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
671 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) 791 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
672 792
673 if not create_svg: 793 if not create_svg:
674 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') 794 #Ho utilizzato il file svg per generare il pdf,
675 elif comparison == "onevsrest": 795 #ma l'utente non ne ha richiesto il ritorno, quindi
676 for single_cluster in class_pat.keys(): 796 #lo elimino
677 t = []
678 for k in class_pat.keys():
679 if k != single_cluster:
680 t.append(class_pat.get(k))
681 rest = []
682 for i in t:
683 rest = rest + i
684
685 tmp = {}
686 count = 0
687 max_F_C = 0
688
689 for l1, l2 in zip(rest, class_pat.get(single_cluster)):
690 try:
691 stat_D, p_value = st.ks_2samp(l1, l2)
692 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
693 if not isinstance(avg, str):
694 if max_F_C < abs(avg):
695 max_F_C = abs(avg)
696 tmp[ids[count]] = [float(p_value), avg]
697 count += 1
698 except (TypeError, ZeroDivisionError):
699 count += 1
700 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv'
701 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
702 tmp_csv = tmp_csv.reset_index()
703 header = ['ids', 'P_Value', 'Log2(fold change)']
704 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
705
706 if create_svg or create_pdf:
707 if args.custom_rules == 'false' or (args.custom_rules == 'true'
708 and args.custom_map != ''):
709 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
710 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg'
711 with open(file_svg, 'wb') as new_map:
712 new_map.write(ET.tostring(core_map))
713 797
714
715 if create_pdf:
716 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf'
717 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
718
719 if not create_svg:
720 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg')
721
722 elif comparison == "onevsmany":
723 for i, j in it.combinations(class_pat.keys(), 2):
724 if i != control and j != control:
725 continue
726 if i == control and j == control:
727 continue
728 tmp = {}
729 count = 0
730 max_F_C = 0
731 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
732 try:
733 stat_D, p_value = st.ks_2samp(l1, l2)
734 #sum(l1) da errore secondo me perchè ha null
735 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
736 if not isinstance(avg, str):
737 if max_F_C < abs(avg):
738 max_F_C = abs(avg)
739 tmp[ids[count]] = [float(p_value), avg]
740 count += 1
741 except (TypeError, ZeroDivisionError):
742 count += 1
743 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
744 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
745 tmp_csv = tmp_csv.reset_index()
746 header = ['ids', 'P_Value', 'Log2(fold change)']
747 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
748
749 if create_svg or create_pdf:
750 if args.custom_rules == 'false' or (args.custom_rules == 'true'
751 and args.custom_map != ''):
752 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
753 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
754 with open(file_svg, 'wb') as new_map:
755 new_map.write(ET.tostring(core_map))
756
757
758 if create_pdf:
759 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
760 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
761
762 if not create_svg:
763 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') 798 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
764 799
765 800
766 801
767 802
775 create_svg = check_bool(args.generate_svg) 810 create_svg = check_bool(args.generate_svg)
776 create_pdf = check_bool(args.generate_pdf) 811 create_pdf = check_bool(args.generate_pdf)
777 812
778 if os.path.isdir('result') == False: 813 if os.path.isdir('result') == False:
779 os.makedirs('result') 814 os.makedirs('result')
815
816 if args.custom_rules == 'true':
817 ids, rules, gene_in_rule = make_recon(args.custom_rule)
780 818
781 class_pat = {} 819 class_pat = {}
782 820
783 if args.option == 'datasets': 821 if args.option == 'datasets':
784 num = 1 822 num = 1