Mercurial > repos > bimib > marea
comparison Marea/marea.py @ 64:53b4fca5bdf9 draft
Uploaded
author | bimib |
---|---|
date | Mon, 16 Mar 2020 07:47:12 -0400 |
parents | 69dfed656e0b |
children | a61733753aec |
comparison
equal
deleted
inserted
replaced
63:b8cbc2122d55 | 64:53b4fca5bdf9 |
---|---|
535 split_rules.append([]) | 535 split_rules.append([]) |
536 if err_rules: | 536 if err_rules: |
537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n') | 537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n') |
538 return (split_rules, list(set(tmp_gene_in_rule))) | 538 return (split_rules, list(set(tmp_gene_in_rule))) |
539 | 539 |
540 def make_recon(data): | |
541 try: | |
542 import cobra as cb | |
543 import warnings | |
544 with warnings.catch_warnings(): | |
545 warnings.simplefilter('ignore') | |
546 recon = cb.io.read_sbml_model(data) | |
547 react = recon.reactions | |
548 rules = [react[i].gene_reaction_rule for i in range(len(react))] | |
549 ids = [react[i].id for i in range(len(react))] | |
550 except cb.io.sbml.CobraSBMLError: | |
551 try: | |
552 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') | |
553 if len(data.columns) < 2: | |
554 sys.exit('Execution aborted: wrong format of '+ | |
555 'custom datarules\n') | |
556 if not len(data.columns) == 2: | |
557 warning('Warning: more than 2 columns in custom datarules.\n' + | |
558 'Extra columns have been disregarded\n') | |
559 ids = list(data.iloc[:, 0]) | |
560 rules = list(data.iloc[:, 1]) | |
561 except pd.errors.EmptyDataError: | |
562 sys.exit('Execution aborted: wrong format of custom datarules\n') | |
563 except pd.errors.ParserError: | |
564 sys.exit('Execution aborted: wrong format of custom datarules\n') | |
565 split_rules, tmp_genes = do_rules(rules) | |
566 gene_in_rule = {} | |
567 for i in tmp_genes: | |
568 gene_in_rule[i] = 'ok' | |
569 return (ids, split_rules, gene_in_rule) | |
540 | 570 |
541 ############################ gene ############################################# | 571 ############################ gene ############################################# |
542 | 572 |
543 def data_gene(gene, type_gene, name, gene_custom): | 573 def data_gene(gene, type_gene, name, gene_custom): |
544 args = process_args(sys.argv) | 574 args = process_args(sys.argv) |
633 if (not class_pat) or (len(class_pat.keys()) < 2): | 663 if (not class_pat) or (len(class_pat.keys()) < 2): |
634 sys.exit('Execution aborted: classes provided for comparisons are ' + | 664 sys.exit('Execution aborted: classes provided for comparisons are ' + |
635 'less than two\n') | 665 'less than two\n') |
636 | 666 |
637 if comparison == "manyvsmany": | 667 if comparison == "manyvsmany": |
668 for i, j in it.combinations(class_pat.keys(), 2): | |
669 | |
670 tmp = {} | |
671 count = 0 | |
672 max_F_C = 0 | |
673 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | |
674 try: | |
675 stat_D, p_value = st.ks_2samp(l1, l2) | |
676 #sum(l1) da errore secondo me perchè ha null | |
677 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
678 if not isinstance(avg, str): | |
679 if max_F_C < abs(avg): | |
680 max_F_C = abs(avg) | |
681 tmp[ids[count]] = [float(p_value), avg] | |
682 count += 1 | |
683 except (TypeError, ZeroDivisionError): | |
684 count += 1 | |
685 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' | |
686 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
687 tmp_csv = tmp_csv.reset_index() | |
688 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
689 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
690 | |
691 if create_svg or create_pdf: | |
692 if args.custom_rules == 'false' or (args.custom_rules == 'true' | |
693 and args.custom_map != ''): | |
694 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
695 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | |
696 with open(file_svg, 'wb') as new_map: | |
697 new_map.write(ET.tostring(core_map)) | |
698 | |
699 | |
700 if create_pdf: | |
701 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | |
702 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
703 | |
704 if not create_svg: | |
705 #Ho utilizzato il file svg per generare il pdf, | |
706 #ma l'utente non ne ha richiesto il ritorno, quindi | |
707 #lo elimino | |
708 | |
709 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | |
710 elif comparison == "onevsrest": | |
711 for single_cluster in class_pat.keys(): | |
712 t = [] | |
713 for k in class_pat.keys(): | |
714 if k != single_cluster: | |
715 t.append(class_pat.get(k)) | |
716 rest = [] | |
717 for i in t: | |
718 rest = rest + i | |
719 | |
720 tmp = {} | |
721 count = 0 | |
722 max_F_C = 0 | |
723 | |
724 for l1, l2 in zip(rest, class_pat.get(single_cluster)): | |
725 try: | |
726 stat_D, p_value = st.ks_2samp(l1, l2) | |
727 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
728 if not isinstance(avg, str): | |
729 if max_F_C < abs(avg): | |
730 max_F_C = abs(avg) | |
731 tmp[ids[count]] = [float(p_value), avg] | |
732 count += 1 | |
733 except (TypeError, ZeroDivisionError): | |
734 count += 1 | |
735 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv' | |
736 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
737 tmp_csv = tmp_csv.reset_index() | |
738 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
739 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
740 | |
741 if create_svg or create_pdf: | |
742 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | |
743 and args.yes_no == 'yes'): | |
744 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
745 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg' | |
746 with open(file_svg, 'wb') as new_map: | |
747 new_map.write(ET.tostring(core_map)) | |
748 | |
749 | |
750 if create_pdf: | |
751 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf' | |
752 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
753 | |
754 if not create_svg: | |
755 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg') | |
756 | |
757 elif comparison == "onevsmany": | |
638 for i, j in it.combinations(class_pat.keys(), 2): | 758 for i, j in it.combinations(class_pat.keys(), 2): |
639 tmp = {} | 759 tmp = {} |
640 count = 0 | 760 count = 0 |
641 max_F_C = 0 | 761 max_F_C = 0 |
642 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | 762 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): |
656 tmp_csv = tmp_csv.reset_index() | 776 tmp_csv = tmp_csv.reset_index() |
657 header = ['ids', 'P_Value', 'Log2(fold change)'] | 777 header = ['ids', 'P_Value', 'Log2(fold change)'] |
658 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | 778 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) |
659 | 779 |
660 if create_svg or create_pdf: | 780 if create_svg or create_pdf: |
661 if args.custom_rules == 'false' or (args.custom_rules == 'true' | 781 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' |
662 and args.custom_map != ''): | 782 and args.yes_no == 'yes'): |
663 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | 783 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) |
664 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | 784 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' |
665 with open(file_svg, 'wb') as new_map: | 785 with open(file_svg, 'wb') as new_map: |
666 new_map.write(ET.tostring(core_map)) | 786 new_map.write(ET.tostring(core_map)) |
667 | 787 |
669 if create_pdf: | 789 if create_pdf: |
670 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | 790 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' |
671 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | 791 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) |
672 | 792 |
673 if not create_svg: | 793 if not create_svg: |
674 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | 794 #Ho utilizzato il file svg per generare il pdf, |
675 elif comparison == "onevsrest": | 795 #ma l'utente non ne ha richiesto il ritorno, quindi |
676 for single_cluster in class_pat.keys(): | 796 #lo elimino |
677 t = [] | |
678 for k in class_pat.keys(): | |
679 if k != single_cluster: | |
680 t.append(class_pat.get(k)) | |
681 rest = [] | |
682 for i in t: | |
683 rest = rest + i | |
684 | |
685 tmp = {} | |
686 count = 0 | |
687 max_F_C = 0 | |
688 | |
689 for l1, l2 in zip(rest, class_pat.get(single_cluster)): | |
690 try: | |
691 stat_D, p_value = st.ks_2samp(l1, l2) | |
692 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
693 if not isinstance(avg, str): | |
694 if max_F_C < abs(avg): | |
695 max_F_C = abs(avg) | |
696 tmp[ids[count]] = [float(p_value), avg] | |
697 count += 1 | |
698 except (TypeError, ZeroDivisionError): | |
699 count += 1 | |
700 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv' | |
701 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
702 tmp_csv = tmp_csv.reset_index() | |
703 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
704 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
705 | |
706 if create_svg or create_pdf: | |
707 if args.custom_rules == 'false' or (args.custom_rules == 'true' | |
708 and args.custom_map != ''): | |
709 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
710 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg' | |
711 with open(file_svg, 'wb') as new_map: | |
712 new_map.write(ET.tostring(core_map)) | |
713 | 797 |
714 | |
715 if create_pdf: | |
716 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf' | |
717 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
718 | |
719 if not create_svg: | |
720 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg') | |
721 | |
722 elif comparison == "onevsmany": | |
723 for i, j in it.combinations(class_pat.keys(), 2): | |
724 if i != control and j != control: | |
725 continue | |
726 if i == control and j == control: | |
727 continue | |
728 tmp = {} | |
729 count = 0 | |
730 max_F_C = 0 | |
731 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | |
732 try: | |
733 stat_D, p_value = st.ks_2samp(l1, l2) | |
734 #sum(l1) da errore secondo me perchè ha null | |
735 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
736 if not isinstance(avg, str): | |
737 if max_F_C < abs(avg): | |
738 max_F_C = abs(avg) | |
739 tmp[ids[count]] = [float(p_value), avg] | |
740 count += 1 | |
741 except (TypeError, ZeroDivisionError): | |
742 count += 1 | |
743 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' | |
744 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
745 tmp_csv = tmp_csv.reset_index() | |
746 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
747 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
748 | |
749 if create_svg or create_pdf: | |
750 if args.custom_rules == 'false' or (args.custom_rules == 'true' | |
751 and args.custom_map != ''): | |
752 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
753 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | |
754 with open(file_svg, 'wb') as new_map: | |
755 new_map.write(ET.tostring(core_map)) | |
756 | |
757 | |
758 if create_pdf: | |
759 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | |
760 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
761 | |
762 if not create_svg: | |
763 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | 798 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') |
764 | 799 |
765 | 800 |
766 | 801 |
767 | 802 |
775 create_svg = check_bool(args.generate_svg) | 810 create_svg = check_bool(args.generate_svg) |
776 create_pdf = check_bool(args.generate_pdf) | 811 create_pdf = check_bool(args.generate_pdf) |
777 | 812 |
778 if os.path.isdir('result') == False: | 813 if os.path.isdir('result') == False: |
779 os.makedirs('result') | 814 os.makedirs('result') |
815 | |
816 if args.custom_rules == 'true': | |
817 ids, rules, gene_in_rule = make_recon(args.custom_rule) | |
780 | 818 |
781 class_pat = {} | 819 class_pat = {} |
782 | 820 |
783 if args.option == 'datasets': | 821 if args.option == 'datasets': |
784 num = 1 | 822 num = 1 |