Mercurial > repos > bimib > cobraxy
comparison COBRAxy/marea.py @ 299:8ae278b4a5d5 draft
Uploaded
author | francesco_lapi |
---|---|
date | Fri, 16 May 2025 15:47:05 +0000 |
parents | 1402c2beb8f2 |
children | 24783af8a2f9 |
comparison
equal
deleted
inserted
replaced
298:bc8cbaacafd0 | 299:8ae278b4a5d5 |
---|---|
390 | 390 |
391 class ArrowColor(Enum): | 391 class ArrowColor(Enum): |
392 """ | 392 """ |
393 Encodes possible arrow colors based on their meaning in the enrichment process. | 393 Encodes possible arrow colors based on their meaning in the enrichment process. |
394 """ | 394 """ |
395 Invalid = "#BEBEBE" # gray, fold-change under threshold | 395 Invalid = "#BEBEBE" # gray, fold-change under threshold or not significant p-value |
396 Transparent = "#ffffff00" # transparent, to make some arrow segments disappear | |
396 UpRegulated = "#ecac68" # orange, up-regulated reaction | 397 UpRegulated = "#ecac68" # orange, up-regulated reaction |
397 DownRegulated = "#6495ed" # lightblue, down-regulated reaction | 398 DownRegulated = "#6495ed" # lightblue, down-regulated reaction |
398 | 399 |
399 UpRegulatedInv = "#FF0000" | 400 UpRegulatedInv = "#FF0000" |
400 # ^^^ bright red, up-regulated net value for a reversible reaction with | 401 # ^^^ bright red, up-regulated net value for a reversible reaction with |
477 if downSizedForTips: width *= 0.8 | 478 if downSizedForTips: width *= 0.8 |
478 return f";stroke:{self.col};stroke-width:{width};stroke-dasharray:{'5,5' if self.dash else 'none'}" | 479 return f";stroke:{self.col};stroke-width:{width};stroke-dasharray:{'5,5' if self.dash else 'none'}" |
479 | 480 |
480 # vvv These constants could be inside the class itself as static properties, but python | 481 # vvv These constants could be inside the class itself as static properties, but python |
481 # was built by brainless organisms so here we are! | 482 # was built by brainless organisms so here we are! |
482 INVALID_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid) | 483 INVALID_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid) |
483 INSIGNIFICANT_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid, isDashed = True) | 484 INSIGNIFICANT_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid, isDashed = True) |
485 TRANSPARENT_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Transparent) # Who cares how big it is if it's transparent | |
484 | 486 |
485 # TODO: A more general version of this can be used for RAS as well, we don't need "fix map" or whatever | 487 # TODO: A more general version of this can be used for RAS as well, we don't need "fix map" or whatever |
486 def applyRpsEnrichmentToMap(rpsEnrichmentRes :Dict[str, Union[Tuple[float, FoldChange], Tuple[float, FoldChange, float, float]]], metabMap :ET.ElementTree, maxNumericZScore :float) -> None: | 488 def applyRpsEnrichmentToMap(rpsEnrichmentRes :Dict[str, Union[Tuple[float, FoldChange], Tuple[float, FoldChange, float, float]]], metabMap :ET.ElementTree, maxNumericZScore :float) -> None: |
487 """ | 489 """ |
488 Applies RPS enrichment results to the provided metabolic map. | 490 Applies RPS enrichment results to the provided metabolic map. |
708 match ARGS.test: | 710 match ARGS.test: |
709 case "ks": | 711 case "ks": |
710 # Perform Kolmogorov-Smirnov test | 712 # Perform Kolmogorov-Smirnov test |
711 _, p_value = st.ks_2samp(dataset1Data, dataset2Data) | 713 _, p_value = st.ks_2samp(dataset1Data, dataset2Data) |
712 case "ttest_p": | 714 case "ttest_p": |
715 # Datasets should have same size | |
716 if len(dataset1Data) != len(dataset2Data): | |
717 raise ValueError("Datasets must have the same size for paired t-test.") | |
713 # Perform t-test for paired samples | 718 # Perform t-test for paired samples |
714 _, p_value = st.ttest_rel(dataset1Data, dataset2Data) | 719 _, p_value = st.ttest_rel(dataset1Data, dataset2Data) |
715 case "ttest_ind": | 720 case "ttest_ind": |
716 # Perform t-test for independent samples | 721 # Perform t-test for independent samples |
717 _, p_value = st.ttest_ind(dataset1Data, dataset2Data) | 722 _, p_value = st.ttest_ind(dataset1Data, dataset2Data) |
718 case "wilcoxon": | 723 case "wilcoxon": |
724 # Datasets should have same size | |
725 if len(dataset1Data) != len(dataset2Data): | |
726 raise ValueError("Datasets must have the same size for Wilcoxon signed-rank test.") | |
719 # Perform Wilcoxon signed-rank test | 727 # Perform Wilcoxon signed-rank test |
720 _, p_value = st.wilcoxon(dataset1Data, dataset2Data) | 728 _, p_value = st.wilcoxon(dataset1Data, dataset2Data) |
721 case "mw": | 729 case "mw": |
722 # Perform Mann-Whitney U test | 730 # Perform Mann-Whitney U test |
723 _, p_value = st.mannwhitneyu(dataset1Data, dataset2Data) | 731 _, p_value = st.mannwhitneyu(dataset1Data, dataset2Data) |
734 # Calculate Z-score | 742 # Calculate Z-score |
735 z_score = (mean1 - mean2) / np.sqrt((std1**2 / n1) + (std2**2 / n2)) | 743 z_score = (mean1 - mean2) / np.sqrt((std1**2 / n1) + (std2**2 / n2)) |
736 | 744 |
737 return p_value, z_score | 745 return p_value, z_score |
738 | 746 |
739 def compareDatasetPair(dataset1Data :List[List[float]], dataset2Data :List[List[float]], ids :List[str]) -> Tuple[Dict[str, List[Union[float, FoldChange]]], float]: | 747 # TODO: the net RPS computation should be done in the RPS module |
748 def compareDatasetPair(dataset1Data :List[List[float]], dataset2Data :List[List[float]], ids :List[str]) -> Tuple[Dict[str, List[Union[float, FoldChange]]], float, Dict[str, Tuple[np.ndarray, np.ndarray]]]: | |
740 #TODO: the following code still suffers from "dumbvarnames-osis" | 749 #TODO: the following code still suffers from "dumbvarnames-osis" |
750 netRPS :Dict[str, Tuple[np.ndarray, np.ndarray]] = {} | |
741 comparisonResult :Dict[str, List[Union[float, FoldChange]]] = {} | 751 comparisonResult :Dict[str, List[Union[float, FoldChange]]] = {} |
742 count = 0 | 752 count = 0 |
743 max_z_score = 0 | 753 max_z_score = 0 |
744 | 754 |
745 for l1, l2 in zip(dataset1Data, dataset2Data): | 755 for l1, l2 in zip(dataset1Data, dataset2Data): |
754 try: position = ids.index(reactId[:-1] + ('B' if reactDir is ReactionDirection.Direct else 'F')) | 764 try: position = ids.index(reactId[:-1] + ('B' if reactDir is ReactionDirection.Direct else 'F')) |
755 except ValueError: continue # we look for the complementary id, if not found we skip | 765 except ValueError: continue # we look for the complementary id, if not found we skip |
756 | 766 |
757 nets1 = np.subtract(l1, dataset1Data[position]) | 767 nets1 = np.subtract(l1, dataset1Data[position]) |
758 nets2 = np.subtract(l2, dataset2Data[position]) | 768 nets2 = np.subtract(l2, dataset2Data[position]) |
769 netRPS[reactId] = (nets1, nets2) | |
759 | 770 |
760 p_value, z_score = computePValue(nets1, nets2) | 771 p_value, z_score = computePValue(nets1, nets2) |
761 avg1 = sum(nets1) / len(nets1) | 772 avg1 = sum(nets1) / len(nets1) |
762 avg2 = sum(nets2) / len(nets2) | 773 avg2 = sum(nets2) / len(nets2) |
763 net = fold_change(avg1, avg2) | 774 net = fold_change(avg1, avg2) |
785 # Retrieve the p-values from the comparisonResult dictionary | 796 # Retrieve the p-values from the comparisonResult dictionary |
786 reactIds = list(comparisonResult.keys()) | 797 reactIds = list(comparisonResult.keys()) |
787 pValues = [comparisonResult[reactId][0] for reactId in reactIds] | 798 pValues = [comparisonResult[reactId][0] for reactId in reactIds] |
788 | 799 |
789 # Apply the Benjamini-Hochberg correction and update | 800 # Apply the Benjamini-Hochberg correction and update |
790 adjustedPValues = st.multipletests(pValues, method="fdr_bh")[1] | 801 adjustedPValues = st.false_discovery_control(pValues) |
791 for i, reactId in enumerate(reactIds): | 802 for i, reactId in enumerate(reactIds): |
792 comparisonResult[reactId][0] = adjustedPValues[i] | 803 comparisonResult[reactId][0] = adjustedPValues[i] |
793 | 804 |
794 return comparisonResult, max_z_score | 805 return comparisonResult, max_z_score, netRPS |
795 | 806 |
796 def computeEnrichment(class_pat: Dict[str, List[List[float]]], ids: List[str], *, fromRAS=True) -> List[Tuple[str, str, dict, float]]: | 807 def computeEnrichment(class_pat: Dict[str, List[List[float]]], ids: List[str], *, fromRAS=True) -> Tuple[List[Tuple[str, str, dict, float]], dict]: |
797 """ | 808 """ |
798 Compares clustered data based on a given comparison mode and applies enrichment-based styling on the | 809 Compares clustered data based on a given comparison mode and applies enrichment-based styling on the |
799 provided metabolic map. | 810 provided metabolic map. |
800 | 811 |
801 Args: | 812 Args: |
802 class_pat : the clustered data. | 813 class_pat : the clustered data. |
803 ids : ids for data association. | 814 ids : ids for data association. |
804 fromRAS : whether the data to enrich consists of RAS scores. | 815 fromRAS : whether the data to enrich consists of RAS scores. |
805 | 816 |
806 Returns: | 817 Returns: |
807 List[Tuple[str, str, dict, float]]: List of tuples with pairs of dataset names, comparison dictionary, and max z-score. | 818 tuple: A tuple containing: |
808 | 819 - List[Tuple[str, str, dict, float]]: List of tuples with pairs of dataset names, comparison dictionary and max z-score. |
820 - dict : net RPS values for each dataset's reactions | |
821 | |
809 Raises: | 822 Raises: |
810 sys.exit : if there are less than 2 classes for comparison | 823 sys.exit : if there are less than 2 classes for comparison |
811 """ | 824 """ |
812 class_pat = {k.strip(): v for k, v in class_pat.items()} | 825 class_pat = {k.strip(): v for k, v in class_pat.items()} |
813 if (not class_pat) or (len(class_pat.keys()) < 2): | 826 if (not class_pat) or (len(class_pat.keys()) < 2): |
814 sys.exit('Execution aborted: classes provided for comparisons are less than two\n') | 827 sys.exit('Execution aborted: classes provided for comparisons are less than two\n') |
815 | 828 |
829 # { datasetName : { reactId : netRPS, ... }, ... } | |
830 netRPSResults :Dict[str, Dict[str, np.ndarray]] = {} | |
816 enrichment_results = [] | 831 enrichment_results = [] |
817 | 832 |
818 if ARGS.comparison == "manyvsmany": | 833 if ARGS.comparison == "manyvsmany": |
819 for i, j in it.combinations(class_pat.keys(), 2): | 834 for i, j in it.combinations(class_pat.keys(), 2): |
820 comparisonDict, max_z_score = compareDatasetPair(class_pat.get(i), class_pat.get(j), ids) | 835 comparisonDict, max_z_score, netRPS = compareDatasetPair(class_pat.get(i), class_pat.get(j), ids) |
821 enrichment_results.append((i, j, comparisonDict, max_z_score)) | 836 enrichment_results.append((i, j, comparisonDict, max_z_score)) |
837 netRPSResults[i] = { reactId : net[0] for reactId, net in netRPS.items() } | |
838 netRPSResults[j] = { reactId : net[1] for reactId, net in netRPS.items() } | |
822 | 839 |
823 elif ARGS.comparison == "onevsrest": | 840 elif ARGS.comparison == "onevsrest": |
824 for single_cluster in class_pat.keys(): | 841 for single_cluster in class_pat.keys(): |
825 rest = [item for k, v in class_pat.items() if k != single_cluster for item in v] | 842 rest = [item for k, v in class_pat.items() if k != single_cluster for item in v] |
826 comparisonDict, max_z_score = compareDatasetPair(class_pat.get(single_cluster), rest, ids) | 843 comparisonDict, max_z_score, netRPS = compareDatasetPair(class_pat.get(single_cluster), rest, ids) |
827 enrichment_results.append((single_cluster, "rest", comparisonDict, max_z_score)) | 844 enrichment_results.append((single_cluster, "rest", comparisonDict, max_z_score)) |
845 netRPSResults[single_cluster] = { reactId : net[0] for reactId, net in netRPS.items() } | |
846 netRPSResults["rest"] = { reactId : net[1] for reactId, net in netRPS.items() } | |
828 | 847 |
829 elif ARGS.comparison == "onevsmany": | 848 elif ARGS.comparison == "onevsmany": |
830 controlItems = class_pat.get(ARGS.control) | 849 controlItems = class_pat.get(ARGS.control) |
831 for otherDataset in class_pat.keys(): | 850 for otherDataset in class_pat.keys(): |
832 if otherDataset == ARGS.control: | 851 if otherDataset == ARGS.control: |
833 continue | 852 continue |
834 comparisonDict, max_z_score = compareDatasetPair(controlItems, class_pat.get(otherDataset), ids) | 853 |
854 comparisonDict, max_z_score, netRPS = compareDatasetPair(controlItems, class_pat.get(otherDataset), ids) | |
835 enrichment_results.append((ARGS.control, otherDataset, comparisonDict, max_z_score)) | 855 enrichment_results.append((ARGS.control, otherDataset, comparisonDict, max_z_score)) |
836 | 856 netRPSResults[ARGS.control] = { reactId : net[0] for reactId, net in netRPS.items() } |
837 return enrichment_results | 857 netRPSResults[otherDataset] = { reactId : net[1] for reactId, net in netRPS.items() } |
858 | |
859 return enrichment_results, netRPSResults | |
838 | 860 |
839 def createOutputMaps(dataset1Name: str, dataset2Name: str, core_map: ET.ElementTree) -> None: | 861 def createOutputMaps(dataset1Name: str, dataset2Name: str, core_map: ET.ElementTree) -> None: |
840 svgFilePath = buildOutputPath(dataset1Name, dataset2Name, details="SVG Map", ext=utils.FileFormat.SVG) | 862 svgFilePath = buildOutputPath(dataset1Name, dataset2Name, details="SVG Map", ext=utils.FileFormat.SVG) |
841 utils.writeSvg(svgFilePath, core_map) | 863 utils.writeSvg(svgFilePath, core_map) |
842 | 864 |
855 | 877 |
856 if not ARGS.generate_svg: # This argument is useless, who cares if the user wants the svg or not | 878 if not ARGS.generate_svg: # This argument is useless, who cares if the user wants the svg or not |
857 os.remove(svgFilePath.show()) | 879 os.remove(svgFilePath.show()) |
858 | 880 |
859 ClassPat = Dict[str, List[List[float]]] | 881 ClassPat = Dict[str, List[List[float]]] |
860 def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat]: | 882 def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat, Dict[str, List[str]]]: |
861 # TODO: I suggest creating dicts with ids as keys instead of keeping class_pat and ids separate, | 883 # TODO: I suggest creating dicts with ids as keys instead of keeping class_pat and ids separate, |
862 # for the sake of everyone's sanity. | 884 # for the sake of everyone's sanity. |
885 columnNames :Dict[str, List[str]] = {} # { datasetName : [ columnName, ... ], ... } | |
863 class_pat :ClassPat = {} | 886 class_pat :ClassPat = {} |
864 if ARGS.option == 'datasets': | 887 if ARGS.option == 'datasets': |
865 num = 1 | 888 num = 1 |
866 for path, name in zip(datasetsPaths, names): | 889 for path, name in zip(datasetsPaths, names): |
867 name = str(name) | 890 name = str(name) |
868 if name == 'Dataset': | 891 if name == 'Dataset': |
869 name += '_' + str(num) | 892 name += '_' + str(num) |
870 | 893 |
871 values, ids = getDatasetValues(path, name) | 894 values, ids = getDatasetValues(path, name) |
872 if values != None: | 895 if values != None: |
873 class_pat[name] = list(map(list, zip(*values.values()))) | 896 class_pat[name] = list(map(list, zip(*values.values()))) # TODO: ??? |
874 # TODO: ??? | 897 columnNames[name] = list(values.keys()) |
875 | 898 |
876 num += 1 | 899 num += 1 |
877 | 900 |
878 elif ARGS.option == "dataset_class": | 901 elif ARGS.option == "dataset_class": |
879 classes = read_dataset(classPath, "class") | 902 classes = read_dataset(classPath, "class") |
880 classes = classes.astype(str) | 903 classes = classes.astype(str) |
881 | 904 |
882 values, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)") | 905 values, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)") |
883 if values != None: class_pat = split_class(classes, values) | 906 if values != None: |
884 | 907 # TODO: add the columnNames thing, I didn't because I don't understand the whole "dataset classes" thing |
885 return ids, class_pat | 908 class_pat = split_class(classes, values) |
909 | |
910 return ids, class_pat, columnNames | |
886 #^^^ TODO: this could be a match statement over an enum, make it happen future marea dev with python 3.12! (it's why I kept the ifs) | 911 #^^^ TODO: this could be a match statement over an enum, make it happen future marea dev with python 3.12! (it's why I kept the ifs) |
887 | 912 |
888 #TODO: create these damn args as FilePath objects | 913 #TODO: create these damn args as FilePath objects |
889 def getDatasetValues(datasetPath :str, datasetName :str) -> Tuple[ClassPat, List[str]]: | 914 def getDatasetValues(datasetPath :str, datasetName :str) -> Tuple[ClassPat, List[str]]: |
890 """ | 915 """ |
930 # Prepare enrichment results containers | 955 # Prepare enrichment results containers |
931 ras_results = [] | 956 ras_results = [] |
932 rps_results = [] | 957 rps_results = [] |
933 | 958 |
934 # Compute RAS enrichment if requested | 959 # Compute RAS enrichment if requested |
935 if ARGS.using_RAS: | 960 if ARGS.using_RAS: # vvv columnNames only matter with RPS data |
936 ids_ras, class_pat_ras = getClassesAndIdsFromDatasets( | 961 ids_ras, class_pat_ras, _ = getClassesAndIdsFromDatasets( |
937 ARGS.input_datas, ARGS.input_data, ARGS.input_class, ARGS.names) | 962 ARGS.input_datas, ARGS.input_data, ARGS.input_class, ARGS.names) |
938 ras_results = computeEnrichment(class_pat_ras, ids_ras, fromRAS=True) | 963 ras_results, _ = computeEnrichment(class_pat_ras, ids_ras, fromRAS=True) |
964 # ^^^ netRPS only matter with RPS data | |
939 | 965 |
940 # Compute RPS enrichment if requested | 966 # Compute RPS enrichment if requested |
941 if ARGS.using_RPS: | 967 if ARGS.using_RPS: |
942 ids_rps, class_pat_rps = getClassesAndIdsFromDatasets( | 968 ids_rps, class_pat_rps, columnNames = getClassesAndIdsFromDatasets( |
943 ARGS.input_datas_rps, ARGS.input_data_rps, ARGS.input_class_rps, ARGS.names_rps) | 969 ARGS.input_datas_rps, ARGS.input_data_rps, ARGS.input_class_rps, ARGS.names_rps) |
944 rps_results = computeEnrichment(class_pat_rps, ids_rps, fromRAS=False) | 970 |
971 rps_results, netRPS = computeEnrichment(class_pat_rps, ids_rps, fromRAS=False) | |
945 | 972 |
946 # Organize by comparison pairs | 973 # Organize by comparison pairs |
947 comparisons: Dict[Tuple[str, str], Dict[str, Tuple]] = {} | 974 comparisons: Dict[Tuple[str, str], Dict[str, Tuple]] = {} |
948 for i, j, comparison_data, max_z_score in ras_results: | 975 for i, j, comparison_data, max_z_score in ras_results: |
949 comparisons[(i, j)] = {'ras': (comparison_data, max_z_score), 'rps': None} | 976 comparisons[(i, j)] = {'ras': (comparison_data, max_z_score), 'rps': None} |
950 for i, j, comparison_data, max_z_score in rps_results: | 977 |
978 for i, j, comparison_data, max_z_score in rps_results: |
951 comparisons.setdefault((i, j), {}).update({'rps': (comparison_data, max_z_score)}) | 979 comparisons.setdefault((i, j), {}).update({'rps': (comparison_data, max_z_score)}) |
952 | 980 |
953 # For each comparison, create a styled map with RAS bodies and RPS heads | 981 # For each comparison, create a styled map with RAS bodies and RPS heads |
954 for (i, j), res in comparisons.items(): | 982 for (i, j), res in comparisons.items(): |
955 map_copy = copy.deepcopy(core_map) | 983 map_copy = copy.deepcopy(core_map) |
965 # applyRpsEnrichmentToMap styles only heads unless only RPS are used | 993 # applyRpsEnrichmentToMap styles only heads unless only RPS are used |
966 temp_thingsInCommon(tmp_rps, map_copy, max_z_rps, i, j, ras_enrichment=False) | 994 temp_thingsInCommon(tmp_rps, map_copy, max_z_rps, i, j, ras_enrichment=False) |
967 | 995 |
968 # Output both SVG and PDF/PNG as configured | 996 # Output both SVG and PDF/PNG as configured |
969 createOutputMaps(i, j, map_copy) | 997 createOutputMaps(i, j, map_copy) |
998 | |
999 # Add net RPS output file | |
1000 if ARGS.net or not ARGS.using_RAS: | |
1001 for datasetName, rows in netRPS.items(): | |
1002 writeToCsv( | |
1003 [[reactId, *netValues] for reactId, netValues in rows.items()], | |
1004 # vvv In weird comparison modes the dataset names are not recorded properly.. | |
1005 columnNames.get(datasetName, ["Reactions", "other cool column names.."]), | |
1006 utils.FilePath( | |
1007 datasetName, ext = utils.FileFormat.CSV, prefix = ARGS.output_path)) | |
1008 | |
970 print('Execution succeeded') | 1009 print('Execution succeeded') |
971 ############################################################################### | 1010 ############################################################################### |
972 if __name__ == "__main__": | 1011 if __name__ == "__main__": |
973 main() | 1012 main() |