comparison COBRAxy/flux_to_map.py @ 148:5683406a8cfd draft

Uploaded
author luca_milaz
date Wed, 06 Nov 2024 20:57:11 +0000 (2 months ago)
parents 3fca9b568faf
children 8e3cbf68cdc4
comparison
equal deleted inserted replaced
147:3fca9b568faf 148:5683406a8cfd
731 tmp[reactId] = [float(p_value), avg, z_score, avg1, avg2] 731 tmp[reactId] = [float(p_value), avg, z_score, avg1, avg2]
732 except (TypeError, ZeroDivisionError): continue 732 except (TypeError, ZeroDivisionError): continue
733 733
734 return tmp, max_z_score 734 return tmp, max_z_score
735 735
736 def computeEnrichment(metabMap :ET.ElementTree, class_pat :Dict[str, List[List[float]]], ids :List[str]) -> None: 736 def computeEnrichment(metabMap :ET.ElementTree, class_pat :Dict[str, List[List[float]]], ids :List[str]) -> List[Tuple[str, str, dict, float]]:
737 """ 737 """
738 Compares clustered data based on a given comparison mode and applies enrichment-based styling on the 738 Compares clustered data based on a given comparison mode and applies enrichment-based styling on the
739 provided metabolic map. 739 provided metabolic map.
740 740
741 Args: 741 Args:
743 class_pat : the clustered data. 743 class_pat : the clustered data.
744 ids : ids for data association. 744 ids : ids for data association.
745 745
746 746
747 Returns: 747 Returns:
748 None 748 List[Tuple[str, str, dict, float]]: List of tuples with pairs of dataset names, comparison dictionary, and max z-score.
749 749
750 Raises: 750 Raises:
751 sys.exit : if there are less than 2 classes for comparison 751 sys.exit : if there are less than 2 classes for comparison
752 752
753 Side effects: 753 Side effects:
756 """ 756 """
757 class_pat = { k.strip() : v for k, v in class_pat.items() } 757 class_pat = { k.strip() : v for k, v in class_pat.items() }
758 #TODO: simplfy this stuff vvv and stop using sys.exit (raise the correct utils error) 758 #TODO: simplfy this stuff vvv and stop using sys.exit (raise the correct utils error)
759 if (not class_pat) or (len(class_pat.keys()) < 2): sys.exit('Execution aborted: classes provided for comparisons are less than two\n') 759 if (not class_pat) or (len(class_pat.keys()) < 2): sys.exit('Execution aborted: classes provided for comparisons are less than two\n')
760 760
761 enrichment_results = []
762
763
761 if ARGS.comparison == "manyvsmany": 764 if ARGS.comparison == "manyvsmany":
762 for i, j in it.combinations(class_pat.keys(), 2): 765 for i, j in it.combinations(class_pat.keys(), 2):
763 #TODO: these 2 functions are always called in pair and in this order and need common data,
764 # some clever refactoring would be appreciated.
765 comparisonDict, max_z_score = compareDatasetPair(class_pat.get(i), class_pat.get(j), ids) 766 comparisonDict, max_z_score = compareDatasetPair(class_pat.get(i), class_pat.get(j), ids)
766 temp_thingsInCommon(comparisonDict, metabMap, max_z_score, i, j) 767 enrichment_results.append((i, j, comparisonDict, max_z_score))
768
769
767 770
768 elif ARGS.comparison == "onevsrest": 771 elif ARGS.comparison == "onevsrest":
769 for single_cluster in class_pat.keys(): 772 for single_cluster in class_pat.keys():
770 t :List[List[List[float]]] = [] 773 rest = [item for k, v in class_pat.items() if k != single_cluster for item in v]
771 for k in class_pat.keys():
772 if k != single_cluster:
773 t.append(class_pat.get(k))
774
775 rest :List[List[float]] = []
776 for i in t:
777 rest = rest + i
778
779 comparisonDict, max_z_score = compareDatasetPair(class_pat.get(single_cluster), rest, ids) 774 comparisonDict, max_z_score = compareDatasetPair(class_pat.get(single_cluster), rest, ids)
780 temp_thingsInCommon(comparisonDict, metabMap, max_z_score, single_cluster) 775 enrichment_results.append((single_cluster, "rest", comparisonDict, max_z_score))
781 776
782 elif ARGS.comparison == "onevsmany": 777 elif ARGS.comparison == "onevsmany":
783 controlItems = class_pat.get(ARGS.control) 778 controlItems = class_pat.get(ARGS.control)
784 for otherDataset in class_pat.keys(): 779 for otherDataset in class_pat.keys():
785 if otherDataset == ARGS.control: continue 780 if otherDataset == ARGS.control:
786 781 continue
787 comparisonDict, max_z_score = compareDatasetPair(controlItems, class_pat.get(otherDataset), ids) 782 comparisonDict, max_z_score = compareDatasetPair(controlItems, class_pat.get(otherDataset), ids)
788 temp_thingsInCommon(comparisonDict, metabMap, max_z_score, ARGS.control, otherDataset) 783 enrichment_results.append((ARGS.control, otherDataset, comparisonDict, max_z_score))
784 return enrichment_results
789 785
790 def createOutputMaps(dataset1Name :str, dataset2Name :str, core_map :ET.ElementTree) -> None: 786 def createOutputMaps(dataset1Name :str, dataset2Name :str, core_map :ET.ElementTree) -> None:
791 svgFilePath = buildOutputPath(dataset1Name, dataset2Name, details = "SVG Map", ext = utils.FileFormat.SVG) 787 svgFilePath = buildOutputPath(dataset1Name, dataset2Name, details="SVG Map", ext=utils.FileFormat.SVG)
792 utils.writeSvg(svgFilePath, core_map) 788 utils.writeSvg(svgFilePath, core_map)
793 789
794 if ARGS.generate_pdf: 790 if ARGS.generate_pdf:
795 pngPath = buildOutputPath(dataset1Name, dataset2Name, details = "PNG Map", ext = utils.FileFormat.PNG) 791 pngPath = buildOutputPath(dataset1Name, dataset2Name, details="PNG Map", ext=utils.FileFormat.PNG)
796 pdfPath = buildOutputPath(dataset1Name, dataset2Name, details = "PDF Map", ext = utils.FileFormat.PDF) 792 pdfPath = buildOutputPath(dataset1Name, dataset2Name, details="PDF Map", ext=utils.FileFormat.PDF)
797 convert_to_pdf(svgFilePath, pngPath, pdfPath) 793 convert_to_pdf(svgFilePath, pngPath, pdfPath)
798 794
799 if not ARGS.generate_svg: os.remove(svgFilePath.show()) 795 if not ARGS.generate_svg:
796 os.remove(svgFilePath.show())
800 797
801 ClassPat = Dict[str, List[List[float]]] 798 ClassPat = Dict[str, List[List[float]]]
802 def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat]: 799 def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat]:
803 # TODO: I suggest creating dicts with ids as keys instead of keeping class_pat and ids separate, 800 # TODO: I suggest creating dicts with ids as keys instead of keeping class_pat and ids separate,
804 # for the sake of everyone's sanity. 801 # for the sake of everyone's sanity.
994 pdfPath = utils.FilePath(f"PDF Map {map_type} - {key}", ext=utils.FileFormat.PDF, prefix=ARGS.output_path) 991 pdfPath = utils.FilePath(f"PDF Map {map_type} - {key}", ext=utils.FileFormat.PDF, prefix=ARGS.output_path)
995 convert_to_pdf(svgFilePath, pngPath, pdfPath) 992 convert_to_pdf(svgFilePath, pngPath, pdfPath)
996 if not ARGS.generate_svg: 993 if not ARGS.generate_svg:
997 os.remove(svgFilePath.show()) 994 os.remove(svgFilePath.show())
998 995
999
1000
1001 996
1002 ############################ MAIN ############################################# 997 ############################ MAIN #############################################
1003 def main(args:List[str] = None) -> None: 998 def main(args:List[str] = None) -> None:
1004 """ 999 """
1005 Initializes everything and sets the program in motion based on the fronted input arguments. 1000 Initializes everything and sets the program in motion based on the fronted input arguments.
1031 elif(ARGS.choice_map == utils.Model.ENGRO2): 1026 elif(ARGS.choice_map == utils.Model.ENGRO2):
1032 temp_map = utils.Model.ENGRO2_no_legend 1027 temp_map = utils.Model.ENGRO2_no_legend
1033 computeEnrichmentMeanMedian(temp_map.getMap(ARGS.tool_dir), class_pat, ids, ARGS.color_map) 1028 computeEnrichmentMeanMedian(temp_map.getMap(ARGS.tool_dir), class_pat, ids, ARGS.color_map)
1034 else: 1029 else:
1035 computeEnrichmentMeanMedian(core_map, class_pat, ids, ARGS.color_map) 1030 computeEnrichmentMeanMedian(core_map, class_pat, ids, ARGS.color_map)
1036 1031
1037 1032
1038 computeEnrichment(core_map, class_pat, ids) 1033 enrichment_results = computeEnrichment(core_map, class_pat, ids)
1039 1034 for i, j, comparisonDict, max_z_score in enrichment_results:
1040 # create output files: TODO: this is the same comparison happening in "maps", find a better way to organize this 1035 map_copy = copy.deepcopy(core_map)
1041 if ARGS.comparison == "manyvsmany": 1036 temp_thingsInCommon(comparisonDict, map_copy, max_z_score, i, j)
1042 for i, j in it.combinations(class_pat.keys(), 2): createOutputMaps(i, j, core_map) 1037 createOutputMaps(i, j, map_copy)
1043 return 1038
1044
1045 if ARGS.comparison == "onevsrest":
1046 for single_cluster in class_pat.keys(): createOutputMaps(single_cluster, "rest", core_map)
1047 return
1048
1049 for otherDataset in class_pat.keys():
1050 if otherDataset != ARGS.control: createOutputMaps(i, j, core_map)
1051
1052 if not ERRORS: return 1039 if not ERRORS: return
1053 utils.logWarning( 1040 utils.logWarning(
1054 f"The following reaction IDs were mentioned in the dataset but weren't found in the map: {ERRORS}", 1041 f"The following reaction IDs were mentioned in the dataset but weren't found in the map: {ERRORS}",
1055 ARGS.out_log) 1042 ARGS.out_log)
1056 1043
1057 print('Execution succeded') 1044 print('Execution succeded')
1058 1045
1059 ############################################################################### 1046 ###############################################################################
1060 if __name__ == "__main__": 1047 if __name__ == "__main__":
1061 main() 1048 main()
1049