comparison COBRAxy/flux_to_map.py @ 293:7b8d9de81a86 draft

Uploaded
author francesco_lapi
date Thu, 15 May 2025 18:23:52 +0000
parents e87aeb3a33cd
children 626b6d1de075
comparison
equal deleted inserted replaced
292:31bc171a6ba5 293:7b8d9de81a86
13 from PIL import Image 13 from PIL import Image
14 import os 14 import os
15 import copy 15 import copy
16 import argparse 16 import argparse
17 import pyvips 17 import pyvips
18 from PIL import Image, ImageDraw, ImageFont 18 from PIL import Image
19 from typing import Tuple, Union, Optional, List, Dict 19 from typing import Tuple, Union, Optional, List, Dict
20 import matplotlib.pyplot as plt 20 import matplotlib.pyplot as plt
21 21
22 ERRORS = [] 22 ERRORS = []
23 ########################## argparse ########################################## 23 ########################## argparse ##########################################
48 48
49 #Computation details: 49 #Computation details:
50 parser.add_argument( 50 parser.add_argument(
51 '-co', '--comparison', 51 '-co', '--comparison',
52 type = str, 52 type = str,
53 default = '1vs1', 53 default = 'manyvsmany',
54 choices = ['manyvsmany', 'onevsrest', 'onevsmany']) 54 choices = ['manyvsmany', 'onevsrest', 'onevsmany'])
55
56 parser.add_argument(
57 '-te' ,'--test',
58 type = str,
59 default = 'ks',
60 choices = ['ks', 'ttest_p', 'ttest_ind', 'wilcoxon', 'mw'],
61 help = 'Statistical test to use (default: %(default)s)')
55 62
56 parser.add_argument( 63 parser.add_argument(
57 '-pv' ,'--pValue', 64 '-pv' ,'--pValue',
58 type = float, 65 type = float,
59 default = 0.1, 66 default = 0.1,
128 135
129 args :argparse.Namespace = parser.parse_args(args) 136 args :argparse.Namespace = parser.parse_args(args)
130 args.net = True # TODO SICCOME I FLUSSI POSSONO ESSERE ANCHE NEGATIVI SONO SEMPRE CONSIDERATI NETTI 137 args.net = True # TODO SICCOME I FLUSSI POSSONO ESSERE ANCHE NEGATIVI SONO SEMPRE CONSIDERATI NETTI
131 138
132 return args 139 return args
133 140
134 ############################ dataset input #################################### 141 ############################ dataset input ####################################
135 def read_dataset(data :str, name :str) -> pd.DataFrame: 142 def read_dataset(data :str, name :str) -> pd.DataFrame:
136 """ 143 """
137 Tries to read the dataset from its path (data) as a tsv and turns it into a DataFrame. 144 Tries to read the dataset from its path (data) as a tsv and turns it into a DataFrame.
138 145
193 return '-INF' 200 return '-INF'
194 elif avg2 == 0: 201 elif avg2 == 0:
195 return 'INF' 202 return 'INF'
196 else: # (threshold_F_C - 1) / (abs(threshold_F_C) + 1) con threshold_F_C > 1 203 else: # (threshold_F_C - 1) / (abs(threshold_F_C) + 1) con threshold_F_C > 1
197 return (avg1 - avg2) / (abs(avg1) + abs(avg2)) 204 return (avg1 - avg2) / (abs(avg1) + abs(avg2))
198
199 def fix_style(l :str, col :Optional[str], width :str, dash :str) -> str:
200 """
201 Produces a "fixed" style string to assign to a reaction arrow in the SVG map, assigning style properties to the corresponding values passed as input params.
202
203 Args:
204 l : current style string of an SVG element
205 col : new value for the "stroke" style property
206 width : new value for the "stroke-width" style property
207 dash : new value for the "stroke-dasharray" style property
208
209 Returns:
210 str : the fixed style string
211 """
212 tmp = l.split(';')
213 flag_col = False
214 flag_width = False
215 flag_dash = False
216 for i in range(len(tmp)):
217 if tmp[i].startswith('stroke:'):
218 tmp[i] = 'stroke:' + col
219 flag_col = True
220 if tmp[i].startswith('stroke-width:'):
221 tmp[i] = 'stroke-width:' + width
222 flag_width = True
223 if tmp[i].startswith('stroke-dasharray:'):
224 tmp[i] = 'stroke-dasharray:' + dash
225 flag_dash = True
226 if not flag_col:
227 tmp.append('stroke:' + col)
228 if not flag_width:
229 tmp.append('stroke-width:' + width)
230 if not flag_dash:
231 tmp.append('stroke-dasharray:' + dash)
232 return ';'.join(tmp)
233
234 # The type of d values is collapsed, losing precision, because the dict containst lists instead of tuples, please fix!
235 def fix_map(d :Dict[str, List[Union[float, FoldChange]]], core_map :ET.ElementTree, threshold_P_V :float, threshold_F_C :float, max_z_score :float) -> ET.ElementTree:
236 """
237 Edits the selected SVG map based on the p-value and fold change data (d) and some significance thresholds also passed as inputs.
238
239 Args:
240 d : dictionary mapping a p-value and a fold-change value (values) to each reaction ID as encoded in the SVG map (keys)
241 core_map : SVG map to modify
242 threshold_P_V : threshold for a p-value to be considered significant
243 threshold_F_C : threshold for a fold change value to be considered significant
244 max_z_score : highest z-score (absolute value)
245
246 Returns:
247 ET.ElementTree : the modified core_map
248
249 Side effects:
250 core_map : mut
251 """
252 maxT = 12
253 minT = 2
254 grey = '#BEBEBE'
255 blue = '#6495ed' # azzurrino
256 red = '#ecac68' # arancione
257 for el in core_map.iter():
258 el_id = str(el.get('id'))
259 if el_id.startswith('R_'):
260 tmp = d.get(el_id[2:])
261 if tmp != None:
262 p_val :float = tmp[0]
263 f_c = tmp[1]
264 z_score = tmp[2]
265
266 if math.isnan(p_val) or (isinstance(f_c, float) and math.isnan(f_c)): continue
267
268 if p_val < threshold_P_V:
269 if not isinstance(f_c, str):
270 if abs(f_c) < ((threshold_F_C - 1) / (abs(threshold_F_C) + 1)): #
271 col = grey
272 width = str(minT)
273 else:
274 if f_c < 0:
275 col = blue
276 elif f_c > 0:
277 col = red
278 width = str(max((abs(z_score) * maxT) / max_z_score, minT))
279 else:
280 if f_c == '-INF':
281 col = blue
282 elif f_c == 'INF':
283 col = red
284 width = str(maxT)
285 dash = 'none'
286 else:
287 dash = '5,5'
288 col = grey
289 width = str(minT)
290 el.set('style', fix_style(el.get('style', ""), col, width, dash))
291 return core_map
292 205
293 def getElementById(reactionId :str, metabMap :ET.ElementTree) -> utils.Result[ET.Element, utils.Result.ResultErr]: 206 def getElementById(reactionId :str, metabMap :ET.ElementTree) -> utils.Result[ET.Element, utils.Result.ResultErr]:
294 """ 207 """
295 Finds any element in the given map with the given ID. ID uniqueness in an svg file is recommended but 208 Finds any element in the given map with the given ID. ID uniqueness in an svg file is recommended but
296 not enforced, if more than one element with the exact ID is found only the first will be returned. 209 not enforced, if more than one element with the exact ID is found only the first will be returned.
494 INVALID_ARROW.styleReactionElements(metabMap, reactionId, mindReactionDir = False) 407 INVALID_ARROW.styleReactionElements(metabMap, reactionId, mindReactionDir = False)
495 408
496 continue 409 continue
497 410
498 width = Arrow.MAX_W 411 width = Arrow.MAX_W
499 if not math.isinf(foldChange): 412 if not math.isinf(z_score):
500 try: 413 try:
501 width = max(abs(z_score * Arrow.MAX_W) / maxNumericZScore, Arrow.MIN_W) 414 width = min(
415 max(abs(z_score * Arrow.MAX_W) / maxNumericZScore, Arrow.MIN_W),
416 Arrow.MAX_W)
502 417
503 except ZeroDivisionError: pass 418 except ZeroDivisionError: pass
504 # TODO CHECK RV 419 # TODO CHECK RV
505 #if not reactionId.endswith("_RV"): # RV stands for reversible reactions 420 #if not reactionId.endswith("_RV"): # RV stands for reversible reactions
506 # Arrow(width, ArrowColor.fromFoldChangeSign(foldChange)).styleReactionElements(metabMap, reactionId) 421 # Arrow(width, ArrowColor.fromFoldChangeSign(foldChange)).styleReactionElements(metabMap, reactionId)
695 dataset1Data : data from the 1st dataset. 610 dataset1Data : data from the 1st dataset.
696 dataset2Data : data from the 2nd dataset. 611 dataset2Data : data from the 2nd dataset.
697 612
698 Returns: 613 Returns:
699 tuple: (P-value, Z-score) 614 tuple: (P-value, Z-score)
700 - P-value from a Kolmogorov-Smirnov test on the provided data. 615 - P-value from the selected test on the provided data.
701 - Z-score of the difference between means of the two datasets. 616 - Z-score of the difference between means of the two datasets.
702 """ 617 """
703 # Perform Kolmogorov-Smirnov test 618
704 ks_statistic, p_value = st.ks_2samp(dataset1Data, dataset2Data) 619 match ARGS.test:
620 case "ks":
621 # Perform Kolmogorov-Smirnov test
622 _, p_value = st.ks_2samp(dataset1Data, dataset2Data)
623 case "ttest_p":
624 # Perform t-test for paired samples
625 _, p_value = st.ttest_rel(dataset1Data, dataset2Data)
626 case "ttest_ind":
627 # Perform t-test for independent samples
628 _, p_value = st.ttest_ind(dataset1Data, dataset2Data)
629 case "wilcoxon":
630 # Perform Wilcoxon signed-rank test
631 _, p_value = st.wilcoxon(dataset1Data, dataset2Data)
632 case "mw":
633 # Perform Mann-Whitney U test
634 _, p_value = st.mannwhitneyu(dataset1Data, dataset2Data)
705 635
706 # Calculate means and standard deviations 636 # Calculate means and standard deviations
707 mean1 = np.nanmean(dataset1Data) 637 mean1 = np.nanmean(dataset1Data)
708 mean2 = np.nanmean(dataset2Data) 638 mean2 = np.nanmean(dataset2Data)
709 std1 = np.nanstd(dataset1Data, ddof=1) 639 std1 = np.nanstd(dataset1Data, ddof=1)
730 try: 660 try:
731 p_value, z_score = computePValue(l1, l2) 661 p_value, z_score = computePValue(l1, l2)
732 avg1 = sum(l1) / len(l1) 662 avg1 = sum(l1) / len(l1)
733 avg2 = sum(l2) / len(l2) 663 avg2 = sum(l2) / len(l2)
734 f_c = fold_change(avg1, avg2) 664 f_c = fold_change(avg1, avg2)
735 if not isinstance(z_score, str) and max_z_score < abs(z_score): max_z_score = abs(z_score) 665 if np.isfinite(z_score) and max_z_score < abs(z_score): max_z_score = abs(z_score)
736 666
737 tmp[reactId] = [float(p_value), f_c, z_score, avg1, avg2] 667 tmp[reactId] = [float(p_value), f_c, z_score, avg1, avg2]
738 except (TypeError, ZeroDivisionError): continue 668 except (TypeError, ZeroDivisionError): continue
739 669
740 return tmp, max_z_score 670 return tmp, max_z_score
813 743
814 elif ARGS.option == "dataset_class": 744 elif ARGS.option == "dataset_class":
815 classes = read_dataset(classPath, "class") 745 classes = read_dataset(classPath, "class")
816 classes = classes.astype(str) 746 classes = classes.astype(str)
817 resolve_rules_float, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)") 747 resolve_rules_float, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)")
818 #check if classes have mathc on ids 748 #check if classes have match on ids
819 if not all(classes.iloc[:, 0].isin(ids)): 749 if not all(classes.iloc[:, 0].isin(ids)):
820 utils.logWarning( 750 utils.logWarning(
821 "No match between classes and sample IDs", ARGS.out_log) 751 "No match between classes and sample IDs", ARGS.out_log)
822 if resolve_rules_float != None: class_pat = split_class(classes, resolve_rules_float) 752 if resolve_rules_float != None: class_pat = split_class(classes, resolve_rules_float)
823 753
886 dataset.index.name: ['LactGlc', 'LactGln', 'LactO2', 'GluGln'], 816 dataset.index.name: ['LactGlc', 'LactGln', 'LactO2', 'GluGln'],
887 **{col: [values['lact_glc'][i], values['lact_gln'][i], values['lact_o2'][i], values['glu_gln'][i]] 817 **{col: [values['lact_glc'][i], values['lact_gln'][i], values['lact_o2'][i], values['glu_gln'][i]]
888 for i, col in enumerate(dataset.columns)} 818 for i, col in enumerate(dataset.columns)}
889 }) 819 })
890 820
891 print(new_rows) 821 #print(new_rows)
892 822
893 # Ritorna il dataset originale con le nuove righe 823 # Ritorna il dataset originale con le nuove righe
894 dataset.reset_index(inplace=True) 824 dataset.reset_index(inplace=True)
895 dataset = pd.concat([dataset, new_rows], ignore_index=True) 825 dataset = pd.concat([dataset, new_rows], ignore_index=True)
896 826
910 str: The color in hexadecimal format (e.g., '#ff0000' for red). 840 str: The color in hexadecimal format (e.g., '#ff0000' for red).
911 """ 841 """
912 # Convert RGB values (0-1 range) to hexadecimal format 842 # Convert RGB values (0-1 range) to hexadecimal format
913 rgb = (np.array(rgb) * 255).astype(int) 843 rgb = (np.array(rgb) * 255).astype(int)
914 return '#{:02x}{:02x}{:02x}'.format(rgb[0], rgb[1], rgb[2]) 844 return '#{:02x}{:02x}{:02x}'.format(rgb[0], rgb[1], rgb[2])
915
916
917 845
918 def save_colormap_image(min_value: float, max_value: float, path: utils.FilePath, colorMap:str="viridis"): 846 def save_colormap_image(min_value: float, max_value: float, path: utils.FilePath, colorMap:str="viridis"):
919 """ 847 """
920 Create and save an image of the colormap showing the gradient and its range. 848 Create and save an image of the colormap showing the gradient and its range.
921 849
1058 pdfPath = utils.FilePath(f"PDF Map {map_type} - {key}", ext=utils.FileFormat.PDF, prefix=ARGS.output_path) 986 pdfPath = utils.FilePath(f"PDF Map {map_type} - {key}", ext=utils.FileFormat.PDF, prefix=ARGS.output_path)
1059 convert_to_pdf(svgFilePath, pngPath, pdfPath) 987 convert_to_pdf(svgFilePath, pngPath, pdfPath)
1060 if not ARGS.generate_svg: 988 if not ARGS.generate_svg:
1061 os.remove(svgFilePath.show()) 989 os.remove(svgFilePath.show())
1062 990
1063
1064 ############################ MAIN ############################################# 991 ############################ MAIN #############################################
1065 def main(args:List[str] = None) -> None: 992 def main(args:List[str] = None) -> None:
1066 """ 993 """
1067 Initializes everything and sets the program in motion based on the fronted input arguments. 994 Initializes everything and sets the program in motion based on the fronted input arguments.
1068 995