492
|
1 # Python API Tutorial
|
|
2
|
|
3 Learn how to use COBRAxy tools programmatically in Python scripts and analysis pipelines.
|
|
4
|
|
5 ## Overview
|
|
6
|
|
7 This tutorial teaches you to integrate COBRAxy into Python workflows by calling tool main functions directly with parsed arguments.
|
|
8
|
|
9 **Time required**: ~45 minutes
|
|
10 **Difficulty**: Intermediate
|
|
11 **Prerequisites**: Basic Python knowledge, COBRAxy installation
|
|
12
|
|
13 ## Understanding COBRAxy Architecture
|
|
14
|
|
15 ### Tool Structure
|
|
16
|
|
17 Each COBRAxy tool is a Python module with:
|
|
18 - `main(args)` function that accepts argument list
|
|
19 - Command-line argument parsing
|
|
20 - Self-contained execution logic
|
|
21
|
|
22 ```python
|
|
23 # General pattern for all tools
|
|
24 import tool_module
|
|
25 tool_module.main(['-arg1', 'value1', '-arg2', 'value2'])
|
|
26 ```
|
|
27
|
|
28 ### Available Tools
|
|
29
|
|
30 | Python Module | Purpose | Key Arguments |
|
|
31 |---------------|---------|---------------|
|
|
32 | `ras_generator` | Compute reaction activity scores | `-in`, `-ra`, `-rs` |
|
|
33 | `rps_generator` | Compute reaction propensity scores | `-id`, `-rp` |
|
|
34 | `marea` | Statistical pathway analysis | `-input_data`, `-choice_map` |
|
|
35 | `ras_to_bounds` | Apply RAS constraints to model | `-ir`, `-ms`, `-idop` |
|
|
36 | `flux_simulation` | Sample metabolic fluxes | `-ms`, `-in`, `-a`, `-ns` |
|
|
37 | `flux_to_map` | Add flux data to maps | `-if`, `-mp`, `-idop` |
|
|
38
|
|
39 ## Setup Your Environment
|
|
40
|
|
41 ### Import Required Modules
|
|
42
|
|
43 ```python
|
|
44 import sys
|
|
45 import os
|
|
46 from pathlib import Path
|
|
47
|
|
48 # Add COBRAxy to Python path
|
|
49 cobraxy_path = "/path/to/COBRAxy"
|
|
50 sys.path.insert(0, cobraxy_path)
|
|
51
|
|
52 # Import COBRAxy tools
|
|
53 import ras_generator
|
|
54 import rps_generator
|
|
55 import marea
|
|
56 import ras_to_bounds
|
|
57 import flux_simulation
|
|
58 import flux_to_map
|
|
59 import metabolicModel2Tabular as model_setting
|
|
60 ```
|
|
61
|
|
62 ### Set Working Directory
|
|
63
|
|
64 ```python
|
|
65 # Set up working directory
|
|
66 work_dir = Path("/path/to/analysis")
|
|
67 work_dir.mkdir(exist_ok=True)
|
|
68 os.chdir(work_dir)
|
|
69
|
|
70 # COBRAxy tools expect this parameter
|
|
71 tool_dir = str(Path(cobraxy_path).absolute())
|
|
72 ```
|
|
73
|
|
74 ## Basic Workflow Example
|
|
75
|
|
76 ### Step 1: Prepare Sample Data
|
|
77
|
|
78 ```python
|
|
79 import pandas as pd
|
|
80 import numpy as np
|
|
81
|
|
82 # Create sample gene expression data
|
|
83 genes = ['HGNC:5', 'HGNC:10', 'HGNC:15', 'HGNC:25', 'HGNC:30']
|
|
84 samples = ['Control_1', 'Control_2', 'Treatment_1', 'Treatment_2']
|
|
85
|
|
86 # Generate random expression values
|
|
87 np.random.seed(42)
|
|
88 data = np.random.lognormal(mean=2, sigma=1, size=(len(genes), len(samples)))
|
|
89
|
|
90 # Create DataFrame
|
|
91 expression_df = pd.DataFrame(data, index=genes, columns=samples)
|
|
92 expression_df.index.name = 'Gene_ID'
|
|
93
|
|
94 # Save to file
|
|
95 expression_file = work_dir / "expression_data.tsv"
|
|
96 expression_df.to_csv(expression_file, sep='\t')
|
|
97 print(f"Created sample data: {expression_file}")
|
|
98 ```
|
|
99
|
|
100 ### Step 2: Extract Model Information
|
|
101
|
|
102 ```python
|
|
103 # Extract model components (optional, for understanding model structure)
|
|
104 model_args = [
|
|
105 '-td', tool_dir,
|
|
106 '-ms', 'ENGRO2', # Use built-in ENGRO2 model
|
|
107 '-idop', str(work_dir / 'model_info')
|
|
108 ]
|
|
109
|
|
110 try:
|
|
111 model_setting.main(model_args)
|
|
112 print("✓ Model information extracted")
|
|
113 except Exception as e:
|
|
114 print(f"Model extraction failed: {e}")
|
|
115 ```
|
|
116
|
|
117 ### Step 3: Generate RAS Scores
|
|
118
|
|
119 ```python
|
|
120 # Generate Reaction Activity Scores
|
|
121 ras_output = work_dir / "ras_scores.tsv"
|
|
122
|
|
123 ras_args = [
|
|
124 '-td', tool_dir,
|
|
125 '-in', str(expression_file),
|
|
126 '-ra', str(ras_output),
|
|
127 '-rs', 'ENGRO2', # Built-in model
|
|
128 '-n', 'true' # Handle missing genes
|
|
129 ]
|
|
130
|
|
131 try:
|
|
132 ras_generator.main(ras_args)
|
|
133 print(f"✓ RAS scores generated: {ras_output}")
|
|
134 except Exception as e:
|
|
135 print(f"RAS generation failed: {e}")
|
|
136 raise
|
|
137 ```
|
|
138
|
|
139 ### Step 4: Generate RPS Scores (Optional)
|
|
140
|
|
141 ```python
|
|
142 # Create sample metabolite data
|
|
143 metabolites = ['glucose', 'pyruvate', 'lactate', 'ATP', 'NADH']
|
|
144 met_data = np.random.lognormal(mean=3, sigma=0.5, size=(len(metabolites), len(samples)))
|
|
145
|
|
146 met_df = pd.DataFrame(met_data, index=metabolites, columns=samples)
|
|
147 met_df.index.name = 'Metabolite_ID'
|
|
148
|
|
149 metabolite_file = work_dir / "metabolite_data.tsv"
|
|
150 met_df.to_csv(metabolite_file, sep='\t')
|
|
151
|
|
152 # Generate Reaction Propensity Scores
|
|
153 rps_output = work_dir / "rps_scores.tsv"
|
|
154
|
|
155 rps_args = [
|
|
156 '-td', tool_dir,
|
|
157 '-id', str(metabolite_file),
|
|
158 '-rp', str(rps_output)
|
|
159 ]
|
|
160
|
|
161 try:
|
|
162 rps_generator.main(rps_args)
|
|
163 print(f"✓ RPS scores generated: {rps_output}")
|
|
164 except Exception as e:
|
|
165 print(f"RPS generation warning: {e}")
|
|
166 # RPS generation might fail with sample data - that's OK
|
|
167 ```
|
|
168
|
|
169 ### Step 5: Statistical Analysis with MAREA
|
|
170
|
|
171 ```python
|
|
172 # Create enriched pathway maps
|
|
173 maps_output = work_dir / "pathway_maps"
|
|
174
|
|
175 marea_args = [
|
|
176 '-td', tool_dir,
|
|
177 '-using_RAS', 'true',
|
|
178 '-input_data', str(ras_output),
|
|
179 '-choice_map', 'ENGRO2',
|
|
180 '-gs', 'true', # Gene set analysis
|
|
181 '-idop', str(maps_output)
|
|
182 ]
|
|
183
|
|
184 try:
|
|
185 marea.main(marea_args)
|
|
186 print(f"✓ Pathway maps created: {maps_output}")
|
|
187 except Exception as e:
|
|
188 print(f"MAREA analysis failed: {e}")
|
|
189 ```
|
|
190
|
|
191 ### Step 6: Flux Simulation Pipeline
|
|
192
|
|
193 ```python
|
|
194 # Apply RAS constraints to model
|
|
195 bounds_output = work_dir / "model_bounds"
|
|
196
|
|
197 bounds_args = [
|
|
198 '-td', tool_dir,
|
|
199 '-ms', 'ENGRO2',
|
|
200 '-ir', str(ras_output),
|
|
201 '-rs', 'true', # Use RAS values
|
|
202 '-idop', str(bounds_output)
|
|
203 ]
|
|
204
|
|
205 try:
|
|
206 ras_to_bounds.main(bounds_args)
|
|
207 print(f"✓ Model constraints applied: {bounds_output}")
|
|
208 except Exception as e:
|
|
209 print(f"Bounds generation failed: {e}")
|
|
210 raise
|
|
211
|
|
212 # Sample metabolic fluxes
|
|
213 flux_output = work_dir / "flux_samples"
|
|
214
|
|
215 flux_args = [
|
|
216 '-td', tool_dir,
|
|
217 '-ms', 'ENGRO2',
|
|
218 '-in', str(bounds_output / "*.tsv"), # Will be expanded by tool
|
|
219 '-a', 'CBS', # Sampling algorithm
|
|
220 '-ns', '1000', # Number of samples
|
|
221 '-idop', str(flux_output)
|
|
222 ]
|
|
223
|
|
224 try:
|
|
225 flux_simulation.main(flux_args)
|
|
226 print(f"✓ Flux samples generated: {flux_output}")
|
|
227 except Exception as e:
|
|
228 print(f"Flux simulation failed: {e}")
|
|
229 ```
|
|
230
|
|
231 ### Step 7: Create Final Visualizations
|
|
232
|
|
233 ```python
|
|
234 # Add flux data to enriched maps
|
|
235 final_maps = work_dir / "final_visualizations"
|
|
236
|
|
237 # Check if we have both maps and flux data
|
|
238 maps_dir = maps_output
|
|
239 flux_dir = flux_output
|
|
240
|
|
241 if maps_dir.exists() and flux_dir.exists():
|
|
242 flux_to_map_args = [
|
|
243 '-td', tool_dir,
|
|
244 '-if', str(flux_dir / "*.tsv"),
|
|
245 '-mp', str(maps_dir / "*.svg"),
|
|
246 '-idop', str(final_maps)
|
|
247 ]
|
|
248
|
|
249 try:
|
|
250 flux_to_map.main(flux_to_map_args)
|
|
251 print(f"✓ Final visualizations created: {final_maps}")
|
|
252 except Exception as e:
|
|
253 print(f"Final mapping failed: {e}")
|
|
254 else:
|
|
255 print("Skipping final visualization - missing input files")
|
|
256 ```
|
|
257
|
|
258 ## Advanced Usage Patterns
|
|
259
|
|
260 ### Error Handling and Validation
|
|
261
|
|
262 ```python
|
|
263 def run_cobraxy_tool(tool_module, args, description):
|
|
264 """Helper function to run COBRAxy tools with error handling."""
|
|
265 try:
|
|
266 print(f"Running {description}...")
|
|
267 tool_module.main(args)
|
|
268 print(f"✓ {description} completed successfully")
|
|
269 return True
|
|
270 except Exception as e:
|
|
271 print(f"✗ {description} failed: {e}")
|
|
272 return False
|
|
273
|
|
274 # Usage
|
|
275 success = run_cobraxy_tool(
|
|
276 ras_generator,
|
|
277 ras_args,
|
|
278 "RAS generation"
|
|
279 )
|
|
280
|
|
281 if not success:
|
|
282 print("Pipeline stopped due to error")
|
|
283 exit(1)
|
|
284 ```
|
|
285
|
|
286 ### Batch Processing Multiple Datasets
|
|
287
|
|
288 ```python
|
|
289 def process_dataset(dataset_path, output_dir):
|
|
290 """Process a single dataset through COBRAxy pipeline."""
|
|
291
|
|
292 dataset_name = dataset_path.stem
|
|
293 out_dir = Path(output_dir) / dataset_name
|
|
294 out_dir.mkdir(exist_ok=True)
|
|
295
|
|
296 # Generate RAS
|
|
297 ras_file = out_dir / "ras_scores.tsv"
|
|
298 ras_args = [
|
|
299 '-td', tool_dir,
|
|
300 '-in', str(dataset_path),
|
|
301 '-ra', str(ras_file),
|
|
302 '-rs', 'ENGRO2'
|
|
303 ]
|
|
304
|
|
305 if run_cobraxy_tool(ras_generator, ras_args, f"RAS for {dataset_name}"):
|
|
306 # Continue with MAREA analysis
|
|
307 maps_dir = out_dir / "maps"
|
|
308 marea_args = [
|
|
309 '-td', tool_dir,
|
|
310 '-using_RAS', 'true',
|
|
311 '-input_data', str(ras_file),
|
|
312 '-choice_map', 'ENGRO2',
|
|
313 '-idop', str(maps_dir)
|
|
314 ]
|
|
315 run_cobraxy_tool(marea, marea_args, f"MAREA for {dataset_name}")
|
|
316
|
|
317 return out_dir
|
|
318
|
|
319 # Process multiple datasets
|
|
320 datasets = [
|
|
321 "/path/to/dataset1.tsv",
|
|
322 "/path/to/dataset2.tsv",
|
|
323 "/path/to/dataset3.tsv"
|
|
324 ]
|
|
325
|
|
326 results = []
|
|
327 for dataset in datasets:
|
|
328 result_dir = process_dataset(Path(dataset), work_dir / "batch_results")
|
|
329 results.append(result_dir)
|
|
330
|
|
331 print(f"Processed {len(results)} datasets")
|
|
332 ```
|
|
333
|
|
334 ### Custom Analysis Pipelines
|
|
335
|
|
336 ```python
|
|
337 class COBRAxyPipeline:
|
|
338 """Custom COBRAxy analysis pipeline."""
|
|
339
|
|
340 def __init__(self, tool_dir, work_dir):
|
|
341 self.tool_dir = tool_dir
|
|
342 self.work_dir = Path(work_dir)
|
|
343 self.work_dir.mkdir(exist_ok=True)
|
|
344
|
|
345 def run_enrichment_analysis(self, expression_file, model='ENGRO2'):
|
|
346 """Run enrichment-focused analysis."""
|
|
347
|
|
348 # Generate RAS
|
|
349 ras_file = self.work_dir / "ras_scores.tsv"
|
|
350 ras_args = ['-td', self.tool_dir, '-in', str(expression_file),
|
|
351 '-ra', str(ras_file), '-rs', model]
|
|
352
|
|
353 if not run_cobraxy_tool(ras_generator, ras_args, "RAS generation"):
|
|
354 return None
|
|
355
|
|
356 # Run MAREA
|
|
357 maps_dir = self.work_dir / "enrichment_maps"
|
|
358 marea_args = ['-td', self.tool_dir, '-using_RAS', 'true',
|
|
359 '-input_data', str(ras_file), '-choice_map', model,
|
|
360 '-gs', 'true', '-idop', str(maps_dir)]
|
|
361
|
|
362 if run_cobraxy_tool(marea, marea_args, "MAREA analysis"):
|
|
363 return maps_dir
|
|
364 return None
|
|
365
|
|
366 def run_flux_analysis(self, expression_file, model='ENGRO2', n_samples=1000):
|
|
367 """Run flux sampling analysis."""
|
|
368
|
|
369 # Generate RAS and apply bounds
|
|
370 ras_file = self.work_dir / "ras_scores.tsv"
|
|
371 bounds_dir = self.work_dir / "bounds"
|
|
372 flux_dir = self.work_dir / "flux_samples"
|
|
373
|
|
374 # RAS generation
|
|
375 ras_args = ['-td', self.tool_dir, '-in', str(expression_file),
|
|
376 '-ra', str(ras_file), '-rs', model]
|
|
377 if not run_cobraxy_tool(ras_generator, ras_args, "RAS generation"):
|
|
378 return None
|
|
379
|
|
380 # Apply bounds
|
|
381 bounds_args = ['-td', self.tool_dir, '-ms', model, '-ir', str(ras_file),
|
|
382 '-rs', 'true', '-idop', str(bounds_dir)]
|
|
383 if not run_cobraxy_tool(ras_to_bounds, bounds_args, "Bounds application"):
|
|
384 return None
|
|
385
|
|
386 # Flux sampling
|
|
387 flux_args = ['-td', self.tool_dir, '-ms', model,
|
|
388 '-in', str(bounds_dir / "*.tsv"),
|
|
389 '-a', 'CBS', '-ns', str(n_samples), '-idop', str(flux_dir)]
|
|
390
|
|
391 if run_cobraxy_tool(flux_simulation, flux_args, "Flux simulation"):
|
|
392 return flux_dir
|
|
393 return None
|
|
394
|
|
395 # Usage
|
|
396 pipeline = COBRAxyPipeline(tool_dir, work_dir / "custom_analysis")
|
|
397
|
|
398 # Run enrichment analysis
|
|
399 enrichment_results = pipeline.run_enrichment_analysis(expression_file)
|
|
400 if enrichment_results:
|
|
401 print(f"Enrichment analysis completed: {enrichment_results}")
|
|
402
|
|
403 # Run flux analysis
|
|
404 flux_results = pipeline.run_flux_analysis(expression_file, n_samples=500)
|
|
405 if flux_results:
|
|
406 print(f"Flux analysis completed: {flux_results}")
|
|
407 ```
|
|
408
|
|
409 ## Integration with Data Analysis Libraries
|
|
410
|
|
411 ### Pandas Integration
|
|
412
|
|
413 ```python
|
|
414 # Read COBRAxy results back into pandas
|
|
415 ras_df = pd.read_csv(ras_output, sep='\t', index_col=0)
|
|
416 print(f"RAS data shape: {ras_df.shape}")
|
|
417 print(f"Sample statistics:\n{ras_df.describe()}")
|
|
418
|
|
419 # Filter highly variable reactions
|
|
420 ras_std = ras_df.std(axis=1)
|
|
421 variable_reactions = ras_std.nlargest(20).index
|
|
422 print(f"Most variable reactions: {list(variable_reactions)}")
|
|
423 ```
|
|
424
|
|
425 ### Matplotlib Visualization
|
|
426
|
|
427 ```python
|
|
428 import matplotlib.pyplot as plt
|
|
429 import seaborn as sns
|
|
430
|
|
431 # Visualize RAS distributions
|
|
432 plt.figure(figsize=(12, 8))
|
|
433 sns.heatmap(ras_df.iloc[:50], cmap='RdBu_r', center=0, cbar_kws={'label': 'RAS Score'})
|
|
434 plt.title('Reaction Activity Scores (Top 50 Reactions)')
|
|
435 plt.xlabel('Samples')
|
|
436 plt.ylabel('Reactions')
|
|
437 plt.tight_layout()
|
|
438 plt.savefig(work_dir / 'ras_heatmap.png', dpi=300)
|
|
439 plt.show()
|
|
440 ```
|
|
441
|
|
442 ## Best Practices
|
|
443
|
|
444 ### 1. Environment Management
|
|
445 ```python
|
|
446 # Use pathlib for cross-platform compatibility
|
|
447 from pathlib import Path
|
|
448
|
|
449 # Use absolute paths
|
|
450 tool_dir = str(Path(cobraxy_path).absolute())
|
|
451 work_dir = Path("/analysis").absolute()
|
|
452 ```
|
|
453
|
|
454 ### 2. Error Handling
|
|
455 ```python
|
|
456 # Always wrap tool calls in try-except
|
|
457 try:
|
|
458 ras_generator.main(ras_args)
|
|
459 except Exception as e:
|
|
460 print(f"RAS generation failed: {e}")
|
|
461 # Log details, cleanup, or alternative action
|
|
462 ```
|
|
463
|
|
464 ### 3. Argument Validation
|
|
465 ```python
|
|
466 def validate_file_exists(filepath):
|
|
467 """Validate input file exists."""
|
|
468 path = Path(filepath)
|
|
469 if not path.exists():
|
|
470 raise FileNotFoundError(f"Input file not found: {filepath}")
|
|
471 return str(path.absolute())
|
|
472
|
|
473 # Use before calling tools
|
|
474 expression_file = validate_file_exists(expression_file)
|
|
475 ```
|
|
476
|
|
477
|
|
478
|
|
479 ## Troubleshooting
|
|
480
|
|
481 ### Common Issues
|
|
482
|
|
483 **Import errors**
|
|
484 ```python
|
|
485 # Check if COBRAxy path is correct
|
|
486 import sys
|
|
487 print("Python path includes:")
|
|
488 for p in sys.path:
|
|
489 print(f" {p}")
|
|
490
|
|
491 # Add COBRAxy path
|
|
492 sys.path.insert(0, "/correct/path/to/COBRAxy")
|
|
493 ```
|
|
494
|
|
495 **Tool execution failures**
|
|
496 ```python
|
|
497 # Enable verbose output
|
|
498 import logging
|
|
499 logging.basicConfig(level=logging.DEBUG)
|
|
500
|
|
501 # Check working directory
|
|
502 print(f"Current directory: {os.getcwd()}")
|
|
503 print(f"Directory contents: {list(Path('.').iterdir())}")
|
|
504 ```
|
|
505
|
|
506 **File path issues**
|
|
507 ```python
|
|
508 # Use absolute paths
|
|
509 ras_args = [
|
|
510 '-td', str(Path(tool_dir).absolute()),
|
|
511 '-in', str(Path(expression_file).absolute()),
|
|
512 '-ra', str(Path(ras_output).absolute()),
|
|
513 '-rs', 'ENGRO2'
|
|
514 ]
|
|
515 ```
|
|
516
|
|
517 ## Next Steps
|
|
518
|
|
519 Now that you can use COBRAxy programmatically:
|
|
520
|
|
521 1. **[Tools Reference](../tools/)** - Detailed parameter documentation
|
|
522 2. **[Examples](../examples/)** - Real-world analysis scripts
|
|
523 3. **Build custom analysis pipelines** for your research needs
|
|
524 4. **Integrate with workflow managers** like Snakemake or Nextflow
|
|
525
|
|
526 ## Resources
|
|
527
|
|
528 - [COBRApy Documentation](https://cobrapy.readthedocs.io/) - Underlying metabolic modeling library
|
|
529 - [Pandas Documentation](https://pandas.pydata.org/) - Data manipulation
|
|
530 - [Matplotlib Gallery](https://matplotlib.org/gallery/) - Visualization examples
|
|
531 - [Python Pathlib](https://docs.python.org/3/library/pathlib.html) - Modern path handling |