Mercurial > repos > dave > interval2maf
comparison interval2maf.py @ 0:d38f317fa9d4 draft default tip
Uploaded
author | dave |
---|---|
date | Fri, 10 Jul 2020 16:05:40 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d38f317fa9d4 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Reads a list of intervals and a maf. Produces a new maf containing the | |
4 blocks or parts of blocks in the original that overlapped the intervals. | |
5 | |
6 If a MAF file, not UID, is provided the MAF file is indexed before being processed. | |
7 | |
8 NOTE: If two intervals overlap the same block it will be written twice. | |
9 | |
10 usage: %prog maf_file [options] | |
11 -d, --dbkey=d: Database key, ie hg17 | |
12 -c, --chromCol=c: Column of Chr | |
13 -s, --startCol=s: Column of Start | |
14 -e, --endCol=e: Column of End | |
15 -S, --strandCol=S: Column of Strand | |
16 -t, --mafType=t: Type of MAF source to use | |
17 -m, --mafFile=m: Path of source MAF file, if not using cached version | |
18 -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version | |
19 -i, --interval_file=i: Input interval file | |
20 -o, --output_file=o: Output MAF file | |
21 -p, --species=p: Species to include in output | |
22 -P, --split_blocks_by_species=P: Split blocks by species | |
23 -r, --remove_all_gap_columns=r: Remove all Gap columns | |
24 -l, --indexLocation=l: Override default maf_index.loc file | |
25 -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc ) | |
26 """ | |
27 # Dan Blankenberg | |
28 from __future__ import print_function | |
29 | |
30 import bx.align.maf | |
31 import bx.intervals.io | |
32 from bx.cookbook import doc_optparse | |
33 | |
34 from galaxy.tools.util import maf_utilities | |
35 | |
36 | |
37 def __main__(): | |
38 index = index_filename = None | |
39 | |
40 # Parse Command Line | |
41 options, args = doc_optparse.parse(__doc__) | |
42 | |
43 if options.dbkey: | |
44 dbkey = options.dbkey | |
45 else: | |
46 dbkey = None | |
47 if dbkey in [None, "?"]: | |
48 maf_utilities.tool_fail("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.") | |
49 | |
50 species = maf_utilities.parse_species_option(options.species) | |
51 | |
52 if options.chromCol: | |
53 chromCol = int(options.chromCol) - 1 | |
54 else: | |
55 maf_utilities.tool_fail("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.") | |
56 | |
57 if options.startCol: | |
58 startCol = int(options.startCol) - 1 | |
59 else: | |
60 maf_utilities.tool_fail("Start column not set, click the pencil icon in the history item to set the metadata attributes.") | |
61 | |
62 if options.endCol: | |
63 endCol = int(options.endCol) - 1 | |
64 else: | |
65 maf_utilities.tool_fail("End column not set, click the pencil icon in the history item to set the metadata attributes.") | |
66 | |
67 if options.strandCol: | |
68 strandCol = int(options.strandCol) - 1 | |
69 else: | |
70 strandCol = -1 | |
71 | |
72 if options.interval_file: | |
73 interval_file = options.interval_file | |
74 else: | |
75 maf_utilities.tool_fail("Input interval file has not been specified.") | |
76 | |
77 if options.output_file: | |
78 output_file = options.output_file | |
79 else: | |
80 maf_utilities.tool_fail("Output file has not been specified.") | |
81 | |
82 split_blocks_by_species = remove_all_gap_columns = False | |
83 if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species': | |
84 split_blocks_by_species = True | |
85 if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns': | |
86 remove_all_gap_columns = True | |
87 else: | |
88 remove_all_gap_columns = True | |
89 # Finish parsing command line | |
90 | |
91 # Open indexed access to MAFs | |
92 if options.mafType: | |
93 if options.indexLocation: | |
94 index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation) | |
95 else: | |
96 index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile) | |
97 if index is None: | |
98 maf_utilities.tool_fail("The MAF source specified (%s) appears to be invalid." % (options.mafType)) | |
99 elif options.mafFile: | |
100 index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey]) | |
101 if index is None: | |
102 maf_utilities.tool_fail("Your MAF file appears to be malformed.") | |
103 else: | |
104 maf_utilities.tool_fail("Desired source MAF type has not been specified.") | |
105 | |
106 # Create MAF writter | |
107 out = bx.align.maf.Writer(open(output_file, "w")) | |
108 | |
109 # Iterate over input regions | |
110 num_blocks = 0 | |
111 num_regions = None | |
112 for num_regions, region in enumerate(bx.intervals.io.NiceReaderWrapper(open(interval_file, 'r'), chrom_col=chromCol, start_col=startCol, end_col=endCol, strand_col=strandCol, fix_strand=True, return_header=False, return_comments=False)): | |
113 src = maf_utilities.src_merge(dbkey, region.chrom) | |
114 for block in index.get_as_iterator(src, region.start, region.end): | |
115 if split_blocks_by_species: | |
116 blocks = [new_block for new_block in maf_utilities.iter_blocks_split_by_species(block) if maf_utilities.component_overlaps_region(new_block.get_component_by_src_start(dbkey), region)] | |
117 else: | |
118 blocks = [block] | |
119 for block in blocks: | |
120 block = maf_utilities.chop_block_by_region(block, src, region) | |
121 if block is not None: | |
122 if species is not None: | |
123 block = block.limit_to_species(species) | |
124 block = maf_utilities.orient_block_by_region(block, src, region) | |
125 if remove_all_gap_columns: | |
126 block.remove_all_gap_columns() | |
127 out.write(block) | |
128 num_blocks += 1 | |
129 | |
130 # Close output MAF | |
131 out.close() | |
132 | |
133 # remove index file if created during run | |
134 maf_utilities.remove_temp_index_file(index_filename) | |
135 | |
136 if num_blocks: | |
137 print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1))) | |
138 elif num_regions is not None: | |
139 print("No MAF blocks could be extracted for %i regions." % (num_regions + 1)) | |
140 else: | |
141 print("No valid regions have been provided.") | |
142 | |
143 | |
144 if __name__ == "__main__": | |
145 __main__() |