Mercurial > repos > xuebing > sharplabtool
comparison tools/maf/interval2maf.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Reads a list of intervals and a maf. Produces a new maf containing the | |
5 blocks or parts of blocks in the original that overlapped the intervals. | |
6 | |
7 If a MAF file, not UID, is provided the MAF file is indexed before being processed. | |
8 | |
9 NOTE: If two intervals overlap the same block it will be written twice. | |
10 | |
11 usage: %prog maf_file [options] | |
12 -d, --dbkey=d: Database key, ie hg17 | |
13 -c, --chromCol=c: Column of Chr | |
14 -s, --startCol=s: Column of Start | |
15 -e, --endCol=e: Column of End | |
16 -S, --strandCol=S: Column of Strand | |
17 -t, --mafType=t: Type of MAF source to use | |
18 -m, --mafFile=m: Path of source MAF file, if not using cached version | |
19 -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version | |
20 -i, --interval_file=i: Input interval file | |
21 -o, --output_file=o: Output MAF file | |
22 -p, --species=p: Species to include in output | |
23 -P, --split_blocks_by_species=P: Split blocks by species | |
24 -r, --remove_all_gap_columns=r: Remove all Gap columns | |
25 -l, --indexLocation=l: Override default maf_index.loc file | |
26 -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc ) | |
27 """ | |
28 | |
29 #Dan Blankenberg | |
30 from galaxy import eggs | |
31 import pkg_resources; pkg_resources.require( "bx-python" ) | |
32 from bx.cookbook import doc_optparse | |
33 import bx.align.maf | |
34 import bx.intervals.io | |
35 from galaxy.tools.util import maf_utilities | |
36 import sys | |
37 | |
38 assert sys.version_info[:2] >= ( 2, 4 ) | |
39 | |
40 def __main__(): | |
41 index = index_filename = None | |
42 mincols = 0 | |
43 | |
44 #Parse Command Line | |
45 options, args = doc_optparse.parse( __doc__ ) | |
46 | |
47 if options.dbkey: dbkey = options.dbkey | |
48 else: dbkey = None | |
49 if dbkey in [None, "?"]: | |
50 maf_utilities.tool_fail( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." ) | |
51 | |
52 species = maf_utilities.parse_species_option( options.species ) | |
53 | |
54 if options.chromCol: chromCol = int( options.chromCol ) - 1 | |
55 else: | |
56 maf_utilities.tool_fail( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." ) | |
57 | |
58 if options.startCol: startCol = int( options.startCol ) - 1 | |
59 else: | |
60 maf_utilities.tool_fail( "Start column not set, click the pencil icon in the history item to set the metadata attributes." ) | |
61 | |
62 if options.endCol: endCol = int( options.endCol ) - 1 | |
63 else: | |
64 maf_utilities.tool_fail( "End column not set, click the pencil icon in the history item to set the metadata attributes." ) | |
65 | |
66 if options.strandCol: strandCol = int( options.strandCol ) - 1 | |
67 else: | |
68 strandCol = -1 | |
69 | |
70 if options.interval_file: interval_file = options.interval_file | |
71 else: | |
72 maf_utilities.tool_fail( "Input interval file has not been specified." ) | |
73 | |
74 if options.output_file: output_file = options.output_file | |
75 else: | |
76 maf_utilities.tool_fail( "Output file has not been specified." ) | |
77 | |
78 split_blocks_by_species = remove_all_gap_columns = False | |
79 if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species': | |
80 split_blocks_by_species = True | |
81 if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns': | |
82 remove_all_gap_columns = True | |
83 else: | |
84 remove_all_gap_columns = True | |
85 #Finish parsing command line | |
86 | |
87 #Open indexed access to MAFs | |
88 if options.mafType: | |
89 if options.indexLocation: | |
90 index = maf_utilities.maf_index_by_uid( options.mafType, options.indexLocation ) | |
91 else: | |
92 index = maf_utilities.maf_index_by_uid( options.mafType, options.mafIndexFile ) | |
93 if index is None: | |
94 maf_utilities.tool_fail( "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) ) | |
95 elif options.mafFile: | |
96 index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] ) | |
97 if index is None: | |
98 maf_utilities.tool_fail( "Your MAF file appears to be malformed." ) | |
99 else: | |
100 maf_utilities.tool_fail( "Desired source MAF type has not been specified." ) | |
101 | |
102 #Create MAF writter | |
103 out = bx.align.maf.Writer( open(output_file, "w") ) | |
104 | |
105 #Iterate over input regions | |
106 num_blocks = 0 | |
107 num_regions = None | |
108 for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ): | |
109 src = maf_utilities.src_merge( dbkey, region.chrom ) | |
110 for block in index.get_as_iterator( src, region.start, region.end ): | |
111 if split_blocks_by_species: | |
112 blocks = [ new_block for new_block in maf_utilities.iter_blocks_split_by_species( block ) if maf_utilities.component_overlaps_region( new_block.get_component_by_src_start( dbkey ), region ) ] | |
113 else: | |
114 blocks = [ block ] | |
115 for block in blocks: | |
116 block = maf_utilities.chop_block_by_region( block, src, region ) | |
117 if block is not None: | |
118 if species is not None: | |
119 block = block.limit_to_species( species ) | |
120 block = maf_utilities.orient_block_by_region( block, src, region ) | |
121 if remove_all_gap_columns: | |
122 block.remove_all_gap_columns() | |
123 out.write( block ) | |
124 num_blocks += 1 | |
125 | |
126 #Close output MAF | |
127 out.close() | |
128 | |
129 #remove index file if created during run | |
130 maf_utilities.remove_temp_index_file( index_filename ) | |
131 | |
132 if num_blocks: | |
133 print "%i MAF blocks extracted for %i regions." % ( num_blocks, ( num_regions + 1 ) ) | |
134 elif num_regions is not None: | |
135 print "No MAF blocks could be extracted for %i regions." % ( num_regions + 1 ) | |
136 else: | |
137 print "No valid regions have been provided." | |
138 | |
139 if __name__ == "__main__": __main__() |