0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Reads a list of intervals and a maf. Produces a new maf containing the
|
|
5 blocks or parts of blocks in the original that overlapped the intervals.
|
|
6
|
|
7 If a MAF file, not UID, is provided the MAF file is indexed before being processed.
|
|
8
|
|
9 NOTE: If two intervals overlap the same block it will be written twice.
|
|
10
|
|
11 usage: %prog maf_file [options]
|
|
12 -d, --dbkey=d: Database key, ie hg17
|
|
13 -c, --chromCol=c: Column of Chr
|
|
14 -s, --startCol=s: Column of Start
|
|
15 -e, --endCol=e: Column of End
|
|
16 -S, --strandCol=S: Column of Strand
|
|
17 -t, --mafType=t: Type of MAF source to use
|
|
18 -m, --mafFile=m: Path of source MAF file, if not using cached version
|
|
19 -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version
|
|
20 -i, --interval_file=i: Input interval file
|
|
21 -o, --output_file=o: Output MAF file
|
|
22 -p, --species=p: Species to include in output
|
|
23 -P, --split_blocks_by_species=P: Split blocks by species
|
|
24 -r, --remove_all_gap_columns=r: Remove all Gap columns
|
|
25 -l, --indexLocation=l: Override default maf_index.loc file
|
|
26 -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc )
|
|
27 """
|
|
28
|
|
29 #Dan Blankenberg
|
|
30 from galaxy import eggs
|
|
31 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
32 from bx.cookbook import doc_optparse
|
|
33 import bx.align.maf
|
|
34 import bx.intervals.io
|
|
35 from galaxy.tools.util import maf_utilities
|
|
36 import sys
|
|
37
|
|
38 assert sys.version_info[:2] >= ( 2, 4 )
|
|
39
|
|
40 def __main__():
|
|
41 index = index_filename = None
|
|
42 mincols = 0
|
|
43
|
|
44 #Parse Command Line
|
|
45 options, args = doc_optparse.parse( __doc__ )
|
|
46
|
|
47 if options.dbkey: dbkey = options.dbkey
|
|
48 else: dbkey = None
|
|
49 if dbkey in [None, "?"]:
|
|
50 maf_utilities.tool_fail( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." )
|
|
51
|
|
52 species = maf_utilities.parse_species_option( options.species )
|
|
53
|
|
54 if options.chromCol: chromCol = int( options.chromCol ) - 1
|
|
55 else:
|
|
56 maf_utilities.tool_fail( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." )
|
|
57
|
|
58 if options.startCol: startCol = int( options.startCol ) - 1
|
|
59 else:
|
|
60 maf_utilities.tool_fail( "Start column not set, click the pencil icon in the history item to set the metadata attributes." )
|
|
61
|
|
62 if options.endCol: endCol = int( options.endCol ) - 1
|
|
63 else:
|
|
64 maf_utilities.tool_fail( "End column not set, click the pencil icon in the history item to set the metadata attributes." )
|
|
65
|
|
66 if options.strandCol: strandCol = int( options.strandCol ) - 1
|
|
67 else:
|
|
68 strandCol = -1
|
|
69
|
|
70 if options.interval_file: interval_file = options.interval_file
|
|
71 else:
|
|
72 maf_utilities.tool_fail( "Input interval file has not been specified." )
|
|
73
|
|
74 if options.output_file: output_file = options.output_file
|
|
75 else:
|
|
76 maf_utilities.tool_fail( "Output file has not been specified." )
|
|
77
|
|
78 split_blocks_by_species = remove_all_gap_columns = False
|
|
79 if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
|
|
80 split_blocks_by_species = True
|
|
81 if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
|
|
82 remove_all_gap_columns = True
|
|
83 else:
|
|
84 remove_all_gap_columns = True
|
|
85 #Finish parsing command line
|
|
86
|
|
87 #Open indexed access to MAFs
|
|
88 if options.mafType:
|
|
89 if options.indexLocation:
|
|
90 index = maf_utilities.maf_index_by_uid( options.mafType, options.indexLocation )
|
|
91 else:
|
|
92 index = maf_utilities.maf_index_by_uid( options.mafType, options.mafIndexFile )
|
|
93 if index is None:
|
|
94 maf_utilities.tool_fail( "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) )
|
|
95 elif options.mafFile:
|
|
96 index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] )
|
|
97 if index is None:
|
|
98 maf_utilities.tool_fail( "Your MAF file appears to be malformed." )
|
|
99 else:
|
|
100 maf_utilities.tool_fail( "Desired source MAF type has not been specified." )
|
|
101
|
|
102 #Create MAF writter
|
|
103 out = bx.align.maf.Writer( open(output_file, "w") )
|
|
104
|
|
105 #Iterate over input regions
|
|
106 num_blocks = 0
|
|
107 num_regions = None
|
|
108 for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ):
|
|
109 src = maf_utilities.src_merge( dbkey, region.chrom )
|
|
110 for block in index.get_as_iterator( src, region.start, region.end ):
|
|
111 if split_blocks_by_species:
|
|
112 blocks = [ new_block for new_block in maf_utilities.iter_blocks_split_by_species( block ) if maf_utilities.component_overlaps_region( new_block.get_component_by_src_start( dbkey ), region ) ]
|
|
113 else:
|
|
114 blocks = [ block ]
|
|
115 for block in blocks:
|
|
116 block = maf_utilities.chop_block_by_region( block, src, region )
|
|
117 if block is not None:
|
|
118 if species is not None:
|
|
119 block = block.limit_to_species( species )
|
|
120 block = maf_utilities.orient_block_by_region( block, src, region )
|
|
121 if remove_all_gap_columns:
|
|
122 block.remove_all_gap_columns()
|
|
123 out.write( block )
|
|
124 num_blocks += 1
|
|
125
|
|
126 #Close output MAF
|
|
127 out.close()
|
|
128
|
|
129 #remove index file if created during run
|
|
130 maf_utilities.remove_temp_index_file( index_filename )
|
|
131
|
|
132 if num_blocks:
|
|
133 print "%i MAF blocks extracted for %i regions." % ( num_blocks, ( num_regions + 1 ) )
|
|
134 elif num_regions is not None:
|
|
135 print "No MAF blocks could be extracted for %i regions." % ( num_regions + 1 )
|
|
136 else:
|
|
137 print "No valid regions have been provided."
|
|
138
|
|
139 if __name__ == "__main__": __main__()
|