Mercurial > repos > dave > interval2maf
view interval2maf.py @ 0:d38f317fa9d4 draft default tip
Uploaded
author | dave |
---|---|
date | Fri, 10 Jul 2020 16:05:40 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python """ Reads a list of intervals and a maf. Produces a new maf containing the blocks or parts of blocks in the original that overlapped the intervals. If a MAF file, not UID, is provided the MAF file is indexed before being processed. NOTE: If two intervals overlap the same block it will be written twice. usage: %prog maf_file [options] -d, --dbkey=d: Database key, ie hg17 -c, --chromCol=c: Column of Chr -s, --startCol=s: Column of Start -e, --endCol=e: Column of End -S, --strandCol=S: Column of Strand -t, --mafType=t: Type of MAF source to use -m, --mafFile=m: Path of source MAF file, if not using cached version -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version -i, --interval_file=i: Input interval file -o, --output_file=o: Output MAF file -p, --species=p: Species to include in output -P, --split_blocks_by_species=P: Split blocks by species -r, --remove_all_gap_columns=r: Remove all Gap columns -l, --indexLocation=l: Override default maf_index.loc file -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc ) """ # Dan Blankenberg from __future__ import print_function import bx.align.maf import bx.intervals.io from bx.cookbook import doc_optparse from galaxy.tools.util import maf_utilities def __main__(): index = index_filename = None # Parse Command Line options, args = doc_optparse.parse(__doc__) if options.dbkey: dbkey = options.dbkey else: dbkey = None if dbkey in [None, "?"]: maf_utilities.tool_fail("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.") species = maf_utilities.parse_species_option(options.species) if options.chromCol: chromCol = int(options.chromCol) - 1 else: maf_utilities.tool_fail("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.") if options.startCol: startCol = int(options.startCol) - 1 else: maf_utilities.tool_fail("Start column not set, click the pencil icon in the history item to set the metadata attributes.") if options.endCol: endCol = int(options.endCol) - 1 else: maf_utilities.tool_fail("End column not set, click the pencil icon in the history item to set the metadata attributes.") if options.strandCol: strandCol = int(options.strandCol) - 1 else: strandCol = -1 if options.interval_file: interval_file = options.interval_file else: maf_utilities.tool_fail("Input interval file has not been specified.") if options.output_file: output_file = options.output_file else: maf_utilities.tool_fail("Output file has not been specified.") split_blocks_by_species = remove_all_gap_columns = False if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species': split_blocks_by_species = True if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns': remove_all_gap_columns = True else: remove_all_gap_columns = True # Finish parsing command line # Open indexed access to MAFs if options.mafType: if options.indexLocation: index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation) else: index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile) if index is None: maf_utilities.tool_fail("The MAF source specified (%s) appears to be invalid." % (options.mafType)) elif options.mafFile: index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey]) if index is None: maf_utilities.tool_fail("Your MAF file appears to be malformed.") else: maf_utilities.tool_fail("Desired source MAF type has not been specified.") # Create MAF writter out = bx.align.maf.Writer(open(output_file, "w")) # Iterate over input regions num_blocks = 0 num_regions = None for num_regions, region in enumerate(bx.intervals.io.NiceReaderWrapper(open(interval_file, 'r'), chrom_col=chromCol, start_col=startCol, end_col=endCol, strand_col=strandCol, fix_strand=True, return_header=False, return_comments=False)): src = maf_utilities.src_merge(dbkey, region.chrom) for block in index.get_as_iterator(src, region.start, region.end): if split_blocks_by_species: blocks = [new_block for new_block in maf_utilities.iter_blocks_split_by_species(block) if maf_utilities.component_overlaps_region(new_block.get_component_by_src_start(dbkey), region)] else: blocks = [block] for block in blocks: block = maf_utilities.chop_block_by_region(block, src, region) if block is not None: if species is not None: block = block.limit_to_species(species) block = maf_utilities.orient_block_by_region(block, src, region) if remove_all_gap_columns: block.remove_all_gap_columns() out.write(block) num_blocks += 1 # Close output MAF out.close() # remove index file if created during run maf_utilities.remove_temp_index_file(index_filename) if num_blocks: print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1))) elif num_regions is not None: print("No MAF blocks could be extracted for %i regions." % (num_regions + 1)) else: print("No valid regions have been provided.") if __name__ == "__main__": __main__()