Mercurial > repos > devteam > subtract
changeset 5:0145969324c4 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author | devteam |
---|---|
date | Thu, 22 Jun 2017 18:53:03 -0400 |
parents | 7a2a604ae9c8 |
children | 0427ca314f3d |
files | gops_subtract.py macros.xml operation_filter.py subtract.xml tool_dependencies.xml utils/__init__.pyc utils/gff_util.py utils/gff_util.pyc utils/odict.pyc |
diffstat | 9 files changed, 134 insertions(+), 131 deletions(-) [+] |
line wrap: on
line diff
--- a/gops_subtract.py Thu Feb 11 12:11:59 2016 -0500 +++ b/gops_subtract.py Thu Jun 22 18:53:03 2017 -0400 @@ -11,14 +11,18 @@ -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval """ +from __future__ import print_function + import fileinput import sys + +from bx.cookbook import doc_optparse from bx.intervals.io import GenomicInterval, NiceReaderWrapper from bx.intervals.operations.subtract import subtract -from bx.cookbook import doc_optparse from bx.tabular.io import ParseError from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped -from utils.gff_util import GFFFeature, GFFReaderWrapper, convert_bed_coords_to_gff + +from utils.gff_util import convert_bed_coords_to_gff, GFFFeature, GFFReaderWrapper assert sys.version_info[:2] >= ( 2, 4 ) @@ -81,16 +85,17 @@ out_file.write( "%s\n" % "\t".join( feature.fields ) ) else: out_file.write( "%s\n" % feature ) - except ParseError, exc: + except ParseError as exc: out_file.close() fail( "Invalid file format: %s" % str( exc ) ) out_file.close() if g1.skipped > 0: - print skipped( g1, filedesc=" of 2nd dataset" ) + print(skipped( g1, filedesc=" of 2nd dataset" )) if g2.skipped > 0: - print skipped( g2, filedesc=" of 1st dataset" ) + print(skipped( g2, filedesc=" of 1st dataset" )) + if __name__ == "__main__": main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jun 22 18:53:03 2017 -0400 @@ -0,0 +1,20 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="0.7.1">bx-python</requirement> + <requirement type="package" version="1.0.0">galaxy-ops</requirement> + </requirements> + </xml> + <token name="@SCREENCASTS@"> +----- + +**Screencasts!** + +See Galaxy Interval Operation Screencasts_ (right click to open this link in another window). + +.. _Screencasts: https://galaxyproject.org/learn/interval-operations/ + +----- + </token> +</macros>
--- a/operation_filter.py Thu Feb 11 12:11:59 2016 -0500 +++ b/operation_filter.py Thu Jun 22 18:53:03 2017 -0400 @@ -1,8 +1,7 @@ # runs after the job (and after the default post-filter) +from galaxy.jobs.handler import JOB_ERROR from galaxy.tools.parameters import DataToolParameter -from galaxy.jobs.handler import JOB_ERROR - # Older py compatibility try: set() @@ -14,7 +13,7 @@ dbkeys = set() data_param_names = set() data_params = 0 - for name, param in page_param_map.iteritems(): + for name, param in page_param_map.items(): if isinstance( param, DataToolParameter ): # for each dataset parameter if param_values.get(name, None) is not None: @@ -53,7 +52,6 @@ try: if stderr and len( stderr ) > 0: raise Exception( stderr ) - except Exception: data.blurb = JOB_ERROR data.state = JOB_ERROR
--- a/subtract.xml Thu Feb 11 12:11:59 2016 -0500 +++ b/subtract.xml Thu Jun 22 18:53:03 2017 -0400 @@ -1,110 +1,95 @@ <tool id="gops_subtract_1" name="Subtract" version="1.0.0"> - <description>the intervals of two datasets</description> - <requirements> - <requirement type="package" version="0.7.1">bx-python</requirement> - <requirement type="package" version="1.0.0">galaxy-ops</requirement> - </requirements> - <command interpreter="python">gops_subtract.py - $input1 $input2 $output - - #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): - -1 1,4,5,7 --gff1 - #else: - -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} - #end if + <description>the intervals of two datasets</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <code file="operation_filter.py"/> + <command><![CDATA[ +python '$__tool_directory__/gops_subtract.py' +'$input1' +'$input2' +'$output' - #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): - -2 1,4,5,7 --gff2 - #else: - -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} - #end if +#if $input1.is_of_type('gff') + -1 1,4,5,7 --gff1 +#else: + -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} +#end if - -m $min $returntype - </command> - <inputs> - <param format="interval,gff" name="input2" type="data" help="Second dataset"> - <label>Subtract</label> - </param> - - <param format="interval,gff" name="input1" type="data" help="First dataset"> - <label>from</label> - </param> +#if $input2.is_of_type('gff') + -2 1,4,5,7 --gff2 +#else: + -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} +#end if - <param name="returntype" type="select" label="Return" help="of the first dataset (see figure below)"> - <option value="">Intervals with no overlap</option> - <option value="-p">Non-overlapping pieces of intervals</option> - </param> - - <param name="min" type="integer" value="1" min="1" help="(bp)"> - <label>where minimal overlap is</label> - </param> - </inputs> - <outputs> - <data format="input" name="output" metadata_source="input1"/> - </outputs> - <code file="operation_filter.py"/> - <trackster_conf/> - <tests> - <test> - <param name="input1" value="1.bed" /> - <param name="input2" value="2.bed" /> - <param name="min" value="1" /> - <param name="returntype" value="" /> - <output name="output" file="gops-subtract.dat" /> - </test> - <test> - <param name="input1" value="1.bed" /> - <param name="input2" value="2_mod.bed" ftype="interval"/> - <param name="min" value="1" /> - <param name="returntype" value="" /> - <output name="output" file="gops_subtract_diffCols.dat" /> - </test> - <test> - <param name="input1" value="gops_subtract_bigint.bed" /> - <param name="input2" value="2.bed" /> - <param name="min" value="1" /> - <param name="returntype" value="" /> - <output name="output" file="gops-subtract.dat" /> - </test> - <test> - <param name="input1" value="1.bed" /> - <param name="input2" value="2.bed" /> - <param name="min" value="10" /> - <param name="returntype" value="Non-overlapping pieces of intervals" /> - <output name="output" file="gops-subtract-p.dat" /> - </test> - <!-- Subtract two GFF files. --> - <test> - <param name="input1" value="gops_subtract_in1.gff" /> - <param name="input2" value="gops_subtract_in2.gff" /> - <param name="min" value="1" /> - <param name="returntype" value="" /> - <output name="output" file="gops_subtract_out1.gff" /> - </test> - <!-- Subtract BED file from GFF file. --> - <test> - <param name="input1" value="gops_subtract_in1.gff" /> - <param name="input2" value="gops_subtract_in2.bed" /> - <param name="min" value="1" /> - <param name="returntype" value="" /> - <output name="output" file="gops_subtract_out1.gff" /> - </test> - </tests> - <help> - +-m $min +$returntype + ]]></command> + <inputs> + <param name="input2" type="data" format="interval,gff" label="Subtract" help="Second dataset" /> + <param name="input1" type="data" format="interval,gff" label="from" help="First dataset" /> + <param name="returntype" type="select" label="Return" help="of the first dataset (see figure below)"> + <option value="">Intervals with no overlap</option> + <option value="-p">Non-overlapping pieces of intervals</option> + </param> + <param name="min" type="integer" value="1" min="1" label="where minimal overlap is" help="(bp)" /> + </inputs> + <outputs> + <data name="output" format_source="input1" metadata_source="input1"/> + </outputs> + <tests> + <test> + <param name="input1" value="1.bed" /> + <param name="input2" value="2.bed" /> + <param name="min" value="1" /> + <param name="returntype" value="" /> + <output name="output" file="gops-subtract.dat" /> + </test> + <test> + <param name="input1" value="1.bed" /> + <param name="input2" value="2_mod.bed" ftype="interval"/> + <param name="min" value="1" /> + <param name="returntype" value="" /> + <output name="output" file="gops_subtract_diffCols.dat" /> + </test> + <test> + <param name="input1" value="gops_subtract_bigint.bed" /> + <param name="input2" value="2.bed" /> + <param name="min" value="1" /> + <param name="returntype" value="" /> + <output name="output" file="gops-subtract.dat" /> + </test> + <test> + <param name="input1" value="1.bed" /> + <param name="input2" value="2.bed" /> + <param name="min" value="10" /> + <param name="returntype" value="Non-overlapping pieces of intervals" /> + <output name="output" file="gops-subtract-p.dat" /> + </test> + <!-- Subtract two GFF files. --> + <test> + <param name="input1" value="gops_subtract_in1.gff" /> + <param name="input2" value="gops_subtract_in2.gff" /> + <param name="min" value="1" /> + <param name="returntype" value="" /> + <output name="output" file="gops_subtract_out1.gff" /> + </test> + <!-- Subtract BED file from GFF file. --> + <test> + <param name="input1" value="gops_subtract_in1.gff" /> + <param name="input2" value="gops_subtract_in2.bed" /> + <param name="min" value="1" /> + <param name="returntype" value="" /> + <output name="output" file="gops_subtract_out1.gff" /> + </test> + </tests> + <help><![CDATA[ .. class:: infomark **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns. ------ - -**Screencasts!** - -See Galaxy Interval Operation Screencasts_ (right click to open this link in another window). - -.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations - ------ +@SCREENCASTS@ **Syntax** @@ -123,6 +108,5 @@ Non-overlapping pieces of intervals: .. image:: gops_subtractOverlappingPieces.gif - -</help> + ]]></help> </tool>
--- a/tool_dependencies.xml Thu Feb 11 12:11:59 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="bx-python" version="0.7.1"> - <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="galaxy-ops" version="1.0.0"> - <repository changeset_revision="eef263ff9b95" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>
--- a/utils/gff_util.py Thu Feb 11 12:11:59 2016 -0500 +++ b/utils/gff_util.py Thu Jun 22 18:53:03 2017 -0400 @@ -1,11 +1,12 @@ """ Provides utilities for working with GFF files. """ +import copy -import copy from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper -from bx.tabular.io import Header, Comment, ParseError -from utils.odict import odict +from bx.tabular.io import Comment, Header, ParseError + +from .odict import odict class GFFInterval( GenomicInterval ): @@ -144,7 +145,7 @@ self.default_strand, fix_strand=self.fix_strand ) return interval - def next( self ): + def __next__( self ): """ Returns next GFFFeature. """ # @@ -177,10 +178,10 @@ while not self.seed_interval: try: self.seed_interval = GenomicIntervalReader.next( self ) - except ParseError, e: + except ParseError as e: handle_parse_error( e ) # TODO: When no longer supporting python 2.4 use finally: - #finally: + # finally: raw_size += len( self.current_line ) # If header or comment, clear seed interval and return it with its size. @@ -205,19 +206,19 @@ try: interval = GenomicIntervalReader.next( self ) raw_size += len( self.current_line ) - except StopIteration, e: + except StopIteration as e: # No more intervals to read, but last feature needs to be # returned. interval = None raw_size += len( self.current_line ) break - except ParseError, e: + except ParseError as e: handle_parse_error( e ) raw_size += len( self.current_line ) continue # TODO: When no longer supporting python 2.4 use finally: - #finally: - #raw_size += len( self.current_line ) + # finally: + # raw_size += len( self.current_line ) # Ignore comments. if isinstance( interval, Comment ): @@ -263,6 +264,7 @@ convert_gff_coords_to_bed( feature ) return feature + next = __next__ # This line should be removed once the bx-python port to Python3 is finished def convert_bed_coords_to_gff( interval ): @@ -374,7 +376,9 @@ # -- Get function that generates line/feature key. -- - get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ 'transcript_id' ] + def get_transcript_id(fields): + return parse_gff_attributes( fields[8] )[ 'transcript_id' ] + if strict: # Strict GTF parsing uses transcript_id only to group lines into feature. key_fn = get_transcript_id @@ -382,7 +386,8 @@ # Use lenient parsing where chromosome + transcript_id is the key. This allows # transcripts with same ID on different chromosomes; this occurs in some popular # datasources, such as RefGenes in UCSC. - key_fn = lambda fields: fields[0] + '_' + get_transcript_id( fields ) + def key_fn(fields): + return fields[0] + '_' + get_transcript_id( fields ) # Aggregate intervals by transcript_id and collect comments. feature_intervals = odict()