Mercurial > repos > iuc > bedtools
changeset 0:b8348686a0b9 draft
Imported from capsule None
line wrap: on
line diff
<!-- Bed12ToBed6.xml: Galaxy wrapper that splits BED12 records into BED6 records. -->
<tool id="bedtools_bed12tobed6" name="Convert from BED12 to BED6" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- The executable is bed12ToBed6, as named in the help text below;
         the previous "bed12ToBed16" was a typo and could never be found. -->
    <command><![CDATA[
        bed12ToBed6
        -i '$input'
        > '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bed" label="Convert the following BED12 file to BED6"/>
    </inputs>
    <outputs>
        <data name="output" format="bed" metadata_source="input" label="${input.name} (as BED6)"/>
    </outputs>
    <help>

**What it does**

bed12ToBed6 is a convenience tool that converts BED features in BED12 (a.k.a. “blocked” BED features such as genes) to discrete BED6 features. For example, in the case of a gene with six exons, bed12ToBed6 would create six separate BED6 features (i.e., one for each exon).

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- BedToBam.xml: Galaxy wrapper converting BED features to BAM. -->
<tool id="bedtools_bedtobam" name="Convert from BED to BAM" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- bedtools bedtobam requires a genome file (-g). It is taken from the shared
         "genome" macro, mirroring how bedpeToBam.xml passes "-g $genome".
         NOTE(review): the macro body lives in macros.xml, which is not visible here;
         confirm it exposes a parameter named "genome". -->
    <command><![CDATA[
        bedtools bedtobam
        $ubam
        $bed12
        -mapq $mapq
        -i '$input'
        -g $genome
        > '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bed" label="Convert the following BED file to BAM"/>
        <expand macro="genome" />
        <param name="bed12" type="boolean" truevalue="-bed12" falsevalue="" checked="false"
               label="Indicate that the input BED file is in BED12 (a.k.a “blocked” BED) format"
               help="If Selected, bedToBam will convert blocked BED features (e.g., gene annotations) into “spliced” BAM alignments by creating an appropriate CIGAR string."/>
        <param name="mapq" type="integer" value="255" label="Set a mapping quality (SAM MAPQ field) value for all BED entries" />
        <param name="ubam" type="boolean" truevalue="-ubam" falsevalue="" checked="false" label="Write uncompressed BAM output."/>
    </inputs>
    <outputs>
        <data name="output" format="bam" metadata_source="input" label="${input.name} (as BAM)"/>
    </outputs>
    <help>

**What it does**

bedToBam converts features in a feature file to BAM format. This is useful as an efficient means of storing large genome annotations in a compact, indexed format for visualization purposes.

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- annotateBed.xml: Galaxy wrapper for "bedtools annotate". -->
<tool id="bedtools_annotatebed" name="AnnotateBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- Every repeat entry contributes one file to -files; when the user chose to
         name the files, the matching names are emitted after -names in the same order. -->
    <command><![CDATA[
        bedtools annotate
        -i $inputA
        -files
        #for $bed in $names.beds:
            $bed.input
        #end for

        #if $names.names_select == 'yes':
            -names
            #for $bed in $names.beds:
                $bed.inputName
            #end for
        #end if
        $strand
        $counts
        $both
        > $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file" />
        <!-- Additional files, if the user needs more -->
        <conditional name="names">
            <param name="names_select" type="select" label="Specify names for each file">
                <option value="no" selected="True">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="yes">
                <repeat name="beds" title="Add BED files and names" >
                    <param name="input" type="data" format="bed" label="BED file" />
                    <param name="inputName" type="text" label="Name of the file" />
                </repeat>
            </when>
            <when value="no">
                <repeat name="beds" title="Add BED files" >
                    <param name="input" type="data" format="bed" label="BED file" />
                </repeat>
            </when>
        </conditional>

        <expand macro="strand2" />
        <!-- -counts and -both are different flags; the -counts label used to be a verbatim
             copy of the -both label, leaving users unable to tell the two apart. -->
        <param name="counts" type="boolean" checked="false" truevalue="-counts" falsevalue=""
               label="Report the count of features in each file that overlap -i. Default is to report solely the fraction of -i covered by each file." />
        <param name="both" type="boolean" checked="false" truevalue="-both" falsevalue=""
               label="Report the count of features followed by the % coverage for each annotation file. Default is to report solely the fraction of the input file covered by each file." />
    </inputs>

    <outputs>
        <data name="output" format="bed" label="" />
    </outputs>
    <help>

**What it does**

bedtools annotate, well, annotates one BED/VCF/GFF file with the coverage and number of overlaps observed from multiple other BED/VCF/GFF files. In this way, it allows one to ask to what degree one feature coincides with multiple other feature types with a single command.

@REFERENCES@

    </help>
    <expand macro="citations" />
</tool>
<!-- bamToBed.xml: Galaxy wrapper for "bedtools bamtobed". -->
<tool id="bedtools_bamtobed" name="Convert from BAM to BED" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- $split was declared as an input but never reached the command line, so the
         checkbox had no effect; it is now passed through. The optional -tag argument
         is emitted before the output redirect so all options sit together. -->
    <command><![CDATA[
        bedtools bamtobed
        $option
        $split
        $ed_score
        #if str($tag):
            -tag $tag
        #end if
        -i '$input'
        > '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bam" label="Convert the following BAM file to BED"/>
        <param name="option" type="select" label="What type of BED output would you like">
            <option value="">Create a 6-column BED file.</option>
            <option value="-bed12">Create a full, 12-column "blocked" BED file.</option>
            <option value="-bedpe">Create a paired-end, BEDPE format.</option>
        </param>
        <param name="split" type="boolean" truevalue="-split" falsevalue="" checked="false"
               label="Report each portion of a split BAM alignment"
               help="(i.e., having an 'N' CIGAR operation) as a distinct BED intervals."/>
        <param name="ed_score" type="boolean" truevalue="-ed" falsevalue="" checked="false"
               label="Use alignment's edit-distance for BED score"/>
        <param name="tag" type="text" optional="true" label="Use other NUMERIC BAM alignment tag as the BED score"/>
    </inputs>
    <outputs>
        <data name="output" format="bed" metadata_source="input" label="${input.name} (as BED)"/>
    </outputs>
    <help>

**What it does**

bedtools bamtobed is a conversion utility that converts sequence alignments in BAM format into BED, BED12, and/or BEDPE records.

.. class:: infomark

The "Report spliced BAM alignment..." option breaks BAM alignments with the "N" (splice) operator into distinct BED entries. For example, using this option on a CIGAR such as 50M1000N50M would, by default, produce a single BED record that spans 1100bp. However, using this option, it would create two separate BED records that are each 50bp in size and are separated by 1000bp (the size of the N operation). This is important for RNA-seq and structural variation experiments.


.. class:: warningmark

If using a custom BAM alignment TAG as the BED score, note that this must be a numeric tag (e.g., type "i" as in NM:i:0).

.. class:: warningmark

If creating a BEDPE output (see output formatting options), the BAM file should be sorted by query name.

@REFERENCES@

    </help>
    <expand macro="citations" />
</tool>
<!-- bamToFastq.xml: Galaxy wrapper for "bedtools bamtofastq". -->
<tool id="bedtools_bamtofastq" name="Convert from BAM to FastQ" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- -fq2 takes the path of the second-end FASTQ file. Emitting the bare flag made it
         consume the following "-i" as its file name, so it is now paired with a dedicated
         output dataset and only added when the user enables it. -->
    <command><![CDATA[
        bedtools bamtofastq
        $tags
        -i '$input'
        -fq '$output'
        #if str($fq2):
            -fq2 '$output2'
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bam" label="Convert the following BAM file to FASTQ"/>
        <!-- Boolean parameters take their default from "checked"; "selected" is not a boolean attribute. -->
        <param name="tags" type="boolean" truevalue="-tags" falsevalue="" checked="false"
               label="Create FASTQ based on the mate info in the BAM R2 and Q2 tags."/>
        <param name="fq2" type="boolean" truevalue="-fq2" falsevalue="" checked="false"
               label="FASTQ for second end. Used if BAM contains paired-end data. BAM should be sorted by query name if creating paired FASTQ with this option."/>
    </inputs>
    <outputs>
        <data name="output" format="fastq" metadata_source="input" label="${input.name} (as FASTQ)"/>
        <!-- Second-end reads; only created when the fq2 option is switched on. -->
        <data name="output2" format="fastq" metadata_source="input" label="${input.name} (as FASTQ, second end)">
            <filter>fq2 is True</filter>
        </data>
    </outputs>
    <help>

**What it does**

bedtools bamtofastq is a conversion utility for extracting FASTQ records from sequence alignments in BAM format.

@REFERENCES@

    </help>
    <expand macro="citations" />
</tool>
<!-- bedpeToBam.xml: Galaxy wrapper for "bedtools bedpetobam". -->
<tool id="bedtools_bedpetobam" name="Convert from BEDPE to BAM" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- Flags come first, then the input, the genome file and finally the redirected BAM output. -->
    <command><![CDATA[
        bedtools bedpetobam
        $ubam
        -mapq $mapq
        -i '$input'
        -g $genome
        > '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bed,gff,vcf" label="BED/VCF/GFF file"/>
        <!-- The genome parameter is defined by the shared macro in macros.xml. -->
        <expand macro="genome" />
        <param name="mapq" type="integer" value="255"
               label="Set a mapping quality (SAM MAPQ field) value for all BED entries" />
        <param name="ubam" type="boolean" checked="false" truevalue="-ubam" falsevalue=""
               label="Write uncompressed BAM output."/>
    </inputs>
    <outputs>
        <data name="output" format="bam" metadata_source="input" label="${input.name} (as BAM)"/>
    </outputs>
    <help>

**What it does**

Converts feature records to BAM format.

.. class:: warningmark

BED files must be at least BED4 to create BAM (needs name field).

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- closestBed.xml: Galaxy wrapper for closestBed. -->
<tool id="bedtools_closestbed" name="ClosestBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <command><![CDATA[
        closestBed
        $strand
        $addition
        -t $ties
        -a $inputA
        -b $inputB
        > $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file"/>
        <param name="inputB" type="data" format="bed,gff,vcf,gff3" label="overlap intervals in this BED/VCF/GFF file?"/>

        <param name="ties" type="select" label="How ties for closest feature should be handled"
               help="This occurs when two features in B have exactly the same overlap with a feature in A.">
            <option value="all" selected="True">all - Report all ties (default)</option>
            <option value="first">first - Report the first tie that occurred in the B file</option>
            <option value="last">last - Report the last tie that occurred in the B file</option>
        </param>

        <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue=""
               label="Force strandedness"
               help="That is, find the closest feature in B overlaps A on the same strand. By default, this is disabled" />
        <param name="addition" type="boolean" checked="false" truevalue="-d" falsevalue=""
               label="In addition to the closest feature in B, report its distance to A as an extra column. The reported distance for overlapping features will be 0" />
    </inputs>
    <outputs>
        <!-- The history label previously read "Intersection of ...", a leftover from the
             intersect wrapper; it now names the operation this tool performs. -->
        <data name="output" format_source="inputA" metadata_source="inputA"
              label="Closest features in ${inputB.name} to ${inputA.name}"/>
    </outputs>
    <help>

**What it does**

Similar to intersectBed, closestBed searches for overlapping features in A and B. In the event that no feature in B overlaps the current feature in A, closestBed will report the closest (that is, least genomic distance from the start or end of A) feature in B. For example, one might want to find which is the closest gene to a significant GWAS polymorphism. Note that closestBed will report an overlapping feature as the closest—that is, it does not restrict to closest non-overlapping feature.

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- clusterBed.xml: Galaxy wrapper for "bedtools cluster". -->
<tool id="bedtools_clusterbed" name="ClusterBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- Optional strand flag, then the clustering distance and the input file. -->
    <command><![CDATA[
        bedtools cluster
        $strand
        -d $distance
        -i $inputA
        > $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file"/>
        <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false"
               label="Force strandedness."
               help="That is, only cluster features that are the same strand. By default, this is disabled." />
        <param name="distance" type="integer" value="0"
               label="Maximum distance between features allowed for features to be clustered"
               help="Default is 0. That is, overlapping and/or book-ended features are clustered." />
    </inputs>
    <outputs>
        <!-- Output datatype and metadata both follow the input dataset. -->
        <data name="output" format_source="inputA" metadata_source="inputA" label=""/>
    </outputs>
    <help>

**What it does**

Similar to merge, cluster report each set of overlapping or “book-ended” features in an interval file. In contrast to merge, cluster does not flatten the cluster of intervals into a new meta-interval; instead, it assigns an unique cluster ID to each record in each cluster. This is useful for having fine control over how sets of overlapping intervals in a single interval file are combined.

.. image:: $PATH_TO_IMAGES/cluster-glyph.png

.. class:: warningmark

bedtools cluster requires that you presort your data by chromosome and then by start position (e.g., sort -k1,1 -k2,2n in.bed > in.sorted.bed for BED files).

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- complementBed.xml: Galaxy wrapper for complementBed. -->
<tool id="bedtools_complementbed" name="ComplementBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- Fixes to the command line:
           * "-d $distance" referenced a parameter this tool never declares, so it is removed;
           * the input dataset was never handed to the program, so "-i $inputA" is added;
           * "-g genome" passed the literal word "genome"; it now uses the $genome
             parameter provided by the shared "genome" macro. -->
    <command><![CDATA[
        complementBed
        -i $inputA
        -g $genome
        > $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file"/>
        <expand macro="genome" />
    </inputs>
    <outputs>
        <data name="output" format_source="inputA" metadata_source="inputA" label="Complement of ${inputA.name}"/>
    </outputs>
    <help>

**What it does**

bedtools complement returns all intervals in a genome that are not covered by at least one interval in the input BED/GFF/VCF file.

.. image:: $PATH_TO_IMAGES/complement-glyph.png

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- coverageBed.xml: Galaxy wrapper for coverageBed. -->
<tool id="bedtools_coveragebed" name="Compute both the depth and breadth of coverage" version="@WRAPPER_VERSION@.1">
    <description>of features in file A across the features in file B (coverageBed)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- BAM input must be passed with -abam, any other format with -a.
         The result is piped through sort (chromosome, then numeric start) before being written. -->
    <command><![CDATA[
        coverageBed
        #if $inputA.ext == "bam"
            -abam '$inputA'
        #else
            -a '$inputA'
        #end if
        -b '$inputB'
        $d
        $hist
        $split
        $strandedness
        | sort -k1,1 -k2,2n
        > '$output'
    ]]></command>
    <inputs>
        <!-- "gg3" in the accepted formats was a typo for "gff3" (compare inputB below),
             which kept GFF3 datasets from being selectable as file A. -->
        <param name="inputA" type="data" format="bed,bam,gff,gff3,vcf" label="Count how many intervals in this BED/VCF/GFF/BAM file (source)">
            <validator type="unspecified_build" />
        </param>
        <param name="inputB" type="data" format="bed,gff,gff3,vcf" label="overlap the intervals in this BED file (target)">
            <validator type="unspecified_build" />
        </param>
        <param name="split" type="boolean" checked="false" truevalue="-split" falsevalue=""
               label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage"
               help="If set, the coverage will be calculated based the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data. (-split)" />
        <param name="strandedness" type="boolean" truevalue="-s" falsevalue="" checked="false"
               label="Force strandedness"
               help="That is, only features in A are only counted towards coverage in B if they are the same strand. (-s)"/>
        <param name="d" type="boolean" checked="false" truevalue="-d" falsevalue=""
               label="Report the depth at each position in each B feature"
               help="Positions reported are one based. Each position and depth follow the complete B feature. (-d)" />
        <param name="hist" type="boolean" checked="false" truevalue="-hist" falsevalue=""
               label="Report a histogram of coverage for each feature in B as well as a summary histogram for all features in B"
               help="Additional columns after each feature in B: 1) depth 2) # bases at depth 3) size of B 4) % of B at depth. (-hist)" />
    </inputs>

    <outputs>
        <data name="output" format="bed" metadata_source="inputB" label="count of overlaps in ${inputA.name} on ${inputB.name}"/>
    </outputs>
    <help>

**What it does**

coverageBed_ computes both the depth and breadth of coverage of features in
file A across the features in file B. For example, coverageBed can compute the coverage of sequence alignments
(file A) across 1 kilobase (arbitrary) windows (file B) tiling a genome of interest.
One advantage that coverageBed offers is that it not only counts the number of features that
overlap an interval in file B, it also computes the fraction of bases in B interval that were overlapped by one or more features.
Thus, coverageBed also computes the breadth of coverage for each interval in B.

.. _coverageBed: http://bedtools.readthedocs.org/en/latest/content/tools/coverage.html

.. class:: infomark

The output file will be comprised of each interval from your original target BED file, plus an additional column indicating the number of intervals in your source file that overlapped that target interval.

@REFERENCES@

    </help>
    <expand macro="citations" />
</tool>
<!-- expandBed.xml: Galaxy wrapper for "bedtools expand". -->
<tool id="bedtools_expandbed" name="ExpandBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- The user-supplied column list goes to -c, the dataset to -i. -->
    <command><![CDATA[
        bedtools expand
        -c $cols
        -i $inputA
        > $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file"/>
        <param name="cols" type="text" value=""
               label="Specify the column(s) (comma separated) that should be summarized" />
    </inputs>
    <outputs>
        <!-- Output datatype and metadata both follow the input dataset. -->
        <data name="output" format_source="inputA" metadata_source="inputA" label=""/>
    </outputs>
    <help>

**What it does**

Replicate lines in a file based on columns of comma-separated values.

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- flankbed.xml: Galaxy wrapper for flankBed. -->
<tool id="bedtools_flankbed" name="FlankBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- The "addition" conditional (declared by a macro in macros.xml) selects between a
         single -b value and separate -l / -r values. -->
    <command><![CDATA[
        flankBed
        $pct
        $strand
        -g $genome
        -i $inputA

        #if $addition.addition_select == 'b':
            -b $addition.b
        #else:
            -l $addition.l
            -r $addition.r
        #end if

        > $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file"/>
        <expand macro="genome" />
        <param name="pct" type="boolean" truevalue="-pct" falsevalue="" checked="false"
               label="Define -l and -r as a fraction of the feature’s length"
               help="E.g. if used on a 1000bp feature, -l 0.50, will add 500 bp “upstream”" />
        <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false"
               label="Define -l and -r based on strand"
               help="For example. if used, -l 500 for a negative-stranded feature, it will add 500 bp to the end coordinate" />
        <expand macro="addition" />
    </inputs>
    <outputs>
        <data name="output" format="bed" label=""/>
    </outputs>
    <help>

**What it does**

bedtools flank will optionally create flanking intervals whose size is user-specified fraction of the original interval.

.. image:: $PATH_TO_IMAGES/flank-glyph.png

.. class:: warningmark

In order to prevent creating intervals that violate chromosome boundaries, bedtools flank requires a genome file defining the length of each chromosome or contig.

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- genomeCoverageBed_bedgraph.xml: Galaxy wrapper producing BedGraph output from genomeCoverageBed. -->
<tool id="bedtools_genomecoveragebed_bedgraph" name="Create a BedGraph of genome coverage" version="@WRAPPER_VERSION@.0">
    <description>
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- BAM input is passed with -ibam; any other input is passed with -i together with
         the ${chromInfo} file as -g. -scale is only added when a value was entered. -->
    <command><![CDATA[
        genomeCoverageBed
        #if $input.ext == "bam"
            -ibam '$input'
        #else
            -i '$input'
            -g ${chromInfo}
        #end if

        #if str($scale):
            -scale $scale
        #end if

        -bg
        $zero_regions
        $split
        $strand
        > '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bed,bam" label="The BAM or BED file from which coverage should be computed">
            <validator type="unspecified_build" />
        </param>

        <param name="zero_regions" type="boolean" truevalue="-bga" falsevalue="" checked="true"
               label="Report regions with zero coverage"
               help="If set, regions without any coverage will also be reported." />

        <param name="split" type="boolean" truevalue="-split" falsevalue="" checked="false"
               label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage."
               help="If set, the coverage will be calculated based the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." />

        <param name="strand" type="select" label="Calculate coverage based on">
            <option value="">both strands combined</option>
            <option value="-strand +">positive strand only</option>
            <option value="-strand -">negative strand only</option>
        </param>

        <param name="scale" type="float" optional="true"
               label="Scale the coverage by a constant factor"
               help="Each BEDGRAPH coverage value is multiplied by this factor before being reported. Useful for normalizing coverage by, e.g., reads per million (RPM)"/>
    </inputs>
    <outputs>
        <data name="output" format="bedgraph" metadata_source="input" label="${input.name} (Genome Coverage BedGraph)" />
    </outputs>
    <help>


**What it does**

This tool calculates the genome-wide coverage of intervals defined in a BAM or BED file and reports them in BedGraph format.

.. class:: warningmark

The input BED or BAM file must be sorted by chromosome name (but doesn't necessarily have to be sorted by start position).

-----

**Example 1**

Input (BED format)-
Overlapping, un-sorted intervals::

    chr1 140 176
    chr1 100 130
    chr1 120 147


Output (BedGraph format)-
Sorted, non-overlapping intervals, with coverage value on the 4th column::

    chr1 100 120 1
    chr1 120 130 2
    chr1 130 140 1
    chr1 140 147 2
    chr1 147 176 1

-----

**Example 2 - with ZERO-Regions selected (assuming hg19)**

Input (BED format)-
Overlapping, un-sorted intervals::

    chr1 140 176
    chr1 100 130
    chr1 120 147


Output (BedGraph format)-
Sorted, non-overlapping intervals, with coverage value on the 4th column::

    chr1 0 100 0
    chr1 100 120 1
    chr1 120 130 2
    chr1 130 140 1
    chr1 140 147 2
    chr1 147 176 1
    chr1 176 249250621 0

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- genomeCoverageBed_histogram.xml: Galaxy wrapper producing a coverage histogram from genomeCoverageBed. -->
<tool id="bedtools_genomecoveragebed_histogram" name="Create a histogram of genome coverage" version="@WRAPPER_VERSION@.0">
    <description>
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- BAM input is passed with -ibam; any other input is passed with -i together with
         the ${chromInfo} file as -g. -max is only added when a value was entered. -->
    <command><![CDATA[
        genomeCoverageBed
        #if $input.ext == "bam"
            -ibam '$input'
        #else
            -i '$input'
            -g ${chromInfo}
        #end if
        #if str($max):
            -max $max
        #end if
        > '$output'
    ]]></command>

    <inputs>
        <param name="input" type="data" format="bed,bam" label="The BAM or BED file from which coverage should be computed"></param>
        <param name="max" type="text" optional="true"
               label="Max depth"
               help="Combine all positions with a depth >= max into a single bin in the histogram."/>
    </inputs>

    <outputs>
        <data name="output" format="tabular" metadata_source="input" label="${input.name} (Genome Coverage Histogram)" />
    </outputs>

    <help>
**What it does**

This tool calculates a histogram of genome coverage depth based on mapped reads in BAM format or intervals in BED format.


------


.. class:: infomark

The output file will contain five columns:

 * 1. Chromosome name (or 'genome' for whole-genome coverage)
 * 2. Coverage depth
 * 3. The number of bases on chromosome (or genome) with depth equal to column 2.
 * 4. The size of chromosome (or entire genome) in base pairs
 * 5. The fraction of bases on chromosome (or entire genome) with depth equal to column 2.

**Example Output**::

    chr2L 0 1379895 23011544 0.0599653
    chr2L 1 837250 23011544 0.0363839
    chr2L 2 904442 23011544 0.0393038
    chr2L 3 913723 23011544 0.0397072
    chr2L 4 952166 23011544 0.0413778
    chr2L 5 967763 23011544 0.0420555
    chr2L 6 986331 23011544 0.0428624
    chr2L 7 998244 23011544 0.0433801
    chr2L 8 995791 23011544 0.0432735
    chr2L 9 996398 23011544 0.0432999


@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
<!-- getfastaBed.xml: Galaxy wrapper for "bedtools getfasta". -->
<tool id="bedtools_getfastabed" name="GetFastaBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- Output is written through -fo rather than a shell redirect. -->
    <command><![CDATA[
        bedtools getfasta
        $name
        $tab
        $strand
        $split
        -fi $fasta
        -bed $inputA
        -fo $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file" />
        <param name="fasta" type="data" format="fasta" label="Fasta file" />
        <param name="name" type="boolean" checked="false" truevalue="-name" falsevalue=""
               label="Use the “name” column in the BED file for the FASTA headers in the output FASTA file" />
        <param name="tab" type="boolean" checked="false" truevalue="-tab" falsevalue=""
               label="Report extract sequences in a tab-delimited format instead of in FASTA format" />
        <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue=""
               label="Force strandedness"
               help="If the feature occupies the antisense strand, the sequence will be reverse complemented." />
        <param name="split" type="boolean" checked="false" truevalue="-split" falsevalue=""
               label="Given BED12 input, extract and concatenate the sequences from the BED 'blocks' (e.g., exons)" />
    </inputs>
    <outputs>
        <data name="output" format="fasta" />
    </outputs>
    <!-- The help text is wrapped in CDATA because it shows the literal header pattern
         <chrom>:<start>-<end>; written as raw text those angle brackets were parsed as
         unclosed elements and made the whole tool file ill-formed XML. -->
    <help><![CDATA[
**What it does**

bedtools getfasta will extract the sequence defined by the coordinates in a BED interval and create a new FASTA entry in the output file for each extracted sequence. By default, the FASTA header for each extracted sequence will be formatted as follows: “<chrom>:<start>-<end>”.

.. image:: $PATH_TO_IMAGES/getfasta-glyph.png

.. class:: warningmark

1. The headers in the input FASTA file must exactly match the chromosome column in the BED file.

2. You can use the UNIX fold command to set the line width of the FASTA output. For example, fold -w 60 will make each line of the FASTA file have at most 60 nucleotides for easy viewing.

@REFERENCES@
    ]]></help>
    <expand macro="citations" />
</tool>
<!-- groupbyBed.xml: Galaxy wrapper for "bedtools groupby". -->
<tool id="bedtools_groupbybed" name="GroupByBed" version="@WRAPPER_VERSION@.0">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- -c: column(s) to summarize, -g: grouping column(s), -o: operation applied to -c. -->
    <command><![CDATA[
        bedtools groupby
        -c $cols
        -g $group
        -o $operation
        -i $inputA
        > $output
    ]]></command>
    <inputs>
        <param name="inputA" type="data" format="bed,vcf,gff,gff3" label="BED/VCF/GFF file"/>
        <param name="cols" type="text" value="" label="Specify the column(s) (comma separated) that should be summarized" />
        <param name="group" type="text" value="1,2,3"
               label="Specifies which column(s) (1-based) should be used to group the input. Columns may be comma-separated with each column must be explicitly listed. Or, ranges (e.g. 1-4) are also allowed." />
        <param name="operation" type="select" label="Specify the operation">
            <option value="sum" selected="True">Sum - numeric only</option>
            <option value="stdev">Stdev - numeric only</option>
            <option value="sstdev">Sstdev - numeric only</option>
            <option value="freqasc">Freqasc - print a comma separated list of values observed and the number of times they were observed. Reported in ascending order of frequency*</option>
            <option value="freqdesc">Freqdesc - print a comma separated list of values observed and the number of times they were observed. Reported in descending order of frequency*</option>
            <option value="first">First - numeric or text</option>
            <option value="last">Last - numeric or text</option>
            <!-- Further operations are contributed by macros defined in macros.xml. -->
            <expand macro="math_options" />
            <expand macro="additional_math_options" />
        </param>
    </inputs>
    <outputs>
        <data name="output" format_source="inputA" metadata_source="inputA" label=""/>
    </outputs>
    <!-- The help previously repeated the description of "bedtools expand"; it now
         describes what this command line (-g / -c / -o) actually does. -->
    <help>

**What it does**

bedtools groupby summarizes one or more columns of a file based upon common column groupings, akin to the SQL "group by" command. Lines sharing the same values in the grouping column(s) are collapsed into one output line, and the selected operation is applied to the column(s) chosen for summary.

@REFERENCES@
    </help>
    <expand macro="citations" />
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/intersectBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,91 @@
+<tool id="bedtools_intersectbed" name="Intersect interval files" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around intersectBed: reports the overlaps between the features
+        in inputA (BED/VCF/GFF/BAM) and inputB (BED/GFF/VCF). Every boolean and
+        select parameter below expands to its command-line flag, or to an empty
+        string when the option is switched off.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        intersectBed
+        #if $inputA.ext == "bam":
+            -abam '$inputA'
+        #else:
+            -a '$inputA'
+        #end if
+
+        -b '$inputB'
+        $split
+        $strand
+        #if str($fraction):
+          -f '$fraction'
+        #end if
+        $reciprocal
+        $invert
+        $once
+        $count
+        $header
+        $overlap_mode
+        > '$output'
+    </command>
+    <inputs>
+        <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="overlap intervals in this BED file?"/>
+
+        <param name="strand" type="select" label="Calculate the intersection based on strandedness?">
+            <option value="" selected="True">Overlaps on either strand</option>
+            <option value="-s">Only overlaps occurring on the **same** strand.</option>
+            <option value="-S">Only overlaps occurring on the **opposite** strand.</option>
+        </param>
+
+        <param name="overlap_mode" type="select" label="What should be written to the output file?">
+            <option value="-wa" selected="True">Write the original entry in A for each overlap.</option>
+            <option value="-wb">Write the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by the fraction- and reciprocal option.</option>
+            <option value="-wo">Write the original A and B entries plus the number of base pairs of overlap between the two features. Only A features with overlap are reported. Restricted by the fraction- and reciprocal option.</option>
+            <option value="-wao">Write the original A and B entries plus the number of base pairs of overlap between the two features. However, A features w/o overlap are also reported with a NULL B feature and overlap = 0. Restricted by the fraction- and reciprocal option.</option>
+            <option value="-loj">Perform a "left outer join". That is, for each feature in A report each overlap with B. If no overlaps are found, report a NULL feature for B.</option>
+        </param>
+
+        <param name="split" type="boolean" checked="true" truevalue="-split" falsevalue="" label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage." help="If set, the coverage will be calculated based the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." />
+        <!-- -f: optional; the flag is only emitted when a value was entered -->
+        <param name="fraction" type="text" optional="true" label="Minimum overlap required as a fraction of the BAM alignment" help="Alignments are only retained if the overlap with the an interval in the BED file comprises at least this fraction of the BAM alignment's length. For example, to require that the overlap affects 50% of the BAM alignment, use 0.50. (-f)"/>
+        <!-- -r -->
+        <param name="reciprocal" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Require reciprocal overlap." help="If set, the overlap between the BAM alignment and the BED interval must affect the above fraction of both the alignment and the BED interval. (-r)" />
+        <!-- -v -->
+        <param name="invert" type="boolean" checked="false" truevalue="-v" falsevalue="" label="Report only those alignments that **do not** overlap the BED file. (-v)"/>
+        <!-- -u -->
+        <param name="once" type="boolean" checked="false" truevalue="-u" falsevalue="" label="Write the original A entry _once_ if _any_ overlaps found in B." help="Just report the fact >=1 hit was found. (-u)" />
+        <!-- -c -->
+        <param name="count" type="boolean" checked="false" truevalue="-c" falsevalue="" label="For each entry in A, report the number of overlaps with B." help="Reports 0 for A entries that have no overlap with B. (-c)" />
+        <!-- -header: declared once; parameter names must be unique within a tool -->
+        <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the A file prior to results." />
+    </inputs>
+    <outputs>
+        <data format_source="inputA" name="output" metadata_source="inputA" label="Intersection of ${inputA.name} and ${inputB.name}"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+By far, the most common question asked of two sets of genomic features is whether or not any of the features in the two sets “overlap” with one another. This is known as feature intersection. bedtools intersect allows one to screen for overlaps between two sets of genomic features. Moreover, it allows one to have fine control as to how the intersections are reported. bedtools intersect works with both BED/GFF/VCF and BAM files as input.
+
+.. image:: $PATH_TO_IMAGES/intersect-glyph.png
+
+.. class:: infomark
+
+Note that each BAM alignment is treated individually. Therefore, if one end of a paired-end alignment overlaps an interval in the BED file, yet the other end does not, the output file will only include the overlapping end.
+
+.. class:: infomark
+
+Note that a BAM alignment will be sent to the output file **once** even if it overlaps more than one interval in the BED file.
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jaccardBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,54 @@
+<tool id="bedtools_jaccard" name="JaccardBed" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around "bedtools jaccard". The strand and split parameters
+        are expanded from macros.xml; the tool writes a small tab-separated
+        table of statistics rather than interval records.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools jaccard
+
+        $reciprocal
+        $strand
+        $split
+        -f $overlap
+        -a '$inputA'
+        -b '$inputB'
+        > '$output'
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param format="bed,vcf,gff,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/>
+        <param name="overlap" type="float" value="0.000000001" label="Minimum overlap required as a fraction of A" />
+
+        <!-- -r: must not reuse -f, which the command already emits with the overlap value -->
+        <param name="reciprocal" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Require that the fraction of overlap be reciprocal for A and B. In other words, if -f is 0.90 and -r is used, this requires that B overlap at least 90% of A and that A also overlaps at least 90% of B" />
+        <!-- "strand" and "split" are each declared exactly once, through the shared macros -->
+        <expand macro="strand2" />
+        <expand macro="split" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output" metadata_source="inputA" label="Intersection of ${inputA.name} and ${inputB.name}" />
+    </outputs>
+    <help>
+
+**What it does**
+
+By default, bedtools jaccard reports the length of the intersection, the length of the union (minus the intersection), the final Jaccard statistic reflecting the similarity of the two sets, as well as the number of intersections.
+Whereas the bedtools intersect tool enumerates each an every intersection between two sets of genomic intervals, one often needs a single statistic reflecting the similarity of the two sets based on the intersections between them. The Jaccard statistic is used in set theory to represent the ratio of the intersection of two sets to the union of the two sets. Similarly, Favorov et al [1] reported the use of the Jaccard statistic for genome intervals: specifically, it measures the ratio of the number of intersecting base pairs between two sets to the number of base pairs in the union of the two sets. The bedtools jaccard tool implements this statistic, yet modifies the statistic such that the length of the intersection is subtracted from the length of the union. As a result, the final statistic ranges from 0.0 to 1.0, where 0.0 represents no overlap and 1.0 represent complete overlap.
+
+.. image:: $PATH_TO_IMAGES/jaccard-glyph.png
+
+.. class:: warningmark
+
+The jaccard tool requires that your data is pre-sorted by chromosome and then by start position (e.g., sort -k1,1 -k2,2n in.bed > in.sorted.bed for BED files).
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linksBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,38 @@
+<tool id="bedtools_links" name="LinksBed" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around linksBed: writes an HTML page for the features of inputA.
+        User-entered text and dataset paths are single-quoted on the command line.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        linksBed
+        -base '$basename'
+        -org '$org'
+        -db '$db'
+        -i '$inputA'
+        > '$output'
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param name="basename" type="text" value="http://genome.ucsc.edu" label="The “basename” for the UCSC browser" />
+        <param name="org" type="text" value="human" label="The organism (e.g. mouse, human)" />
+        <param name="db" type="text" value="hg18" label="The genome build" />
+    </inputs>
+    <outputs>
+        <data name="output" format="html" />
+    </outputs>
+    <help>
+
+**What it does**
+
+Creates an HTML file with links to an instance of the UCSC Genome Browser for all features / intervals in a file. This is useful for cases when one wants to manually inspect through a large set of annotations or features.
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,94 @@
+<macros>
+    <!-- Shared building blocks imported by every bedtools wrapper in this repository. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.19.1">bedtools</requirement>
+        </requirements>
+        <version_command>bedtools --version</version_command>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.19</token>
+    <xml name="stdio">
+        <stdio>
+            <!-- Anything other than zero is an error -->
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <!-- In case the return code has not been set properly check stderr too -->
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+
+    <xml name="overlap">
+        <param name="overlap" type="float" value="0.000000001" label="Minimum overlap required as a fraction of A" />
+    </xml>
+    <xml name="strand2">
+        <param name="strand" type="select" label="Calculation based on strandedness?">
+            <option value="" selected="True">Overlaps on either strand</option>
+            <option value="-s">Only overlaps occurring on the **same** strand.</option>
+            <option value="-S">Only overlaps occurring on the **opposite** strand.</option>
+        </param>
+    </xml>
+    <!-- Optional seed: the integer is only asked for when "choose" is ticked. -->
+    <xml name="seed">
+        <conditional name="seed">
+            <param name="choose" type="boolean" label="Choose Seed?" checked="false" truevalue="True" falsevalue="False" />
+            <when value="True">
+                <param name="seed" type="integer" value="12345" label="Enter Seed" />
+            </when>
+            <when value="False" />
+        </conditional>
+    </xml>
+    <xml name="split">
+        <param name="split" type="boolean" checked="true" truevalue="-split" falsevalue=""
+            label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage."
+            help="If set, the coverage will be calculated based the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." />
+    </xml>
+    <xml name="genome">
+        <param format="bed,vcf,gff,gff3" name="genome" type="data" label="Genome file" />
+    </xml>
+    <xml name="addition">
+        <conditional name="addition">
+            <param name="addition_select" type="select" label="Choose what you want to do">
+                <option value="b" selected="True">Increase the BED/GFF/VCF entry by the same number base pairs in each direction.</option>
+                <option value="lr">Increase by Start Coordinate and End Coordinate</option>
+            </param>
+            <when value="b">
+                <param name="b" label="Give Value" type="integer" value="0" />
+            </when>
+            <when value="lr">
+                <param name="l" label="The number of base pairs to subtract from the start coordinate" type="integer" value="0" />
+                <param name="r" label="The number of base pairs to add to the end coordinate" type="integer" value="0" />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Option values are handed to bedtools verbatim as the operation name, so they must match its spelling. -->
+    <xml name="math_options">
+        <option value="min">Min - numeric only</option>
+        <option value="max">Max - numeric only</option>
+        <option value="mean">Mean - numeric only</option>
+        <option value="median">Median - numeric only</option>
+        <option value="mode">Mode - numeric only</option>
+        <option value="antimode">Antimode - numeric only</option>
+        <option value="collapse">collapse (i.e., print a comma separated list) - numeric or text</option>
+    </xml>
+    <xml name="additional_math_options">
+        <option value="count">Count - numeric or text</option>
+        <option value="count_distinct">Count Distinct - numeric or text</option>
+        <option value="distinct">distinct (i.e., print a comma separated list) - numeric or text</option>
+        <option value="concat">concat (i.e., print a comma separated list) - numeric or text</option>
+    </xml>
+    <token name="@REFERENCES@">
+------
+
+This tool is part of the `bedtools package`_ from the `Quinlan laboratory`_.
+
+.. _bedtools package: https://github.com/arq5x/bedtools2
+.. _Quinlan laboratory: http://cphg.virginia.edu/quinlan/
+
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btq033</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/makewindowsBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,85 @@
+<tool id="bedtools_makewindowsbed" name="MakeWindowsBed" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around "bedtools makewindows". The interval source is either a
+        genome file (-g) or a BED file (-b); -i is reserved for the ID naming
+        scheme selected by "sourcename". The result is always written as BED.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools makewindows
+        #if $type.type_select == 'genome':
+            -g '$type.genome'
+        #else:
+            -b '$type.inputA'
+        #end if
+        #if $action.action_select == 'windowsize':
+            -w $action.windowsize
+            #if $action.step_size.step_size_select == 'yes':
+                -s $action.step_size.step_size
+            #end if
+        #else:
+            -n $action.number
+        #end if
+        $sourcename
+        > '$output'
+    </command>
+    <inputs>
+        <conditional name="type">
+            <param name="type_select" type="select" label="Work with">
+                <option value="bed" selected="True">Bed File</option>
+                <option value="genome">Genome File</option>
+            </param>
+            <when value="bed">
+                <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+            </when>
+            <when value="genome">
+                <expand macro="genome" />
+            </when>
+        </conditional>
+        <conditional name="action">
+            <param name="action_select" type="select" label="Work with">
+                <option value="windowsize" selected="True">Set WindowSize</option>
+                <option value="number">Give Number of Windows</option>
+            </param>
+            <when value="windowsize">
+                <param name="windowsize" type="integer" value="1" label="Divide each input interval (either a chromosome or a BED interval) to fixed-sized windows (i.e. same number of nucleotide in each window)." />
+                <conditional name="step_size">
+                    <param name="step_size_select" type="select" label="Specify Step size? i.e. how many base pairs to step before creating a new window. Used to create 'sliding' windows. Defaults to window size (non-sliding windows).">
+                        <option value="yes">Yes</option>
+                        <option value="no" selected="True">No</option>
+                    </param>
+                    <when value="yes">
+                        <param name="step_size" type="integer" value="100" label="Specify it" />
+                    </when>
+                    <when value="no" />
+                </conditional>
+            </when>
+            <when value="number">
+                <param name="number" type="integer" value="1" label="Divide each input interval (either a chromosome or a BED interval) to fixed number of windows (i.e. same number of windows, with varying window sizes)." />
+            </when>
+        </conditional>
+        <param name="sourcename" type="select" label="ID Naming Options">
+            <option value="" selected="True">Default</option>
+            <option value="-i src">use the source interval's name</option>
+            <option value="-i winnum">use the window number as the ID (e.g. 1,2,3,4...)</option>
+            <option value="-i srcwinnum">use the source interval's name with the window number.</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+Makes adjacent or sliding windows across a genome or BED file.
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mapBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,76 @@
+<tool id="bedtools_map" name="MapBed" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around "bedtools map": applies the selected operation (-o) to
+        column "col" (-c) of the B features that overlap each A feature. The
+        optional genome file (-g) is only passed when "genome_choose" is ticked.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools map
+        -a '$inputA'
+        -b '$inputB'
+        $strand
+        -o $operation
+        -c $col
+        -f $overlap
+        $reciprocal
+        $split
+        $header
+        #if str($genome.genome_choose) == "-g":
+            -g '$genome.genome'
+        #end if
+        > '$output'
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file A"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file B"/>
+        <param name="col" type="integer" value="5" label="Specify the column from the B file to map onto intervals in A" />
+        <expand macro="overlap" />
+        <param name="reciprocal" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Require reciprocal overlap." help="If set, the overlap between the BAM alignment and the BED interval must affect the above fraction of both the alignment and the BED interval." />
+        <expand macro="strand2" />
+        <param name="operation" type="select" label="Specify the operation">
+            <option value="sum">Sum - numeric only</option>
+            <option value="absmin">AbsMin - numeric only</option>
+            <option value="absmax">AbsMax - numeric only</option>
+            <expand macro="math_options" />
+            <expand macro="additional_math_options" />
+        </param>
+        <expand macro="split" />
+        <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the A file prior to results." />
+        <conditional name="genome">
+            <param name="genome_choose" type="boolean" checked="false" truevalue="-g" falsevalue="" label="Specify a genome file that defines the expected chromosome order (-g)" help="" />
+            <when value="-g">
+                <expand macro="genome" />
+            </when>
+            <when value="" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format_source="inputA" name="output" metadata_source="inputA" label="Mapping of ${inputB.name} into ${inputA.name}"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools map allows one to map overlapping features in a B file onto features in an A file and apply statistics and/or summary operations on those features.
+
+.. image:: $PATH_TO_IMAGES/map-glyph.png
+
+.. class:: infomark
+
+bedtools map requires each input file to be sorted by genome coordinate. For BED files, this can be done with sort -k1,1 -k2,2n. Other sorting criteria are allowed if a genome file (-g) is provides that specifies the expected chromosome order.
+
+.. class:: infomark
+
+The map tool is substantially faster in versions 2.19.0 and later. The plot below demonstrates the increased speed when, for example, counting the number of exome alignments that align to each exon. The bedtools times are compared to the bedops bedmap utility as a point of reference.
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maskFastaBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,42 @@
+<tool id="bedtools_maskfastabed" name="MaskFastaBed" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around "bedtools maskfasta": masks the regions of the FASTA
+        file that are listed in inputA. "soft" switches to lower-case masking
+        (-soft); "mc" is the replacement character used for hard masking.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools maskfasta
+        $soft
+        -mc '$mc'
+        -fi '$fasta'
+        -bed '$inputA'
+        -fo '$output'
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param format="fasta" name="fasta" type="data" label="Fasta file"/>
+
+        <param name="soft" type="boolean" checked="false" truevalue="-soft" falsevalue="" label="Soft-mask (that is, convert to lower-case bases) the FASTA sequence. By default, hard-masking (that is, conversion to Ns) is performed" />
+        <param name="mc" type="text" value="N" length="1" label="Replace masking character. That is, instead of masking with Ns, use another character." />
+    </inputs>
+    <outputs>
+        <data format="fasta" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools maskfasta masks sequences in a FASTA file based on intervals defined in a feature file. The headers in the input FASTA file must exactly match the chromosome column in the feature file. This may be useful fro creating your own masked genome file based on custom annotations or for masking all but your target regions when aligning sequence data from a targeted capture experiment.
+
+.. image:: $PATH_TO_IMAGES/maskfasta-glyph.png
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mergeBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,209 @@
+<tool id="bedtools_mergebed" name="Merge BED files" version="@WRAPPER_VERSION@.0">
+    <description>(mergeBed)</description>
+    <!--
+        Wrapper around mergeBed: combines overlapping or book-ended features of
+        one interval file. The -scores flag is only emitted when a reporting
+        mode other than "none" is selected.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        mergeBed
+            -i '$input'
+            $strandedness
+            $report_number
+            -d $distance
+            $nms
+            #if str($scores) != 'none'
+                -scores $scores
+            #end if
+            > '$output'
+    </command>
+    <inputs>
+        <param name="input" format="bed,gff,vcf" type="data" label="Merge the following BED/VCF/GFF file"/>
+        <param name="strandedness" type="boolean" label="Force strandedness." truevalue="-s" falsevalue="" checked="false"
+            help="That is, only merge features that are the same strand."/>
+        <param name="report_number" type="boolean" label="Report the number of BED entries that were merged." truevalue="-n" falsevalue="" checked="false"
+            help="1 is reported if no merging occurred."/>
+        <param name="nms" type="boolean" label="Report the names of the merged features separated by commas." truevalue="-nms" falsevalue="" checked="false"
+            help="Adds a column listing the names of the features that were merged."/>
+
+        <param name="distance" type="integer" value="0" label="Maximum distance between features allowed for features to be merged."
+            help="That is, overlapping and/or book-ended features are merged."/>
+        <param name="scores" type="select" label="Report the scores of the merged features as">
+            <option value="none" selected="True">Do not report at all</option>
+            <option value="sum">Sum</option>
+            <expand macro="math_options" />
+        </param>
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" metadata_source="input" label="Merged ${input.name}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="0.bed" ftype="bed" />
+            <output name="output" file="0_result.bed" ftype="bed" />
+        </test>
+        <test>
+            <param name="input" value="1.bed" ftype="bed" />
+            <param name="strandedness" value="-s" />
+            <output name="output" file="1_result.bed" ftype="bed" />
+        </test>
+        <test>
+            <param name="input" value="2.bed" ftype="bed" />
+            <param name="report_number" value="-n" />
+            <output name="output" file="2_result.bed" ftype="bed" />
+        </test>
+        <test>
+            <param name="input" value="3.bed" ftype="bed" />
+            <param name="distance" value="1000" />
+            <output name="output" file="3_result.bed" ftype="bed" />
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+bedtools merge combines overlapping or "book-ended" features in an interval file into a single feature which spans all of the combined features.
+
+
+.. image:: $PATH_TO_IMAGES/merge-glyph.png
+
+
+.. class:: warningmark
+
+bedtools merge requires that you presort your data by chromosome and then by start position.
+
+
+==========================================================================
+Default behavior
+==========================================================================
+By default, ``bedtools merge`` combines overlapping (by at least 1 bp) and/or
+bookended intervals into a single, "flattened" or "merged" interval.
+
+::
+
+  $ cat A.bed
+  chr1 100 200
+  chr1 180 250
+  chr1 250 500
+  chr1 501 1000
+
+  $ bedtools merge -i A.bed
+  chr1 100 500
+  chr1 501 1000
+
+
+==========================================================================
+*-s* Enforcing "strandedness"
+==========================================================================
+The ``-s`` option will only merge intervals that are overlapping/bookended
+*and* are on the same strand.
+
+::
+
+  $ cat A.bed
+  chr1 100 200 a1 1 +
+  chr1 180 250 a2 2 +
+  chr1 250 500 a3 3 -
+  chr1 501 1000 a4 4 +
+
+  $ bedtools merge -i A.bed -s
+  chr1 100 250 +
+  chr1 501 1000 +
+  chr1 250 500 -
+
+
+==========================================================================
+*-n* Reporting the number of features that were merged
+==========================================================================
+The -n option will report the number of features that were combined from the
+original file in order to make the newly merged feature. If a feature in the
+original file was not merged with any other features, a "1" is reported.
+
+::
+
+  $ cat A.bed
+  chr1 100 200
+  chr1 180 250
+  chr1 250 500
+  chr1 501 1000
+
+  $ bedtools merge -i A.bed -n
+  chr1 100 500 3
+  chr1 501 1000 1
+
+
+==========================================================================
+*-d* Controlling how close two features must be in order to merge
+==========================================================================
+By default, only overlapping or book-ended features are combined into a new
+feature. However, one can force ``merge`` to combine more distant features
+with the ``-d`` option. For example, were one to set ``-d`` to 1000, any
+features that overlap or are within 1000 base pairs of one another will be
+combined.
+
+::
+
+  $ cat A.bed
+  chr1 100 200
+  chr1 501 1000
+
+  $ bedtools merge -i A.bed
+  chr1 100 200
+  chr1 501 1000
+
+  $ bedtools merge -i A.bed -d 1000
+  chr1 100 1000
+
+
+=============================================================
+*-nms* Reporting the names of the features that were merged
+=============================================================
+Occasionally, one might like to know that names of the features that were
+merged into a new feature. The ``-nms`` option will add an extra column to the
+``merge`` output which lists (separated by commas) the names of the
+merged features.
+
+::
+
+  $ cat A.bed
+  chr1 100 200 A1
+  chr1 150 300 A2
+  chr1 250 500 A3
+
+  $ bedtools merge -i A.bed -nms
+  chr1 100 500 A1,A2,A3
+
+
+===============================================================
+*-scores* Reporting the scores of the features that were merged
+===============================================================
+Similarly, we might like to know that scores of the features that were
+merged into a new feature. Enter the ``-scores`` option. One can specify
+how the scores from each overlapping interval should be reported.
+
+::
+
+  $ cat A.bed
+  chr1 100 200 A1 1
+  chr1 150 300 A2 2
+  chr1 250 500 A3 3
+
+  $ bedtools merge -i A.bed -scores mean
+  chr1 100 500 2
+
+  $ bedtools merge -i A.bed -scores max
+  chr1 100 500 3
+
+  $ bedtools merge -i A.bed -scores collapse
+  chr1 100 500 1,2,3
+
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multiCov.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,64 @@
+<tool id="bedtools_multicovtbed" name="MultiCovBed" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around "bedtools multicov": counts the alignments from one or
+        more BAM files (-bams) that overlap each interval of a BED file (-bed).
+        The tool id is left as published; renaming it would change the tool's identity.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools multicov
+        -bed '$input1'
+        -bams
+        #for $bam in $bams:
+            '$bam.input'
+        #end for
+        $strand
+        -f $overlap
+        $reciprocal
+        $split
+        -q $mapq
+        $duplicate
+        $failed
+        $proper
+        > '$output'
+    </command>
+    <inputs>
+        <param name="input1" format="bed" type="data" label="First sorted BED file" />
+        <!-- At least one BAM file is needed for the -bams list; more can be added -->
+        <repeat name="bams" title="Add BAM files" min="1">
+            <param name="input" format="bam" type="data" label="BAM file" />
+        </repeat>
+        <expand macro="strand2" />
+        <expand macro="overlap" />
+        <param name="reciprocal" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Require that the fraction overlap be reciprocal for A and B. In other words, if -f is 0.90 and -r is used, this requires that B overlap 90% of A and A _also_ overlaps 90% of B." />
+        <expand macro="split" />
+        <param name="mapq" type="integer" value="0" label="Minimum mapping quality (MAPQ) an alignment must have to be counted (-q)" />
+
+        <param name="duplicate" type="boolean" checked="false" truevalue="-D" falsevalue="" label="Include duplicate reads. Default counts non-duplicates only" />
+        <param name="failed" type="boolean" checked="false" truevalue="-F" falsevalue="" label=" Include failed-QC reads. Default counts pass-QC reads only" />
+        <param name="proper" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Only count proper pairs. Default counts all alignments with MAPQ > -q argument, regardless of the BAM FLAG field." />
+    </inputs>
+
+    <outputs>
+        <data format="bed" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools multicov, reports the count of alignments from multiple position-sorted and indexed BAM files that overlap intervals in a BED file. Specifically, for each BED interval provided, it reports a separate count of overlapping alignments from each BAM file.
+
+.. class:: infomark
+
+bedtools multicov depends upon index BAM files in order to count the number of overlaps in each BAM file. As such, each BAM file should be position sorted (samtool sort aln.bam aln.sort) and indexed (samtools index aln.sort.bam) with either samtools or bamtools.
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multiIntersectBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,201 @@
+<tool id="bedtools_multiintersectbed" name="Intersect multiple sorted BED files" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <!--
+        Wrapper around multiIntersectBed. The -names list is built in the same
+        order as the -i list; each entry is either the dataset name or the
+        custom name held in that input's own name conditional.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        multiIntersectBed
+        $header
+        #if $zero.value == True:
+            -empty
+            -g ${chromInfo}
+        #end if
+
+        -i '$input1'
+        '$input2'
+        #for $q in $beds
+            '${q.input}'
+        #end for
+
+        -names
+        #if $name1.choice == "tag":
+            '${input1.name}'
+        #else
+            '${name1.custom_name}'
+        #end if
+
+        #if $name2.choice == "tag":
+            '${input2.name}'
+        #else
+            '${name2.custom_name}'
+        #end if
+
+        #for $q in $beds
+            #if $q.name.choice == "tag":
+                '${q.input.name}'
+            #else
+                '${q.name.custom_name}'
+            #end if
+        #end for
+        > '$output'
+    </command>
+
+    <inputs>
+        <!-- Make it easy for the user, first two input files are always shown -->
+        <!-- INPUT 1 -->
+        <param name="input1" format="bed" type="data" label="First sorted BED file" />
+
+        <conditional name="name1">
+            <param name="choice" type="select" label="Sample name">
+                <option value="tag" selected="true">Use input's tag</option>
+                <option value="custom">Enter custom table name</option>
+            </param>
+            <when value="tag">
+            </when>
+            <when value="custom">
+                <param name="custom_name" type="text" area="false" label="Custom sample name"/>
+            </when>
+        </conditional>
+
+        <!-- INPUT 2 -->
+        <param name="input2" format="bed" type="data" label="Second sorted BED file" />
+
+        <conditional name="name2">
+            <param name="choice" type="select" label="Sample name">
+                <option value="tag" selected="true">Use input's tag</option>
+                <option value="custom">Enter custom table name</option>
+            </param>
+            <when value="tag">
+            </when>
+            <when value="custom">
+                <param name="custom_name" type="text" area="false" label="Custom sample name"/>
+            </when>
+        </conditional>
+
+        <!-- Additional files, if the user needs more -->
+        <repeat name="beds" title="Add'l sorted BED files" >
+            <param name="input" format="bed" type="data" label="BED file" />
+
+            <conditional name="name">
+                <param name="choice" type="select" label="Sample name">
+                    <option value="tag" selected="true">Use input's tag</option>
+                    <option value="custom">Enter custom table name</option>
+                </param>
+                <when value="tag">
+                </when>
+                <when value="custom">
+                    <param name="custom_name" type="text" area="false" label="Custom sample name"/>
+                </when>
+            </conditional>
+        </repeat>
+
+        <param name="header" type="boolean" checked="true" truevalue="-header" falsevalue="" label="Print header line" help="The first line will include the name of each sample." />
+
+        <param name="zero" type="boolean" checked="true" label="Report regions that are not covered by any of the files" help="If set, regions that are not overlapped by any file will also be reported. Requires a valid organism key for all input datasets" />
+
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="output" metadata_source="input1" label="Common intervals identified from among ${input1.name}, ${input2.name} and so on." />
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool identifies common intervals among multiple, sorted BED files. Intervals can be common among 0 to N of the N input BED files. The pictorial and raw data examples below illustrate the behavior of this tool more clearly.
+
+
+.. image:: http://people.virginia.edu/~arq5x/files/bedtools-galaxy/mbi.png
+
+
+.. class:: warningmark
+
+This tool requires that each BED file is reference-sorted (chrom, then start).
+
+
+.. class:: infomark
+
+The output file will contain five fixed columns, plus additional columns for each BED file:
+
+    * 1. Chromosome name (or 'genome' for whole-genome coverage).
+    * 2. The zero-based start position of the interval.
+    * 3. The one-based end position of the interval.
+    * 4. The number of input files that had at least one feature overlapping this interval.
+    * 5. A list of input files or labels that had at least one feature overlapping this interval.
+    * 6. For each input file, an indication (1 = Yes, 0 = No) of whether or not the file had at least one feature overlapping this interval.
+
+------
+
+**Example input**::
+
+    # a.bed
+    chr1 6 12
+    chr1 10 20
+    chr1 22 27
+    chr1 24 30
+
+    # b.bed
+    chr1 12 32
+    chr1 14 30
+
+    # c.bed
+    chr1 8 15
+    chr1 10 14
+    chr1 32 34
+
+
+------
+
+**Example without a header and without reporting intervals with zero coverage**::
+
+
+    chr1 6 8 1 1 1 0 0
+    chr1 8 12 2 1,3 1 0 1
+    chr1 12 15 3 1,2,3 1 1 1
+    chr1 15 20 2 1,2 1 1 0
+    chr1 20 22 1 2 0 1 0
+    chr1 22 30 2 1,2 1 1 0
+    chr1 30 32 1 2 0 1 0
+    chr1 32 34 1 3 0 0 1
+
+
+**Example adding a header line**::
+
+
+    chrom start end num list a.bed b.bed c.bed
+    chr1 6 8 1 1 1 0 0
+    chr1 8 12 2 1,3 1 0 1
+    chr1 12 15 3 1,2,3 1 1 1
+    chr1 15 20 2 1,2 1 1 0
+    chr1 20 22 1 2 0 1 0
+    chr1 22 30 2 1,2 1 1 0
+    chr1 30 32 1 2 0 1 0
+    chr1 32 34 1 3 0 0 1
+
+
+**Example adding a header line and custom file labels**::
+
+
+    chrom start end num list joe bob sue
+    chr1 6 8 1 joe 1 0 0
+    chr1 8 12 2 joe,sue 1 0 1
+    chr1 12 15 3 joe,bob,sue 1 1 1
+    chr1 15 20 2 joe,bob 1 1 0
+    chr1 20 22 1 bob 0 1 0
+    chr1 22 30 2 joe,bob 1 1 0
+    chr1 30 32 1 bob 0 1 0
+    chr1 32 34 1 sue 0 0 1
+
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nucBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,45 @@
+<tool id="bedtools_nucbed" name="NucBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools nuc": profiles the nucleotide content of the input intervals against a FASTA file. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools nuc
+        $strand
+        $seq
+        ## -pattern takes the sequence to count as its argument, so it is only emitted together with a non-empty sequence.
+        #if str($pattern) and str($pattern_sequence).strip():
+            -pattern '$pattern_sequence'
+        #end if
+        $case
+        -fi '$fasta'
+        -bed '$inputA'
+        > '$output'
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param format="fasta" name="fasta" type="data" label="Fasta file"/>
+
+        <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Profile the sequence according to strand." />
+        <param name="seq" type="boolean" checked="false" truevalue="-seq" falsevalue="" label="Print the extracted sequence." />
+        <param name="pattern" type="boolean" checked="false" truevalue="-pattern" falsevalue="" label="Report the number of times a user-defined sequence is observed (case-sensitive)." />
+        <param name="pattern_sequence" type="text" value="" label="Sequence to count" help="Only used when the option above is selected." />
+        <param name="case" type="boolean" checked="false" truevalue="-C" falsevalue="" label="Ignore case when matching -pattern." />
+    </inputs>
+    <outputs>
+        <!-- bedtools nuc appends tab-delimited statistics to each input interval; the result is a table, not sequences. -->
+        <data format="tabular" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+Profiles the nucleotide content of intervals in a fasta file.
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,32 @@
+<tool id="bedtools_overlapbed" name="OverlapBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools overlap": appends the overlap/distance between two coordinate pairs found on each input line. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        ## "overlap" is a bedtools sub-command, not a stand-alone executable.
+        bedtools overlap
+        -i '$inputA'
+        -cols '$cols'
+        > '$output'
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param name="cols" type="text" value="" area="True" size="20x10" label="Specify the columns (1-based) for the starts and ends of the features for which you’d like to compute the overlap/distance. The columns must be listed in the following order: start1,end1,start2,end2" />
+    </inputs>
+    <outputs>
+        <data format_source="inputA" name="output" metadata_source="inputA" label="Overlap of ${inputA.name}"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+overlap computes the amount of overlap (in the case of positive values) or distance (in the case of negative values) between feature coordinates occurring on the same input line and reports the result at the end of the same line. In this way, it is a useful method for computing custom overlap scores from the output of other BEDTools.
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/randomBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,38 @@
+<tool id="bedtools_randombed" name="RandomBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools random": generates random intervals for a genome. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools random
+        -g $genome
+        -l $length
+        -n $intervals
+        #if $seed.choose:
+            -seed $seed.seed
+        #end if
+        ## The intervals are written to stdout; without this redirect the declared output dataset stays empty.
+        > '$output'
+    </command>
+    <inputs>
+        <expand macro="genome" />
+        <param name="length" type="integer" value="100" label="The length of the intervals to generate." />
+        <param name="intervals" type="integer" value="1000000" label="The number of intervals to generate." />
+        <expand macro="seed" />
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools random will generate a random set of intervals in BED6 format. One can specify both the number (-n) and the size (-l) of the intervals that should be generated.
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reldist.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,39 @@
+<tool id="bedtools_reldistbed" name="ReldistBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools reldist": relative distance between the intervals of two files. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools reldist
+        -a '$inputA'
+        -b '$inputB'
+        $detail
+        ## The result is written to stdout; without this redirect the declared output dataset stays empty.
+        > '$output'
+    </command>
+    <inputs>
+        <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/>
+        <param name="detail" type="boolean" checked="false" truevalue="-detail" falsevalue="" label="Instead of a summary, report the relative distance for each interval in A" help="" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output" label="Relative distance of ${inputA.name} to ${inputB.name}"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+Traditional approaches to summarizing the similarity between two sets of genomic intervals are based upon the number or proportion of intersecting intervals. However, such measures are largely blind to spatial correlations between the two sets where, despite consistent spacing or proximity, intersections are rare (for example, enhancers and transcription start sites rarely overlap, yet they are much closer to one another than two sets of random intervals). Favorov et al [1] proposed a relative distance metric that describes the distribution of relative distances between each interval in one set and the two closest intervals in another set (see figure above). If there is no spatial correlation between the two sets, one would expect the relative distances to be uniformly distributed among the relative distances ranging from 0 to 0.5. If, however, the intervals tend to be much closer than expected by chance, the distribution of observed relative distances would be shifted towards low relative distance values (e.g., the figure below).
+
+.. image:: $PATH_TO_IMAGES/reldist-glyph.png
+
+.. class:: infomark
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/shuffleBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,74 @@
+<tool id="bedtools_shufflebed" name="ShuffleBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools shuffle": randomly relocates the features of a file within a genome. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools shuffle
+        -g $genome
+        -i $inputA
+        $bedpe
+        #if $seed.choose:
+            -seed $seed.seed
+        #end if
+        #if $excl.choose:
+            -excl $excl.excl
+            ## The overlap macro is expanded inside the "excl" conditional, so it has to be addressed through it.
+            -f $excl.overlap
+        #end if
+        #if $incl.choose:
+            -incl $incl.incl
+        #end if
+        $chrom
+        $chromfirst
+        $nooverlap
+        $allowBeyond
+        -maxTries $maxtries
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param name="bedpe" type="boolean" label="The file is in BEDPE format" checked="false" truevalue="-bedpe" falsevalue="" />
+        <expand macro="genome" />
+        <param name="chrom" type="boolean" label="Keep features in the input file on the same chromosome. Solely permute their location on the chromosome. By default, both the chromosome and position are randomly chosen" checked="false" truevalue="-chrom" falsevalue="" />
+        <expand macro="seed" />
+        <conditional name="excl">
+            <param name="choose" type="boolean" label="Choose a BED file of coordinates in which features from -i should not be placed?" checked="false" truevalue="True" falsevalue="False" />
+            <when value="True">
+                <param name="excl" type="data" format="bed" label="Choose File" />
+                <expand macro="overlap" />
+            </when>
+            <when value="False" />
+        </conditional>
+        <conditional name="incl">
+            <param name="choose" type="boolean" label="Choose a BED file of coordinates in which features from -i should be placed?" checked="false" truevalue="True" falsevalue="False" />
+            <when value="True">
+                <param name="incl" type="data" format="bed" label="Choose File" />
+            </when>
+            <when value="False" />
+        </conditional>
+
+        <param name="chromfirst" type="boolean" checked="false" truevalue="-chromFirst" falsevalue=""
+            label="Instead of choosing a position randomly among the entire genome (the default), first choose a chrom randomly, and then choose a random start coordinate on that chrom. This leads to features being ~uniformly distributed among the chroms, as opposed to features being distributed as a function of chrom size" />
+        <param name="maxtries" type="integer" value="1000" label="Max. number of attempts to find a home for a shuffled interval in the presence of -incl or -excl" />
+        <param name="nooverlap" type="boolean" checked="false" truevalue="-noOverlapping" falsevalue="" label="Don’t allow shuffled intervals to overlap" />
+        <param name="allowBeyond" type="boolean" checked="false" truevalue="-allowBeyondChromEnd" falsevalue="" label="Allow the original length of the records to extend beyond the length of the chromosome." />
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools shuffle will randomly permute the genomic locations of a feature file among a genome defined in a genome file. One can also provide an “exclusions” BED/GFF/VCF file that lists regions where you do not want the permuted features to be placed. For example, one might want to prevent features from being placed in known genome gaps. shuffle is useful as a null basis against which to test the significance of associations of one feature with another.
+
+.. image:: $PATH_TO_IMAGES/shuffle-glyph.png
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/slopBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,52 @@
+<tool id="bedtools_slopbed" name="SlopBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools slop": grows each feature by a number of bases, bounded by the chromosome sizes. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools slop
+        $pct
+        $strand
+        -g $genome
+        -i $inputA
+        #if $addition.addition_select == 'b':
+            -b $addition.b
+        #else:
+            -l $addition.l
+            -r $addition.r
+        #end if
+        $header
+
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <expand macro="genome" />
+        <param name="pct" type="boolean" checked="false" truevalue="-pct" falsevalue="" label="Define -l and -r as a fraction of the feature’s length" help="E.g. if used on a 1000bp feature, -l 0.50, will add 500 bp “upstream”" />
+        <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Define -l and -r based on strand" help="For example. if used, -l 500 for a negative-stranded feature, it will add 500 bp to the end coordinate" />
+        <expand macro="addition" />
+        <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the input file prior to results." />
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" label=""/>
+    </outputs>
+    <!-- The help text is XML character data: literal angle brackets must be written as entities or the file is not well-formed. -->
+    <help>
+
+**What it does**
+
+bedtools slop will increase the size of each feature in a feature file by a user-defined number of bases. While something like this could be done with an awk '{OFS="\t"; print $1,$2-&lt;slop&gt;,$3+&lt;slop&gt;}', bedtools slop will restrict the resizing to the size of the chromosome (i.e. no start &lt; 0 and no end &gt; chromosome size).
+
+.. image:: $PATH_TO_IMAGES/slop-glyph.png
+
+.. class:: warningmark
+
+In order to prevent the extension of intervals beyond chromosome boundaries, bedtools slop requires a genome file defining the length of each chromosome or contig.
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sortBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,50 @@
+<tool id="bedtools_sortbed" name="Sort BED files" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around sortBed: sorts a BED file by the criterion chosen in "option". -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        sortBed
+        -i $input
+        $option
+        > $output
+    </command>
+    <inputs>
+        <param name="input" type="data" format="bed" label="Sort the following BED file"/>
+        <param name="option" type="select" label="Sort by">
+            <!-- UNIX equivalent of the first option: sort -k 1,1 -k2,2 -n a.bed -->
+            <option value="">chromosome, then by start position (asc)</option>
+            <option value="-sizeA">feature size in ascending order.</option>
+            <option value="-sizeD">feature size in descending order.</option>
+            <option value="-chrThenSizeA">chromosome, then by feature size (asc).</option>
+            <option value="-chrThenSizeD">chromosome, then by feature size (desc).</option>
+            <option value="-chrThenScoreA">chromosome, then by score (asc).</option>
+            <option value="-chrThenScoreD">chromosome, then by score (desc).</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="bed" metadata_source="input" label="${input.name} (as BED)"/>
+    </outputs>
+
+    <help>
+
+**What it does**
+
+Sorts a feature file by chromosome and other criteria.
+
+
+.. class:: warningmark
+
+It should be noted that sortBed is merely a convenience utility, as the UNIX sort utility
+will sort BED files more quickly while using less memory. For example, UNIX sort will sort a BED file
+by chromosome then by start position in the following manner: sort -k 1,1 -k2,2 -n a.bed
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/subtractBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,51 @@
+<tool id="bedtools_subtractbed" name="SubtractBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools subtract": removes from A the portions (or whole features) overlapped by B. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools subtract
+        $strand
+        -a $inputA
+        -b $inputB
+        -f $overlap
+        $removeIfOverlap
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/>
+        <!-- NOTE(review): tagBed.xml reads $strand with only the strand2 macro expanded, so strand2 presumably declares "strand" too and the explicit param below may duplicate it; confirm against macros.xml. -->
+        <expand macro="strand2" />
+        <expand macro="overlap" />
+
+        <param name="strand" type="select" label="Calculation based on strandedness?">
+            <option value="" selected="True">Overlaps on either strand</option>
+            <option value="-s">Only overlaps occurring on the **same** strand.</option>
+            <option value="-S">Only overlaps occurring on the **opposite** strand.</option>
+        </param>
+
+        <param name="removeIfOverlap" type="select" label="Remove entire feature on overlap?">
+            <option value="" selected="True">Do not remove entire feature on overlap</option>
+            <option value="-A">Remove entire feature if any overlap. That is, by default, only subtract the portion of A that overlaps B. Here, if any overlap is found (or -f amount), the entire feature is removed.</option>
+            <option value="-N">Same as -A except when used with -f, the amount is the sum of all features (not any single feature).</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format_source="inputA" name="output" metadata_source="inputA" label=""/>
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools subtract searches for features in B that overlap A. If an overlapping feature is found in B, the overlapping portion is removed from A and the remaining portion of A is reported. If a feature in B overlaps all of a feature in A, the A feature will not be reported.
+
+.. image:: $PATH_TO_IMAGES/subtract-glyph.png
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tagBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,55 @@
+<tool id="bedtools_tagbed" name="TagBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools tag": tags BAM alignments with the annotation files they overlap. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools tag
+        -i $inputA
+        -files
+        ## The repeat must be referenced as a placeholder; a bare "beds" is not looked up among the tool parameters.
+        #for $bed in $beds:
+            $bed.input
+        #end for
+        -f $overlap
+        $strand
+        -tag $tag
+        $field
+        > $output
+    </command>
+    <inputs>
+        <param format="bam" name="inputA" type="data" label="BAM file"/>
+
+        <repeat name="beds" title="Add files" >
+            <param name="input" format="bed,gff,vcf" type="data" label="BED/VCF/GFF file" />
+        </repeat>
+        <expand macro="strand2" />
+        <expand macro="overlap" />
+
+        <param name="tag" type="text" value="YB" label="Dictate what the tag should be." />
+        <!-- NOTE(review): bedtools tag expects one label per annotation file after -labels; the "labels" choice passes none. Confirm the intended behaviour. -->
+        <param name="field" type="select" label="Use which field from the annotation files to populate tags?">
+            <option value="-labels" selected="True">labels</option>
+            <option value="-scores">Scores</option>
+            <option value="-names">Names</option>
+            <option value="-labels -intervals">Intervals</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- bedtools tag writes the tagged alignments as BAM, matching the BAM input. -->
+        <data format="bam" name="output" label="" />
+    </outputs>
+    <help>
+
+**What it does**
+
+Annotates the alignments of a BAM file based on their overlaps with the intervals of multiple BED/GFF/VCF files.
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/0.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,4 @@ +chr1 100 200 +chr1 180 250 +chr1 250 500 +chr1 501 1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/0_result.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 100 500 +chr1 501 1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,4 @@ +chr1 100 200 a1 1 + +chr1 180 250 a2 2 + +chr1 250 500 a3 3 - +chr1 501 1000 a4 4 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1_result.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,3 @@ +chr1 100 250 + +chr1 501 1000 + +chr1 250 500 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,4 @@ +chr1 100 200 +chr1 180 250 +chr1 250 500 +chr1 501 1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2_result.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 100 500 3 +chr1 501 1000 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 100 200 +chr1 501 1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3_result_1000.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,1 @@ +chr1 100 200 1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/A.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,5 @@ +chr1 100 200 +chr1 180 250 +chr1 250 500 +chr1 501 1000 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotateBed1.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,4 @@ +chr1 100 200 nasty 1 - +chr2 500 1000 ugly 2 + +chr3 1000 5000 big 3 - +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotateBed2.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,3 @@ +chr1 150 200 geneA 1 + +chr1 175 250 geneB 2 + +chr3 0 10000 geneC 3 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotateBed3.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,3 @@ +chr1 0 10000 cons1 1 + +chr2 700 10000 cons2 2 - +chr3 4000 10000 cons3 3 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotateBed4.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,4 @@ +chr1 0 120 known1 - +chr1 150 160 known2 - +chr2 0 10000 known3 + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/closestBedA.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,1 @@ +chr1 100 200
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/closestBedB.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 500 1000 +chr1 1300 2000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expandInput.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 10 20 1,2,3 10,20,30 +chr1 40 50 4,5,6 40,50,60
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/groupbyBed.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,14 @@ +chr21 9719758 9729320 variant1 chr21 9719768 9721892 ALR/Alpha 1004 + +chr21 9719758 9729320 variant1 chr21 9721905 9725582 ALR/Alpha 1010 + +chr21 9719758 9729320 variant1 chr21 9725582 9725977 L1PA3 3288 + +chr21 9719758 9729320 variant1 chr21 9726021 9729309 ALR/Alpha 1051 + +chr21 9729310 9757478 variant2 chr21 9729320 9729809 L1PA3 3897 - +chr21 9729310 9757478 variant2 chr21 9729809 9730866 L1P1 8367 + +chr21 9729310 9757478 variant2 chr21 9730866 9734026 ALR/Alpha 1036 - +chr21 9729310 9757478 variant2 chr21 9734037 9757471 ALR/Alpha 1182 - +chr21 9795588 9796685 variant3 chr21 9795589 9795713 (GAATG)n 308 + +chr21 9795588 9796685 variant3 chr21 9795736 9795894 (GAATG)n 683 + +chr21 9795588 9796685 variant3 chr21 9795911 9796007 (GAATG)n 345 + +chr21 9795588 9796685 variant3 chr21 9796028 9796187 (GAATG)n 756 + +chr21 9795588 9796685 variant3 chr21 9796202 9796615 (GAATG)n 891 + +chr21 9795588 9796685 variant3 chr21 9796637 9796824 (GAATG)n 621 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/groupbyinput.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,14 @@ +chr21 9719758 9729320 variant1 chr21 9719768 9721892 ALR/Alpha 1004 + +chr21 9719758 9729320 variant1 chr21 9721905 9725582 ALR/Alpha 1010 + +chr21 9719758 9729320 variant1 chr21 9725582 9725977 L1PA3 3288 + +chr21 9719758 9729320 variant1 chr21 9726021 9729309 ALR/Alpha 1051 + +chr21 9729310 9757478 variant2 chr21 9729320 9729809 L1PA3 3897 - +chr21 9729310 9757478 variant2 chr21 9729809 9730866 L1P1 8367 + +chr21 9729310 9757478 variant2 chr21 9730866 9734026 ALR/Alpha 1036 - +chr21 9729310 9757478 variant2 chr21 9734037 9757471 ALR/Alpha 1182 - +chr21 9795588 9796685 variant3 chr21 9795589 9795713 (GAATG)n 308 + +chr21 9795588 9796685 variant3 chr21 9795736 9795894 (GAATG)n 683 + +chr21 9795588 9796685 variant3 chr21 9795911 9796007 (GAATG)n 345 + +chr21 9795588 9796685 variant3 chr21 9796028 9796187 (GAATG)n 756 + +chr21 9795588 9796685 variant3 chr21 9796202 9796615 (GAATG)n 891 + +chr21 9795588 9796685 variant3 chr21 9796637 9796824 (GAATG)n 621 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mapBedA.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,4 @@ +chr1 10 20 a1 1 + +chr1 50 60 a2 2 - +chr1 80 90 a3 3 - +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mapBedB.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,5 @@ +chr1 12 14 b1 2 + +chr1 13 15 b2 5 - +chr1 16 18 b3 5 + +chr1 82 85 b4 2 - +chr1 85 87 b5 3 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mygenome.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 1000 +chr2 800
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/shuffleBedA.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 0 100 a1 1 + +chr1 0 1000 a2 2 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/shuffleBedGenome.genome Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,5 @@ +chr1 10000 +chr2 8000 +chr3 5000 +chr4 2000 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/t Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,4 @@ +chr1 100 200 nasty 1 - +chr2 500 1000 ugly 2 + +chr3 1000 5000 big 3 - +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/windowBedA.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,1 @@ +chr1 100 200
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/windowBedB.bed Tue Nov 04 01:45:04 2014 -0500 @@ -0,0 +1,2 @@ +chr1 500 1000 +chr1 1300 2000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!-- Declares the Tool Shed package repository that provides bedtools 2.19.1. -->
+<tool_dependency>
+    <package name="bedtools" version="2.19.1">
+        <repository name="package_bedtools_2_19" owner="iuc" changeset_revision="fb3a854c7104" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/unionBedGraphs.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,236 @@
+<tool id="bedtools_mergebedgraph" name="Merge BedGraph files" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around unionBedGraphs: combines several BedGraph files into one table with a value column per file. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>unionBedGraphs
+        $header
+        -filler '$filler'
+        ## -empty is always passed together with -g; chromInfo is the chromosome length file Galaxy supplies for the genome build.
+        #if $zero.value == True:
+            -empty
+            -g ${chromInfo}
+        #end if
+
+        -i '$input1'
+        '$input2'
+        #for $q in $bedgraphs
+            '${q.input}'
+        #end for
+
+        ## One label per input, in the same order as the files given to -i.
+        -names
+        #if $name1.choice == "tag":
+            '${input1.name}'
+        #else
+            '${name1.custom_name}'
+        #end if
+
+        #if $name2.choice == "tag":
+            '${input2.name}'
+        #else
+            '${name2.custom_name}'
+        #end if
+
+        #for $q in $bedgraphs
+            #if $q.name.choice == "tag":
+                '${q.input.name}'
+            #else
+                ## custom_name is declared in the "name" conditional of the repeat, not on the dataset.
+                '${q.name.custom_name}'
+            #end if
+        #end for
+        > '$output'
+    </command>
+    <inputs>
+        <!-- Make it easy for the user, first two input files are always shown -->
+        <!-- INPUT 1 -->
+        <param name="input1" format="bedgraph" type="data" label="First BedGraph file" />
+
+        <conditional name="name1">
+            <param name="choice" type="select" label="Sample name">
+                <option value="tag" selected="true">Use input's tag</option>
+                <option value="custom">Enter custom table name</option>
+            </param>
+            <when value="tag">
+            </when>
+            <when value="custom">
+                <param name="custom_name" type="text" area="false" label="Custom sample name"/>
+            </when>
+        </conditional>
+
+        <!-- INPUT 2 -->
+        <param name="input2" format="bedgraph" type="data" label="Second BedGraph file" />
+
+        <conditional name="name2">
+            <param name="choice" type="select" label="Sample name">
+                <option value="tag" selected="true">Use input's tag</option>
+                <option value="custom">Enter custom table name</option>
+            </param>
+            <when value="tag">
+            </when>
+            <when value="custom">
+                <param name="custom_name" type="text" area="false" label="Custom sample name"/>
+            </when>
+        </conditional>
+
+        <!-- Additional files, if the user needs more -->
+        <repeat name="bedgraphs" title="Add'l BedGraph files" >
+            <param name="input" format="bedgraph" type="data" label="BedGraph file" />
+            <conditional name="name">
+                <param name="choice" type="select" label="Sample name">
+                    <option value="tag" selected="true">Use input's tag</option>
+                    <option value="custom">Enter custom table name</option>
+                </param>
+                <when value="tag">
+                </when>
+                <when value="custom">
+                    <param name="custom_name" type="text" area="false" label="Custom sample name"/>
+                </when>
+            </conditional>
+        </repeat>
+
+        <param name="header" type="boolean" checked="true" truevalue="-header" falsevalue="" label="Print header line" help="The first line will include the name of each sample." />
+
+        <param name="zero" type="boolean" checked="true" label="Report regions with zero coverage" help="If set, regions without any coverage will also be reported. Requires a valid organism key for all input datasets" />
+
+        <param name="filler" type="text" value="0" label="Text to use for no-coverage value" help="Can be 0.0, N/A, - or any other value." />
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="output" metadata_source="input1" label="Merged BedGraphs of ${input1.name}, ${input2.name} and so on." />
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool merges multiple BedGraph files, allowing direct and fine-scale coverage comparisons among many samples/files. The BedGraph files need not represent the same intervals; the tool will identify both common and file-specific intervals. In addition, the BedGraph values need not be numeric: one can use any text as the BedGraph value and the tool will compare the values from multiple files.
+
+.. image:: http://people.virginia.edu/~arq5x/files/bedtools-galaxy/ubg.png
+
+
+.. class:: warningmark
+
+This tool requires that each BedGraph file is reference-sorted (chrom, then start) and contains non-overlapping intervals (within a given file).
+
+
+------
+
+**Example input**::
+
+  # 1.bedgraph
+  chr1 1000 1500 10
+  chr1 2000 2100 20
+
+  # 2.bedgraph
+  chr1 900 1600 60
+  chr1 1700 2050 50
+
+  # 3.bedgraph
+  chr1 1980 2070 80
+  chr1 2090 2100 20
+
+
+------
+
+**Examples using the Zero Coverage checkbox**
+
+Output example (*without* checking "Report regions with zero coverage")::
+
+  chr1 900 1000 0 60 0
+  chr1 1000 1500 10 60 0
+  chr1 1500 1600 0 60 0
+  chr1 1700 1980 0 50 0
+  chr1 1980 2000 0 50 80
+  chr1 2000 2050 20 50 80
+  chr1 2050 2070 20 0 80
+  chr1 2070 2090 20 0 0
+  chr1 2090 2100 20 0 20
+
+
+Output example (*with* checking "Report regions with zero coverage"). The lines marked with (*) are not covered in any input file, but are still reported (The asterisk marking does not appear in the file).::
+
+  chr1 0 900 0 0 0 (*)
+  chr1 900 1000 0 60 0
+  chr1 1000 1500 10 60 0
+  chr1 1500 1600 0 60 0
+  chr1 1600 1700 0 0 0 (*)
+  chr1 1700 1980 0 50 0
+  chr1 1980 2000 0 50 80
+  chr1 2000 2050 20 50 80
+  chr1 2050 2070 20 0 80
+  chr1 2070 2090 20 0 0
+  chr1 2090 2100 20 0 20
+  chr1 2100 247249719 0 0 0 (*)
+
+
+------
+
+**Examples adjusting the "Filler value" for no-covered intervals**
+
+The default value is '0', but you can use any other value.
+
+Output example with **filler = N/A**::
+
+  chr1 900 1000 N/A 60 N/A
+  chr1 1000 1500 10 60 N/A
+  chr1 1500 1600 N/A 60 N/A
+  chr1 1600 1700 N/A N/A N/A
+  chr1 1700 1980 N/A 50 N/A
+  chr1 1980 2000 N/A 50 80
+  chr1 2000 2050 20 50 80
+  chr1 2050 2070 20 N/A 80
+  chr1 2070 2090 20 N/A N/A
+  chr1 2090 2100 20 N/A 20
+
+
+------
+
+**Examples using the "sample name" labels**::
+
+  chrom start end WT-1 WT-2 KO-1
+  chr1 900 1000 N/A 60 N/A
+  chr1 1000 1500 10 60 N/A
+  chr1 1500 1600 N/A 60 N/A
+  chr1 1600 1700 N/A N/A N/A
+  chr1 1700 1980 N/A 50 N/A
+  chr1 1980 2000 N/A 50 80
+  chr1 2000 2050 20 50 80
+  chr1 2050 2070 20 N/A 80
+  chr1 2070 2090 20 N/A N/A
+  chr1 2090 2100 20 N/A 20
+
+
+------
+
+**Non-numeric values**
+
+The input BedGraph files can contain any kind of value in the fourth column, not necessarily a numeric value.
+
+Input Example::
+
+  File-1                      File-2
+  chr1 200 300 Sample1        chr1 100 240 0.75
+  chr1 400 450 Sample1        chr1 250 700 0.43
+  chr1 530 600 Sample2
+
+Output Example::
+
+  chr1 100 200 0 0.75
+  chr1 200 240 Sample1 0.75
+  chr1 240 250 Sample1 0
+  chr1 250 300 Sample1 0.43
+  chr1 300 400 0 0.43
+  chr1 400 450 Sample1 0.43
+  chr1 450 530 0 0.43
+  chr1 530 600 Sample2 0.43
+  chr1 600 700 0 0.43
+
+@REFERENCES@
+
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/windowBed.xml Tue Nov 04 01:45:04 2014 -0500
@@ -0,0 +1,78 @@
+<tool id="bedtools_windowbed" name="WindowBed" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around "bedtools window": finds features in B that fall within a window around each feature in A. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools window
+        #if $inputA.ext == "bam":
+            -abam $inputA
+        #else:
+            -a $inputA
+        #end if
+        -b $inputB
+        $ubam
+        $bed
+        $strandB
+        ## With the default choice no window option is emitted and bedtools applies its own default window.
+        #if $addition.addition_select == 'b':
+            -w $addition.b
+        #elif $addition.addition_select == 'lr':
+            -l $addition.l
+            -r $addition.r
+        #end if
+        $original
+        $number
+        $nooverlaps
+        $header
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/>
+        <param name="ubam" type="boolean" checked="false" truevalue="-ubam" falsevalue="" label="Write uncompressed BAM output" />
+        <param name="bed" type="boolean" checked="false" truevalue="-bed" falsevalue="" label="When using BAM input, write output as BED. The default is to write output in BAM when using a bam file" />
+        <conditional name="addition">
+            <param name="addition_select" type="select" label="Choose what you want to do">
+                <option value="huhn" selected="True">Do not change added base pairs</option>
+                <option value="b">Add Base pairs for **both** upstream and downstream of each entry in A when searching for overlaps in B</option>
+                <option value="lr">Add Base pairs **separately** for upstream and downstream of each entry in A when searching for overlaps in B</option>
+            </param>
+            <when value="huhn" />
+            <when value="b">
+                <param name="b" label="Give Value" type="integer" value="1000" />
+            </when>
+            <when value="lr">
+                <param name="l" label="Base pairs added upstream (left of) of each entry in A when searching for overlaps in B. Allows one to create asymmetrical “windows”. Default is 1000bp" type="integer" value="1000" />
+                <param name="r" label="Base pairs added downstream (right of) of each entry in A when searching for overlaps in B. Allows one to create asymmetrical “windows”. Default is 1000bp" type="integer" value="1000" />
+            </when>
+        </conditional>
+        <param name="strandB" type="select" label="Calculation based on strandedness?">
+            <option value="" selected="True">Report any hit in B</option>
+            <option value="-sm">Only report hits in B that overlap A on the **same** strand</option>
+            <option value="-Sm">Only report hits in B that overlap A on the **opposite** strand</option>
+        </param>
+        <param name="original" type="boolean" checked="false" truevalue="-u" falsevalue="" label="Write original A entry once if any overlaps found in B. In other words, just report the fact at least one overlap was found in B" />
+        <param name="number" type="boolean" checked="false" truevalue="-c" falsevalue="" label="For each entry in A, report the number of hits in B while restricting to -w, -l, and -r. Reports 0 for A entries that have no overlap with B" />
+        <param name="nooverlaps" type="boolean" checked="false" truevalue="-v" falsevalue="" label="Only report those entries in A that have no overlaps with B" />
+        <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the A file prior to results" />
+
+    </inputs>
+    <outputs>
+        <data format_source="inputA" name="output" metadata_source="inputA" label=""/>
+    </outputs>
+    <help>
+
+**What it does**
+
+Similar to bedtools intersect, window searches for overlapping features in A and B. However, window adds a specified number (1000, by default) of base pairs upstream and downstream of each feature in A. In effect, this allows features in B that are “near” features in A to be detected.
+
+.. image:: $PATH_TO_IMAGES/window-glyph.png
+
+@REFERENCES@
+    </help>
+    <expand macro="citations" />
+</tool>