Mercurial > repos > iuc > bedtools
diff intersectBed.xml @ 1:82aac94b06c3 draft
Uploaded
author | iuc |
---|---|
date | Thu, 08 Jan 2015 14:25:51 -0500 |
parents | b8348686a0b9 |
children | 607c0576c6ab |
line wrap: on
line diff
--- a/intersectBed.xml Tue Nov 04 01:45:04 2014 -0500 +++ b/intersectBed.xml Thu Jan 08 14:25:51 2015 -0500 @@ -6,65 +6,92 @@ <expand macro="requirements" /> <expand macro="stdio" /> <command> - intersectBed - #if $inputA.ext == "bam": - -abam $inputA - #else: - -a $inputA - #end if +<![CDATA[ + #set inputBs = '" "'.join( [ str( $file ) for $file in $inputB ] ) + #set modes = ' '.join( str($overlap_mode).split(',') ) + + bedtools intersect + #if $inputA.ext == "bam": + -abam "${inputA}" + #else: + -a "${inputA}" + #end if - -b $inputB - $split - $strand - #if str($fraction): - -f $fraction - #end if - $reciprocal - $invert - $once - $header - $overlap_mode - > $output + -b "${inputBs}" + $split + $strand + #if str($fraction) != "None" and str($fraction): + -f "${fraction}" + #end if + $reciprocal + $invert + $once + $header + $modes + > "${output}" +]]> </command> <inputs> <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/> - <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="overlap intervals in this BED file?"/> - - <param name="strand" type="select" label="Calculate the intersection based on strandedness?"> - <option value="" selected="True">Overlaps on either strand</option> - <option value="-s">Only overlaps occurring on the **same** strand.</option> - <option value="-S">Only overlaps occurring on the **opposite** strand.</option> - </param> - - <param name="overlap_mode" type="select" label="What should be written to the output file?"> - <option value="-wa" selected="True">Write the original entry in A for each overlap.</option> - <option value="-wb">Write the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by the fraction- and reciprocal option.</option> - <option value="-wo">Write the original A and B entries plus the number of base pairs of overlap between the two features. Only A features with overlap are reported. 
Restricted by the fraction- and reciprocal option.</option> - <option value="-wao">Write the original A and B entries plus the number of base pairs of overlap between the two features. However, A features w/o overlap are also reported with a NULL B feature and overlap = 0. Restricted by the fraction- and reciprocal option.</option> - <option value="-loj">Perform a "left outer join". That is, for each feature in A report each overlap with B. If no overlaps are found, report a NULL feature for B.</option> + <param format="bed,bam,gff,vcf,gff3" name="inputB" type="data" multiple="True" label="One or more BAM/BED/GFF/VCF file(s)"/> + <expand macro="strand2" /> + <param name="overlap_mode" type="select" multiple="True" label="What should be written to the output file?"> + <option value="-wa" selected="True">Write the original entry in A for each overlap (-wa)</option> + <option value="-wb">Write the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by the fraction- and reciprocal option (-wb)</option> + <option value="-wo">Write the original A and B entries plus the number of base pairs of overlap between the two features. Only A features with overlap are reported. Restricted by the fraction- and reciprocal option (-wo)</option> + <option value="-wao">Write the original A and B entries plus the number of base pairs of overlap between the two features. However, A features w/o overlap are also reported with a NULL B feature and overlap = 0. Restricted by the fraction- and reciprocal option (-wao)</option> + <option value="-loj">Perform a "left outer join". That is, for each feature in A report each overlap with B. If no overlaps are found, report a NULL feature for B (-loj)</option> </param> - <param name="split" type="boolean" checked="true" truevalue="-split" falsevalue="" label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage." 
help="If set, the coverage will be calculated based the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." /> + <expand macro="split" /> <!-- -f --> - <param name="fraction" type="text" optional="true" label="Minimum overlap required as a fraction of the BAM alignment" help="Alignments are only retained if the overlap with the an interval in the BED file comprises at least this fraction of the BAM alignment's length. For example, to require that the overlap affects 50% of the BAM alignment, use 0.50. (-f)"/> + <param name="fraction" type="text" + label="Minimum overlap required as a fraction of the BAM alignment" + help="Alignments are only retained if the overlap with an interval in the BED file comprises at least this fraction of the BAM alignment's length. For example, to require that the overlap affects 50% of the BAM alignment, use 0.50. (-f)"/> <!-- -r --> - <param name="reciprocal" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Require reciprocal overlap." help="If set, the overlap between the BAM alignment and the BED interval must affect the above fraction of both the alignment and the BED interval. (-r)" /> + <expand macro="reciprocal" /> <!-- -v --> - <param name="invert" type="boolean" checked="false" truevalue="-v" falsevalue="" label="Report only those alignments that **do not** overlap the BED file. 
(-v)"/> + <param name="invert" type="boolean" checked="false" truevalue="-v" falsevalue="" + label="Report only those alignments that **do not** overlap the BED file" + help="(-v)"/> <!-- -u --> - <param name="once" type="boolean" checked="false" truevalue="-u" falsevalue="" label="Write the original A entry _once_ if _any_ overlaps found in B." help="Just report the fact >=1 hit was found. (-u)" /> + <param name="once" type="boolean" checked="false" truevalue="-u" falsevalue="" + label="Write the original A entry _once_ if _any_ overlaps found in B." + help="Just report the fact >=1 hit was found. (-u)" /> <!-- -c --> - <param name="count" type="boolean" checked="false" truevalue="-c" falsevalue="" label="For each entry in A, report the number of overlaps with B." help="Reports 0 for A entries that have no overlap with B. (-c)" /> - <!-- header --> - <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the A file prior to results." /> - <!-- header --> - <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the A file prior to results." /> + <param name="count" type="boolean" checked="false" truevalue="-c" falsevalue="" + label="For each entry in A, report the number of overlaps with B." + help="Reports 0 for A entries that have no overlap with B. 
(-c)" /> + <expand macro="print_header" /> </inputs> <outputs> - <data format_source="inputA" name="output" metadata_source="inputA" label="Intersection of ${inputA.name} and ${inputB.name}"/> + <data format_source="inputA" name="output" metadata_source="inputA"/> </outputs> + <tests> + <test> + <param name="inputA" value="intersectBed1.bed" ftype="bed" /> + <param name="inputB" value="intersectBed2.bed" ftype="bed" /> + <param name="overlap_mode" value="-wa" /> + <param name="split" value="False" /> + <output name="output" file="intersectBed_result1.bed" ftype="bed" /> + </test> + <test> + <param name="inputA" value="intersectBed1.bed" ftype="bed" /> + <param name="inputB" value="intersectBed2.bed" ftype="bed" /> + <param name="overlap_mode" value="-wa,-wb" /> + <param name="split" value="False" /> + <output name="output" file="intersectBed_result2.bed" ftype="bed" /> + </test> + <test> + <param name="inputA" value="intersectBed1.bed" ftype="bed" /> + <param name="inputB" value="intersectBed2.bed" ftype="bed" /> + <param name="invert" value="True" /> + <param name="split" value="False" /> + <output name="output" file="intersectBed_result3.bed" ftype="bed" /> + </test> + </tests> <help> - +<![CDATA[ **What it does** By far, the most common question asked of two sets of genomic features is whether or not any of the features in the two sets “overlap” with one another. This is known as feature intersection. bedtools intersect allows one to screen for overlaps between two sets of genomic features. Moreover, it allows one to have fine control as to how the intersections are reported. bedtools intersect works with both BED/GFF/VCF and BAM files as input. @@ -80,7 +107,7 @@ Note that a BAM alignment will be sent to the output file **once** even if it overlaps more than one interval in the BED file. @REFERENCES@ - +]]> </help> <expand macro="citations" /> </tool>