view alignr.xml @ 23:4e646baac551

Uploaded
author xuebing
date Sat, 31 Mar 2012 11:53:40 -0400 (2012-03-31)
parents b11a21c704ec
children
line wrap: on
line source
<tool id="alignr" name="align">
  <description>two interval sets</description>
  <!--
    Cheetah template that builds the alignr.py command line.
    * Parameters declared inside a <conditional> are referenced through the
      conditional's name (inputa_format.inputa, inputb_source_type.inputb),
      matching the existing inputb_source_type.selectedb reference.
    * Dataset paths, the free-text output label and the ylim text are quoted so
      that a value containing spaces (or an empty label) stays a single argument.
    * -b is either the history dataset or the "value" column of the selected
      .loc entry; -f BAM is added only when the first input is BAM.
  -->
  <command interpreter="python"> alignr.py -a "${inputa_format.inputa}" -w $windowsize -n $nbins -o "$output_data" -v "$output_plot" $stranded -q -l "$outputlabel" --ylim="$ylim" --span $span
    #if $inputb_source_type.inputb_select == "user":
        -b "${inputb_source_type.inputb}"
    #else:
        -b "${inputb_source_type.selectedb.fields.value}"
    #end if
    #if $inputa_format.inputa_select == "BAM":
        -f BAM
    #end if
  </command>
  <inputs>
    <!--
      First interval set (-a). The selected format decides which datatype the
      dataset chooser accepts; the command template adds "-f BAM" when BAM is
      selected.
    -->
    <conditional name="inputa_format">
      <param name="inputa_select"
             type="select"
             label="Select your first input format">
        <option value="BED" selected="true">BED-like (chrNum	start	end	...) </option>
        <option value="BAM"> BAM</option>
      </param>
      <when value="BED">
        <param name="inputa"
               type="data"
               format="interval"
               label="Input file for the first interval set (-a)"/>
      </when>
      <when value="BAM">
        <param name="inputa"
               type="data"
               format="bam"
               label="Input file for the first interval set (-a)"/>
      </when>
    </conditional>
    <!--
      Second interval set (-b). Either a dataset from the user's history or a
      built-in annotation set. Each built-in case reads its choices from the
      .loc file for that genome/source: column 0 is the displayed name and
      column 1 is the value handed to -b by the command template.
    -->
    <conditional name="inputb_source_type">
      <param name="inputb_select" type="select" label="Input source for the second interval set">
        <option value="mm9ucsc" selected="true">mm9 ucsc knownGene annotations</option>
        <option value="mm9refseq">mm9 refseq gene annotations</option>
        <option value="mm9ensembl">mm9 ensembl gene annotations</option>
        <option value="hg18ucsc">hg18 ucsc knownGene annotations</option>
        <option value="hg18refseq">hg18 refseq gene annotations</option>
        <option value="hg18ensembl">hg18 ensembl gene annotations</option>
        <option value="user">Dataset in Your History</option>
      </param>

      <!-- User-supplied dataset -->
      <when value="user">
        <param name="inputb" type="data" format="interval" label="Input file for the second interval set (-b)"/>
      </when>

      <!-- Mouse mm9 annotation sets -->
      <when value="mm9ucsc">
        <param name="selectedb" type="select" label="Input for the second interval set (-b)">
          <options from_file="aligndb-mm9-knownGene.loc">
            <column name="name" index="0"/>
            <column name="value" index="1"/>
          </options>
        </param>
      </when>
      <when value="mm9refseq">
        <param name="selectedb" type="select" label="Input for the second interval set (-b)">
          <options from_file="aligndb-mm9-refGene.loc">
            <column name="name" index="0"/>
            <column name="value" index="1"/>
          </options>
        </param>
      </when>
      <when value="mm9ensembl">
        <param name="selectedb" type="select" label="Input for the second interval set (-b)">
          <options from_file="aligndb-mm9-ensGene.loc">
            <column name="name" index="0"/>
            <column name="value" index="1"/>
          </options>
        </param>
      </when>

      <!-- Human hg18 annotation sets -->
      <when value="hg18ucsc">
        <param name="selectedb" type="select" label="Input for the second interval set (-b)">
          <options from_file="aligndb-hg18-knownGene.loc">
            <column name="name" index="0"/>
            <column name="value" index="1"/>
          </options>
        </param>
      </when>
      <when value="hg18refseq">
        <param name="selectedb" type="select" label="Input for the second interval set (-b)">
          <options from_file="aligndb-hg18-refGene.loc">
            <column name="name" index="0"/>
            <column name="value" index="1"/>
          </options>
        </param>
      </when>
      <when value="hg18ensembl">
        <param name="selectedb" type="select" label="Input for the second interval set (-b)">
          <options from_file="aligndb-hg18-ensGene.loc">
            <column name="name" index="0"/>
            <column name="value" index="1"/>
          </options>
        </param>
      </when>
    </conditional>
    <!--
      Scalar options forwarded to alignr.py by the command template:
      windowsize -> -w, nbins -> -n, span -> (double-dash) span, stranded -> -s
      when checked, outputlabel -> -l (also used in the output dataset labels),
      ylim -> (double-dash) ylim.
    -->
    <param name="windowsize" size="10" type="integer" value="0" label="Change input 2 interval size (-w)" help="Creates new intervals of w bp flanking the original center. Setting this to 0 leaves the input interval size unchanged."/>
    <param name="nbins" size="10" type="integer" value="100" label="Number of bins dividing intervals in input 2 (-n)"/>
    <param name="span" size="10" type="float" value="0.1" label="loess span: smoothing parameter" help="Values less than 0.1 disable smoothing"/>
    <param name="stranded" label="Check if require overlap on the same strand (-s)" type="boolean" truevalue="-s" falsevalue="" checked="false"/>
    <param name="outputlabel" size="80" type="text" label="Output label" value="test"/>
    <param name="ylim" size="10" type="text" label="set ylim of the plot" value="min,max" help="e.g. 0,1 (default is the min and max of the signal)"/>
   
</inputs>
  <!--
    Two result datasets, both labelled with the user-supplied output label:
    output_data is the tabular file passed to -o and output_plot is the PDF
    passed to -v in the command template.
  -->
  <outputs>
    <data name="output_data" format="tabular" label="${outputlabel} (data)"/>
    <data name="output_plot" format="pdf" label="${outputlabel} (plot)"/>
  </outputs>
  <help>

**What it does**

This tool aligns two sets of intervals, finds overlaps, calculates and plots the coverage of the first set across the second set. Applications include:  

- check read distribution around TSS/poly A site/splice site/motif site/miRNA target site
- check relative position/overlap of two lists of ChIP-seq peaks

Two output files are generated. One is the coverage/profile for each interval in input 2. The first two columns are interval ID and the total number of overlapping intervals from input 1. Column 3 to column nbins+2 are coverage at each bin. The other file is a PDF file plotting the average coverage of each bin. To modify the visualization, please download the coverage file and make your own plots.

-----

**Annotated features**

Currently supports mouse genome build mm9 and human hg18. Each interval spans 1000bp upstream and 1000bp downstream of a feature such as TSS. Features with overlapping exons in the intronic/intergenic part of the 2000bp interval are removed.

-----

**Usage**

  -h, --help        show this help message and exit
  -a INPUTA         (required) input file A, BED-like (first 3 columns: chr, start, end) or BAM format. The
                    script computes the depth of coverage of features in file
                    A across the features in file B
  -b INPUTB         (required) input file B, BED format or MACS peak file.
                    Requires a unique name for each line in column 4
  -m                inputB is a MACS peak file.
  -f AFORMAT        Format of input file A. Can be BED (default) or BAM
  -w WINDOW         Generate new inputB by making a window of 2 x WINDOW bp
                    (in total) flanking the center of each input feature
  -n NBINS          number of bins. Features in B are binned, and the coverage
                    is computed for each bin. Default is 100
  -s                enforce strandedness: require overlapping on the same
                    strand. Default is off
  -p                load an existing intersectBed output file
  -q                suppress output on screen
  -o OUTPUTPROFILE  (optional) output profile name.
  -v PLOTFILE       (optional) plot file name
  </help>
</tool>