<!--
Repository viewer header (provenance only, not part of the tool definition):
view bed2annotation.xml @ 0:0475e4175855 draft default tip

planemo upload commit 81ece2551cea27cbd0e718ef5b7a2fe8d4abd071-dirty
author yqiancolumbia
date Mon, 30 Apr 2018 05:25:11 -0400
parents
children
line wrap: on
line source
-->

<tool id="bed2annotation" name="Annotate genomic intervals">
	<!--
		Galaxy wrapper around the CTK script bed2annotation.pl.
		Inputs: one BED dataset, a genome build, a set of on/off annotation
		switches and, optionally, a second BED dataset of custom features.
		Outputs: a per-interval annotation table and a summary table.
	-->
	<description></description>

	<!--
		Command template (Cheetah). Every substituted dataset path and
		parameter value is single-quoted so that unusual characters in a
		path or value cannot split or alter the shell command line.
		The script and annotation.loc locations are absolute paths that are
		specific to the hosting server.
		NOTE(review): the "interpreter" attribute is marked deprecated in the
		current Galaxy tool schema; it is kept here to preserve the existing
		invocation. Consider calling perl explicitly inside the command.
		NOTE(review): Galaxy appears to inject its own "dbkey" value (taken
		from the input dataset) into the template namespace; confirm that the
		select parameter below is the value that actually reaches the script.
	-->
	<command interpreter="perl"><![CDATA[
/home/galaxy/tools/CTK/bed2annotation.pl -v
-conf /home/galaxy/tools/CTK/annotation.loc
$bigFileFlag $ssFlag -dbkey '$dbkey' $geneFlag $miRNAFlag $rmskFlag $regionFlag
#if str($customFeature.annotateCustomFeature) == "yes":
-custom '$customFeature.customFeatureBedFile'
--custom-name '$customFeature.customFeatureName'
--custom-summary '$customFeature.customSummaryMethod'
#end if
-summary '$outputSummary' '$inputBed' '$outputAnnotation'
	]]></command>

	<inputs>
		<param name="inputBed" type="data" format="bed" label="Select a BED file to be annotated"/>
		<!-- Each boolean below expands to its command-line switch when checked and to nothing otherwise. -->
		<param name="bigFileFlag" type="boolean" truevalue="-big" falsevalue="" checked="true" label="Big file (over 6M lines)"/>
		<param name="dbkey" type="select" label="Genome build name">
			<option value="hg19">hg19</option>
			<option value="mm10">mm10</option>
		</param>
		<param name="ssFlag" type="boolean" truevalue="-ss" falsevalue="" checked="true" label="Consider the two strands separately when possible"/>
		<param name="regionFlag" type="boolean" truevalue="-region" falsevalue="" checked="true" label="Identify the genomic region where the peaks are located; this gives the percentage of tags mapped to CDS, 3'UTR, introns, etc."/>
		<param name="geneFlag" type="boolean" truevalue="-gene" falsevalue="" checked="false" label="Annotate overlapping genes (RefSeq/UCSC known genes)"/>
		<param name="miRNAFlag" type="boolean" truevalue="-miRNA" falsevalue="" checked="false" label="Annotate overlapping microRNAs"/>
		<param name="rmskFlag" type="boolean" truevalue="-rmsk" falsevalue="" checked="false" label="Annotate overlapping RepeatMasked sequences (type and %)"/>
		<!-- The custom-feature options are only shown, and only passed to the script, when "Yes" is selected. -->
		<conditional name="customFeature">
			<param name="annotateCustomFeature" type="select" label="Do you want to also annotate custom features from your history?">
				<option value="yes">Yes</option>
				<option value="no" selected="true">No</option>
			</param>
			<when value="yes">
				<param name="customFeatureBedFile" type="data" format="bed" label="Select a BED file with custom features"/>
				<param name="customFeatureName" type="text" value="custom_feature" size="80" label="Name your custom features (no spaces, no special characters)">
					<!-- Enforces the naming rule stated in the label instead of relying on the user to follow it. -->
					<validator type="regex" message="Use only letters, digits and underscores (no spaces or special characters).">^[A-Za-z0-9_]+$</validator>
				</param>
				<param name="customSummaryMethod" type="select" label="Select how you would like to combine multiple items of the annotation">
					<option value="all" selected="true">List the name of all</option>
					<option value="max_num">List the name and score of the one with the max score</option>
					<option value="min_num">List the name and score of the one with the min score</option>
					<option value="max_overlap">List the name and overlap of the one with the max overlap proportion</option>
				</param>
			</when>
			<when value="no"/>
		</conditional>
	</inputs>

	<outputs>
		<data name="outputAnnotation" format="tabular" label="Annotated intervals on ${on_string}"/>
		<data name="outputSummary" format="tabular" label="Summary of the annotation on ${on_string}"/>
	</outputs>

	<help><![CDATA[

.. class:: infomark

**What the tool does**

Functional annotation of CLIP tags.

It takes as input a BED file of unique CLIP tags (with or without RGB color) and annotates the genomic intervals with various features, either built in or from your history.

Note 1: The input can be CLIP tags, CLIP clusters, CIMS, or anything else. However, if the input is big, it might involve extensive computational load. Therefore, it is recommended that tags are clustered for some of the annotations (such as overlapping genes, rmsk, etc.).

Note 2: Strand is not considered for RepeatMasked regions even when you choose to separate the two strands (because for some repeats, such as simple_repeat or microsatellites, strand does not make sense).

-----

**Output files**

There are two output files.

The first output file has the detailed annotation, with intervals in exactly the same order as the input file, so that you can easily put different types of information together.

The second output file is a summary of the annotation.

	]]></help>
</tool>