Mercurial > repos > rnateam > paralyzer
diff paralyzer.xml @ 0:4dbe81be8b81 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/paralyzer commit 52c43a8b9958fc46ab0284638038e690f5a0da3a
author | rnateam |
---|---|
date | Tue, 06 Dec 2016 03:28:59 -0500 |
parents | |
children | f880686a9194 |
line wrap: on
line diff
<tool id="paralyzer" name="PARalyzer" version="1.5">
    <!--
        Galaxy wrapper for PARalyzer.

        The form inputs below are rendered into an ini-style parameter file
        (see the "input_ini" configfile) which is handed to the PARalyzer
        executable. When the reference genome comes from the history, the
        FASTA file is first converted to 2bit with faToTwoBit. The three
        result files are written to the job working directory and collected
        through from_work_dir.
    -->

    <description>A method to map interaction sites between RNA-binding proteins
        and their targets</description>

    <requirements>
        <!-- conda dependency -->
        <requirement type="package" version="1.5">paralyzer</requirement>
        <requirement type="package" version="324">ucsc-fatotwobit</requirement>
    </requirements>

    <command>
<![CDATA[
        ## A FASTA genome from the history has to be converted to 2bit first;
        ## the configfile refers to the result by its fixed name ownFile.2bit.
        #if $refGenomeSource.genomeSource == "history":
            faToTwoBit '$refGenomeSource.ownFile' ownFile.2bit
            &&
        #end if

        ## execute paralyzer
        ## note the 2nd argument is the memory parameter
        ## the parameter can be overridden in job_conf.xml e.g.
        ## <env id="_JAVA_OPTIONS">-Xmx12G</env>
        PARalyzer 2G '$input_ini'

]]>
    </command>
    <inputs>
        <param name="input_sam" type="data"
            format="sam" label="Alignment"
            help="The sam file containing alignment of the read data."/>

        <!-- The true value is appended directly to the SAM path in the
             configfile (SAM_FILE=<path>=COLLAPSED). -->
        <param name="collapse" type="boolean" truevalue="=COLLAPSED"
            falsevalue="" checked="True"
            label="Incorporate the read copy number"
            help="If reads were collapsed before alignment and you want
            to incorporate the read copy number, please select Yes,
            otherwise select No" />

        <!-- Genome source. -->
        <conditional name="refGenomeSource">
            <param name="genomeSource" type="select"
                label="Will you select a reference genome from your
                history or use a built-in genome?"
                help="The version of genome against which the reads were aligned.">
                <option value="2bit" selected="True">
                    Use a built-in genome</option>
                <option value="history">
                    Use a genome from my current history</option>
            </param>
            <when value="2bit">
                <param name="builtin" type="select"
                    label="Select a reference genome">
                    <options from_data_table="lastz_seqs">
                        <filter type="sort_by" column="1" />
                        <validator type="no_options"
                            message="A built-in reference genome is not available
                            for the build associated with the selected input file"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ownFile" type="data" format="fasta"
                    label="Select the reference genome" />
            </when>
        </conditional>

        <!-- Cluster extension approach. The option value HAFFNER_APPROACH
             keeps its historical spelling so that saved workflows and reruns
             still resolve; the configfile writes the keyword as
             HAFNER_APPROACH. -->
        <conditional name="methods">
            <param name="choice" type="select"
                label="Please select one of the approaches"
                help="The approaches are described in the help section below.">
                <option value="EXTEND_BY_READ">
                    EXTEND_BY_READ
                </option>
                <option value="HAFFNER_APPROACH">
                    HAFNER_APPROACH
                </option>
                <option value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL">
                    ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL
                </option>
            </param>
            <when value="EXTEND_BY_READ" />
            <when value="HAFFNER_APPROACH" />
            <when value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL">
                <param name="max_num" type="integer"
                    value="0" label="The maximum number"
                    help="The maximum number of nucleotides to extend the
                    cluster beyond the positive signal in each direction
                    (default 0)">
                    <validator type="in_range"
                        message="Minimum allowed value is 0" min="0"/>
                </param>
            </when>
        </conditional>

        <!-- Only the custom branch writes a CONVERSION line to the ini
             file. -->
        <conditional name="conversion">
            <param name="selection" type="select"
                label="Conversion"
                help="Please specify characters">
                <option value="default">Use defaults: T to C</option>
                <option value="custom">Specify other characters</option>
            </param>
            <when value="default" />
            <when value="custom">
                <param name="character_from" type="text"
                    size="1" value="T" label="Character from"
                    help="Character representing the modified ribonucleotide
                    (default 'T')">
                </param>
                <param name="character_to" type="text"
                    size="1" value="C" label="Character to"
                    help="Character representing what the modified ribonucleotide
                    is read as by rTranscriptase (default 'C')">
                </param>
            </when>
        </conditional>

        <!-- optional parameters -->
        <conditional name="params">
            <param name="settingsType" type="select"
                label="Required parameters"
                help="You can use the default settings or
                set custom values for any of paralyzer's parameters.">
                <option value="default">Use defaults</option>
                <option value="custom">Full parameter list</option>
            </param>
            <when value="default" />
            <!-- Full/advanced params. -->
            <when value="custom">
                <param name="BANDWIDTH" type="integer"
                    value="3" label="BANDWIDTH"
                    help="Size of bandwidth for KDE calculation (default 3)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_read_group" type="integer"
                    value="5" label="MINIMUM_READ_COUNT_PER_GROUP"
                    help="Minimum number of reads required to call a group (default 5)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_read_cluster" type="integer"
                    value="2" label="MINIMUM_READ_COUNT_PER_CLUSTER"
                    help="Minimum number of reads required to call a cluster (default 2)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_read_kde" type="integer"
                    value="3" label="MINIMUM_READ_COUNT_FOR_KDE"
                    help="Minimum read depth at a location to
                    make a KDE estimate (default 3)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_read_cluster_inc" type="integer"
                    value="1" label="MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION"
                    help="Minimum read depth for a location to be included
                    within a cluster (default 1)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_cluster_size" type="integer"
                    value="11" label="MINIMUM_CLUSTER_SIZE"
                    help="Minimum length required for a cluster
                    to be reported (default 11)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_conv_loc_cluster" type="integer"
                    value="2" label="MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER"
                    help="Minimum number of separate locations to have a
                    reported conversion for a cluster to be
                    reported (default 2)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_conv_cluster" type="integer"
                    value="2" label="MINIMUM_CONVERSION_COUNT_FOR_CLUSTER"
                    help="Minimum number of conversion events within a
                    region to report a cluster (default 2)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <param name="min_read_len" type="integer"
                    value="20" label="MINIMUM_READ_LENGTH"
                    help="Minimum length of mapped read to be included
                    in the analysis (default 20)">
                    <validator type="in_range"
                        message="Minimum allowed value is 1" min="1"/>
                </param>

                <!-- This is an upper bound, so 0 (tolerate no
                     non-conversion mismatches) is a meaningful setting. -->
                <param name="max_num_conv_mis" type="integer"
                    value="1" label="MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES"
                    help="Maximum number of non-conversion mismatches of
                    a mapped read to be included in the analysis (default 1)">
                    <validator type="in_range"
                        message="Minimum allowed value is 0" min="0"/>
                </param>
            </when> <!-- full -->
        </conditional>
    </inputs>
    <configfiles>
        <!-- Parameter file passed to PARalyzer as its second argument. -->
        <configfile name="input_ini">
## genome source
#if $refGenomeSource.genomeSource == "history":
GENOME_2BIT_FILE=ownFile.2bit
#else
GENOME_2BIT_FILE=$refGenomeSource.builtin.fields.path
#end if

SAM_FILE=$input_sam$collapse

#if $methods.choice == "ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL":
ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=$methods.max_num
#elif $methods.choice == "HAFFNER_APPROACH":
## the select value keeps its old spelling; emit the keyword as documented
HAFNER_APPROACH
#else:
$methods.choice
#end if

#if $conversion.selection == "custom":
CONVERSION=$conversion.character_from>$conversion.character_to
#end if

## required parameters
#if $params.settingsType == "custom":
BANDWIDTH=$params.BANDWIDTH
MINIMUM_READ_COUNT_PER_GROUP=$params.min_read_group
MINIMUM_READ_COUNT_PER_CLUSTER=$params.min_read_cluster
MINIMUM_READ_COUNT_FOR_KDE=$params.min_read_kde
MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION=$params.min_read_cluster_inc
MINIMUM_CLUSTER_SIZE=$params.min_cluster_size
MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER=$params.min_conv_loc_cluster
MINIMUM_CONVERSION_COUNT_FOR_CLUSTER=$params.min_conv_cluster
MINIMUM_READ_LENGTH=$params.min_read_len
MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES=$params.max_num_conv_mis
#end if

OUTPUT_DISTRIBUTIONS_FILE=out.distribution
OUTPUT_GROUPS_FILE=out.groups
OUTPUT_CLUSTERS_FILE=out.clusters
        </configfile>
    </configfiles>
    <outputs>
        <!-- File names match the OUTPUT_*_FILE entries of the configfile. -->
        <data name="distribution" format="txt"
            from_work_dir="out.distribution"
            label="${tool.name} on ${on_string}: DISTRIBUTIONS"/>

        <data name="groups" format="txt"
            from_work_dir="out.groups"
            label="${tool.name} on ${on_string}: GROUPS"/>

        <data name="clusters" format="txt"
            from_work_dir="out.clusters"
            label="${tool.name} on ${on_string}: CLUSTERS"/>

        <!--
        <data name="PARalyzer_Utilized" format="sam"
            from_work_dir="out_PARalyzer_Utilized.sam"
            label="${tool.name} on ${on_string}: PARalyzer_Utilized.sam"/>
        -->
    </outputs>
    <tests>
        <test>
            <param name="input_sam" value="input.sam" ftype="sam" />
            <param name="genomeSource" value="history" />
            <param name="ownFile" value="input.fa" />
            <param name="choice" value="EXTEND_BY_READ" />
            <param name="selection" value="custom" />
            <param name="character_from" value="T" />
            <param name="character_to" value="C" />
            <param name="settingsType" value="custom" />
            <param name="BANDWIDTH" value="3" />
            <param name="min_read_group" value="5" />
            <param name="min_read_cluster" value="2" />
            <param name="min_read_kde" value="3" />
            <param name="min_read_cluster_inc" value="1" />
            <param name="min_cluster_size" value="11" />
            <param name="min_conv_loc_cluster" value="2" />
            <param name="min_conv_cluster" value="2" />
            <param name="min_read_len" value="20" />
            <param name="max_num_conv_mis" value="1" />
            <output name="distribution" file="out.distribution"
                ftype="txt"/>
            <output name="groups" file="out.groups"
                ftype="txt"/>
            <output name="clusters" file="out.clusters"
                ftype="txt"/>
            <!--
            <output name="PARalyzer_Utilized" file="out_PARalyzer_Utilized.sam"
                ftype="sam"/>
            -->
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

`paralyzer`_ is an algorithm to generate a high resolution
map of interaction sites between RNA-binding proteins and their targets. The
algorithm utilizes the deep sequencing reads generated by `PAR-CLIP`_
(Photoactivatable-Ribonucleoside-Enhanced Crosslinking and
Immunoprecipitation) protocol. The use of photoactivatable nucleotides in the
PAR-CLIP protocol results in more efficient crosslinking between the
RNA-binding protein and its target relative to other CLIP methods; in addition
a nucleotide substitution occurs at the site of crosslinking, providing for
single-nucleotide resolution binding information. PARalyzer utilizes this
nucleotide substitution in a kernel density estimate classifier to generate
the high resolution set of Protein-RNA interaction sites.

.. _paralyzer: https://ohlerlab.mdc-berlin.de/software/PARalyzer_85/
.. _PAR-CLIP: http://www.ncbi.nlm.nih.gov/pubmed/20371350

.. class:: infomark

**Approaches**

``EXTEND_BY_READ``: including this line means that the cluster will be extended
beyond the signal to include a region such that it extends to
the end of any read that falls within the cluster and contained
a conversion, or until the minimum read depth
(MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met

``HAFNER_APPROACH``: identifies the location with the largest number of conversion
events and extends the cluster up to
(parameter ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL) nt
in each direction from that point, or until the minimum
read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met

``ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL``: the maximum number of nucleotides to
extend beyond the positive signal in each direction (default 0);
the cluster is defined as the region where the conversion KDE is above
the background KDE and then extended up to #integer#, or until the minimum
read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met

.. class:: infomark

**Outputs**

DISTRIBUTIONS: contains the signal KDE, background KDE, read count & conversion for all locations within each group
    * The data will be in blocks of four lines for each group
    * groups on the reverse strand do not need to be reversed; the values always equal nucleotides from GroupStart to GroupEnd, regardless of Strand
    * First Column = Chromosome = chromosome on which the group resides
    * Second Column = Strand = orientation in which the group resides
    * Third Column = GroupStart = beginning coordinate on the chromosome of the group
    * Fourth Column = GroupEnd = ending coordinate on the chromosome of the group
    * Fifth Column = GroupID = unique ID for the group
    * Sixth Column = Information = reports if the current line contains the Signal, Background, Conversion Percent, or ReadCount
    * All nucleotides that do not have any possibility of having a conversion event are given a value of -1
    * All Subsequent Columns: the values for each nucleotide from GroupStart until GroupEnd


GROUPS: a comma separated file containing the information about the resulting groups
    * Chromosome = chromosome on which the group resides
    * Strand = orientation in which the group resides
    * GroupStart = beginning coordinate on the chromosome of the group
    * GroupEnd = ending coordinate on the chromosome of the group
    * GroupID = unique ID for the group
    * ReadCount = number of reads within the group

CLUSTERS: a comma separated file containing the information about the resulting clusters
    * Chromosome = chromosome on which the cluster resides
    * Strand = orientation in which the cluster resides
    * ClusterStart = beginning coordinate on the chromosome of the cluster
    * ClusterEnd = ending coordinate on the chromosome of the cluster
    * ClusterID = unique ID for the cluster
    * ClusterSequence = sequence of the cluster
    * ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
    * ModeLocation = coordinate of the location with the highest signal / (signal + background) value
    * ModeScore = score of the highest signal / (signal + background) value
    * ConversionLocationCount = number of unique locations where at least 1 conversion occurred
    * ConversionEventCount = total number of conversions that occurred within the cluster
    * NonConversionEventCount = total number of possible conversion events that did not occur

]]></help>
    <citations>
        <citation type="doi">10.1186/gb-2011-12-8-r79</citation>
    </citations>
</tool>