diff htseq_clip.xml @ 0:94a987a7da69 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/htseq-clip commit 4879439f0df3386b97d8507c5991051fbdda053a
author bgruening
date Tue, 11 Oct 2022 16:09:23 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_clip.xml	Tue Oct 11 16:09:23 2022 +0000
@@ -0,0 +1,265 @@
+<tool id="htseq_clip" name="htseq-clip" version="0.1.0+galaxy0" python_template_version="3.5" profile="21.05">
+
+    <description>- A toolset for the analysis of eCLIP/iCLIP datasets</description>
+    <requirements>
+        <requirement type="package" version="2.14.0b0">htseq-clip</requirement>
+        <requirement type="package" version="2.30.0">bedtools</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+
+
+        #if $action_type.action_type_selector == 'create_sliding_windows':
+            python '$__tool_directory__/htsc_create_sliding_windows.py'
+                --gff '$action_type.gff_file'
+                --out ./
+                $action_type.gff_unsorted
+                --hcw-w $action_type.hcw_options.hcw_w
+                --hcw-s $action_type.hcw_options.hcw_s
+                --no-zipper
+        #elif $action_type.action_type_selector == 'create_count_table':
+            python '$__tool_directory__/htsc_create_count_table.py'
+                --win-bed '$action_type.win_bed_file'
+                --exp-bams 
+                #for $i in $action_type.exp_bams:
+                    $i.exp_bam 
+                #end for
+                --ctr-bams 
+                #for $i in $action_type.ctr_bams:
+                    $i.ctr_bam 
+                #end for
+                --data-id '$action_type.data_id'
+                --out ./
+                --hce-e $action_type.hce_options.hce_e
+                --hce-s '${action_type.hce_options.hce_s}'
+                --hce-g $action_type.hce_options.hce_g
+                --hce-q $action_type.hce_options.hce_q
+                $action_type.hce_options.hce_primary
+                --hce-c \${GALAXY_SLOTS:-1}
+                --hce-m $action_type.hce_options.hce_m
+                --hce-x $action_type.hce_options.hce_x
+                --hce-l $action_type.hce_options.hce_l
+                #if $action_type.hce_options.hce_f:
+                    --hce-f '$action_type.hce_options.hce_f'
+                #end if
+                #if $action_type.hce_options.filter_bed:
+                    --filter-bed '$action_type.hce_options.filter_bed'
+                    --filter-mode $action_type.hce_options.filter_mode
+                #end if
+                $action_type.hcc_options.hcc_unstranded
+                --no-zipper
+        #end if
+
+    ]]></command>
+
+    <inputs>
+        <conditional name="action_type">
+        
+            <param name="action_type_selector" type="select" label="Select an action">
+                <option value="create_sliding_windows" selected="true">Create sliding windows</option>
+                <option value="create_count_table">Create count table </option>
+            </param>
+
+            <when value="create_sliding_windows">
+
+                <param name="gff_file" type="data" format="gff3"
+                       label="GFF annotation file"
+                       help="Provide a genomic annotation file in GFF3 format"/>
+                <param name="gff_unsorted" label="Is the GFF file unsorted?" type="boolean"
+                       truevalue="--hca-unsorted" falsevalue="" checked="False"
+                       help="Check if GFF file is unsorted (default: GFF file is assumed to be sorted)"/>
+
+                <section name="hcw_options" title="Sliding window settings">
+                    <param name="hcw_w" type="integer" value="50"
+                           label="Sliding window size"
+                           help="Set the sliding window size in nucleotides. If unsure, try 75-100 (default: 50)"/>
+                    <param name="hcw_s" type="integer" value="20"
+                           label="Sliding window step size"
+                           help="Set the sliding window step size (default: 20)"/>
+                </section>
+
+                <section name="win_out_options" title="Output options">
+                    <param name="annot_bed_out" label="Output annotation BED file" type="boolean"
+                               checked="False"
+                               help="Output annotation BED file used for creating sliding windows"/>
+                </section>
+
+            </when>
+
+            <when value="create_count_table">
+                <repeat name="exp_bams" min="1" title="CLIP-seq experiment BAM inputs">
+                    <param name="exp_bam" type="data" format="bam" label="BAM files belonging to the CLIP-seq experiment" help="Select BAM file belonging to the CLIP-seq experiment. NOTE that order determines replicate numbering in output tables"/>
+                </repeat>
+                <repeat name="ctr_bams" min="1" title="CLIP-seq control BAM inputs">
+                    <param name="ctr_bam" type="data" format="bam" label="BAM files belonging to the CLIP-seq control" help="Select BAM file belonging to the CLIP-seq control. NOTE that order determines replicate numbering in output tables"/>
+                </repeat>
+                <param name="win_bed_file" type="data" format="bed"
+                       label="Sliding windows BED file"
+                       help="Provide a genomic regions BED file for calculating crosslink site overlap counts. Typically this is the sliding windows BED file created with htseq-clip's 'Create sliding windows' procedure"/>
+                <param name="data_id" type="text" value="Rbp"
+                       label="Dataset ID"
+                       help="Provide a dataset ID (e.g., RNA-binding protein name) used in the generated data table (default: Rbp)"/>
+                <section name="hce_options" title="Crosslink site extraction settings">
+                    <param name="hce_e" type="integer" value="1" min="1" max="2"
+                           label="Read mate to extract crosslink sites from"
+                           help="Select the read mate (1, 2) to extract crosslink sites from. For single-end CLIP-seq data, select 1 (default: 1)"/>
+                    <param name="hce_s" type="select" label="Specify crosslink site position on read"
+                        help="Specify crosslink site position in the read, i.e., the genomic position to be extrated (default: middle position)">
+                        <option value="m" selected="true">Middle position of read</option>
+                        <option value="s">First position of read</option>
+                        <option value="e">Last position of read</option>
+                        <option value="i">Insertion site</option>
+                        <option value="d">Deletion site</option>
+                    </param>
+                    <param name="hce_g" type="integer" value="0"
+                           label="Crosslink site offset"
+                           help="Number of nucleotides to offset for crosslink sites. Can be positive (upstream direction) or negative (downstream direction) (default: 0)"/>
+                    <param name="hce_q" type="integer" value="10"
+                           label="Minimum alignment quality"
+                           help="Minimum alignment quality for filtering input BAM files. BAM entries greater than set quality will be filtered out (default: 10)"/>
+                    <param name="hce_primary" label="Use only primary positions of multimapping reads?" type="boolean"
+                        truevalue="--hce-primary" falsevalue="" checked="False"
+                        help="Check if only primary positions of multimapping reads should be kept"/>
+                    <param name="hce_m" type="integer" value="0"
+                           label="Minimum read length"
+                           help="Minimum read length for filtering input BAM files (default: 0)"/>
+                    <param name="hce_x" type="integer" value="500"
+                           label="Maximum read length"
+                           help="Maximum read length for filtering input BAM files (default: 500)"/>
+                    <param name="hce_l" type="integer" value="10000"
+                           label="Maximum read interval length"
+                           help="Maximum read interval length for filtering input BAM files (default: 10000)"/>
+                    <param name="hce_f" type="data" format="txt" optional="True"
+                        label="Specify chromosomes to extract crosslink sites from"
+                        help="Extract crosslink sites only from chromosomes given in this file (format: one chromsome ID per file)"/>
+                    <param name="filter_bed" type="data" format="bed" optional="True"
+                        label="BED file for filtering out BAM entries"
+                        help="Provide BED file to filter BAM entries based on their overlap with genomic regions inside the provided BED file"/>
+                    <param name="filter_mode" type="select" label="Filtering mode for BED filtering"
+                        help="Specify mode of filtering out BAM entries, with respect to the genomic regions inside the provided BED file)">
+                        <option value="1" selected="true">Keep BAM entries not overlapping with BED regions</option>
+                        <option value="2">Keep only BAM entries overlapping with BED regions</option>
+                    </param>
+                </section>
+
+                <section name="hcc_options" title="Overlap count settings">
+                    <param name="hcc_unstranded" label="Should crosslink site counting be non-strand-specific?" type="boolean"
+                        truevalue="--hcc-unstranded" falsevalue="" checked="False"
+                        help="Check if crosslink site position should be counted for overlapping features on both strands"/>
+                </section>
+            </when>
+
+
+        </conditional>
+
+
+    </inputs>
+
+    <outputs>
+
+        <data name="annotation_bed_file" format="bed" from_work_dir="annotation.bed" label="${tool.name} on ${on_string}: Annotation BED file">
+            <filter>action_type["action_type_selector"] == "create_sliding_windows" and action_type["win_out_options"]["annot_bed_out"]</filter>
+        </data>
+        <data name="windows_bed_file" format="bed" from_work_dir="windows.bed" label="${tool.name} on ${on_string}: Sliding windows BED file">
+            <filter>action_type["action_type_selector"] == "create_sliding_windows"</filter>
+        </data>
+        <data name="windows_txt_file" format="tabular" from_work_dir="windows_mapped_to_ids.txt" label="${tool.name} on ${on_string}: Windows annotation table file (DEWSeq input)">
+            <filter>action_type["action_type_selector"] == "create_sliding_windows"</filter>
+        </data>
+        <data name="sample_info_file" format="tabular" from_work_dir="sample_info.txt" label="${tool.name} on ${on_string}: Sample information table file (DEWSeq input)">
+            <filter>action_type["action_type_selector"] == "create_count_table"</filter>
+        </data>
+        <data name="count_matrix_file" format="tabular" from_work_dir="count_matrix.txt" label="${tool.name} on ${on_string}: Count table file (DEWSeq input)">
+            <filter>action_type["action_type_selector"] == "create_count_table"</filter>
+        </data>
+    </outputs>
+    <tests>
+
+        <test>
+            <param name="action_type_selector" value="create_sliding_windows"/>
+            <param name="gff_file" value="paper_tus.Synechocystis_pSYSM.gff3" ftype="gff3"/>
+            <param name="hcw_w" value="50"/>
+            <param name="hcw_s" value="20"/>
+            <param name="annot_bed_out" value="True"/>
+            <output name="annotation_bed_file" file="annotation.exp.bed"/>
+            <output name="windows_bed_file" file="windows.exp.bed"/>
+            <output name="windows_txt_file" file="windows.exp.txt"/>
+        </test>
+
+        <test>
+            <param name="action_type_selector" value="create_count_table"/>
+            <param name="win_bed_file" value="windows.exp.bed" ftype="bed"/>           
+            <param name="data_id" value="Rbp"/>
+            <repeat name="exp_bams">
+                <param name="exp_bam" value="Rbp_exp_rep1.Synechocystis_pSYSM.bam"/>
+            </repeat>
+            <repeat name="exp_bams">
+                <param name="exp_bam" value="Rbp_exp_rep2.Synechocystis_pSYSM.bam"/>
+            </repeat>
+            <repeat name="ctr_bams">
+                <param name="ctr_bam" value="Rbp_ctrl_rep1.Synechocystis_pSYSM.bam"/>
+            </repeat>
+            <output name="sample_info_file" file="sample_info.exp.txt"/>
+            <output name="count_matrix_file" file="Rbp_count_matrix.exp.txt" sort="true"/>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+
+**Overview**
+
+htseq-clip is a toolset for the analysis of eCLIP/iCLIP datasets. It can be used to generate files necessary for data analysis using the companion R/Bioconductor package DEWSeq_ (available on Galaxy as well).
+
+The Galaxy wrapper of htseq-clip provides the following two functionalities: 
+
+1) Create sliding windows
+2) Create count table
+
+
+**Create sliding windows**
+
+In this mode, htseq-clip takes a genomic annotation file (GFF3 format, tested with GENCODE_ GFF3 files), flattens it (i.e., overlapping regions get merged), 
+and based on the flattened annotation BED file creates a sliding windows BED file. The window size and step size can be specified. 
+E.g., a window size of 50 and a step size of 20 means that a window of 50 nt is extracted at every 20 nt step along each of the regions in the annotation BED file.
+In the end, a table file is output, containing the windows and additional annotation information. This table file serves as one of the input files for DEWSeq. 
+In addition, the windows BED file is output, which is needed as input for the "Create count table" mode.
+
+
+**Create count table**
+
+In this mode, htseq-clip takes the windows BED file created in "Create count table" mode, as well as the CLIP-seq BAM files (experiment BAMs and control BAMs). 
+Various options are available for filtering the BAM files and modifying the counting procedure. htseq-clip then counts the number of overlapping BAM entries 
+for each BAM file and each window in the input BED file. In the end, a count table file is output, as well as a sample information table file, which both 
+serve as input files for DEWSeq.
+
+
+**Documentation and Repository**
+
+htseq-clip's online documentation can be found at:
+
+https://htseq-clip.readthedocs.io
+
+Its GitHub page is available at:
+
+https://github.com/EMBL-Hentze-group/htseq-clip
+
+
+.. _DEWSeq: https://bioconductor.org/packages/release/bioc/html/DEWSeq.html
+.. _GENCODE: http://gencodegenes.org
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @incollection{sahadevan2022pipeline,
+                doi={0.1007/978-1-0716-1851-6_10},
+                url={https://doi.org/10.1007/978-1-0716-1851-6_10},
+                title={A Pipeline for Analyzing eCLIP and iCLIP Data with Htseq-clip and DEWSeq},
+                author={Sahadevan, Sudeep and Sekaran, Thileepan and Schwarzl, Thomas},
+                booktitle={Post-Transcriptional Gene Regulation},
+                pages={189--205},
+                year={2022},
+                publisher={Springer}
+              }
+        </citation>
+    </citations>
+</tool>