changeset 0:6e13890afaef draft default tip

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/qc/fastq_pair commit e8517297f4bf2ab5afc544e6af7bb4c5f152dd3e"
author ebi-gxa
date Thu, 17 Mar 2022 01:46:15 +0000
parents
children
files fastq_pair.xml get_test_data.sh
diffstat 2 files changed, 160 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_pair.xml	Thu Mar 17 01:46:15 2022 +0000
@@ -0,0 +1,129 @@
+<tool id="fastq_pair" name="Paired-end fastq pairer" profile="18.01" version="1.0+galaxy0">
+    <description>Rewrite paired-end fastq files to make sure that all reads have a mate and to separate out singletons.</description>
+    <requirements>
+        <requirement type="package" version="1.0">fastq-pair</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+    ## Unzip gzipped inputs
+    #if $fastq1.is_of_type("fastqsanger.gz"):
+        gzip -d -c '$fastq1' > 1.fq &&
+    #else:
+        mv '$fastq1' 1.fq &&
+    #end if
+
+    #if $fastq2.is_of_type("fastqsanger.gz"):
+        gzip -d -c '$fastq2' > 2.fq &&
+    #else:
+        mv '$fastq2' 2.fq &&
+    #end if
+
+
+    ## Run fastq_pair
+    fastq_pair
+
+    #if $table_size:
+        -t '$table_size'
+    #end if
+
+    #if $print_elements:
+        -p '$print_elements'
+    #end if
+
+    #if $verbose:
+        '$verbose'
+    #end if
+
+    1.fq 2.fq
+
+    ## Compress outputs
+    #if $compress == "yes":
+        ; gzip 1.fq.paired.fq
+        ; gzip 2.fq.paired.fq
+        ; gzip 1.fq.single.fq
+        ; gzip 2.fq.single.fq
+    #end if
+
+    ]]></command>
+    <inputs>
+        <param name="verbose" label="Verbose" optional="true" value="false" argument="-v" type="boolean" truevalue="-v" falsevalue="" checked="true" help="Verbose output. This is mainly for debugging." />
+        <param name="fastq1" label="FASTQ file 1" type="data" format="fastqsanger,fastqsanger.gz" optional="false" help="FASTQ file input, file 1 of pair" />
+        <param name="fastq2" label="FASTQ file 2" type="data" format="fastqsanger,fastqsanger.gz" optional="false" help="FASTQ file input, file 2 of pair" />
+        <param name="table_size" label="Table size" argument="-t" optional="true" value="100003" type="integer" help="Table size (default 100003)" />
+        <param name="print_elements" label="Number elements to print" argument="-p" type="integer" optional="true" help="Print this number of elements in each bucket in the table." />
+        <param name="compress" label="Compress outputs?" type="boolean" optional="true" value="true" truevalue="yes" falsevalue="no" checked="false" help="Check if outputs should be compressed (.gz)" />
+    </inputs>
+    <outputs>
+        <collection name="uncompressed" type="list" format="fastqsanger" label="${tool.name} on ${on_string}: Paired reads and singletons">
+            <filter>compress == False</filter>
+                <data name="fwd_paired" from_work_dir="1.fq.paired.fq"/>
+                <data name="rev_paired" from_work_dir="2.fq.paired.fq"/>
+                <data name="fwd_single" from_work_dir="1.fq.single.fq"/>
+                <data name="rev_single" from_work_dir="2.fq.single.fq"/>
+        </collection>        
+        <collection name="compressed" type="list" format="fastqsanger.gz" label="${tool.name} on ${on_string}: Paired reads and singletons, compressed">
+            <filter>compress</filter>
+                <data name="fwd_paired_gz" from_work_dir="1.fq.paired.fq.gz"/>
+                <data name="rev_paired_gz" from_work_dir="2.fq.paired.fq.gz"/>
+                <data name="fwd_single_gz" from_work_dir="1.fq.single.fq.gz"/>
+                <data name="rev_single_gz" from_work_dir="2.fq.single.fq.gz"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="table_size" value="1000"/>
+            <param name="fastq1" value="left.fastq"/>
+            <param name="fastq2" value="right.fastq"/>
+            <output_collection name="uncompressed" type="list" count="4">
+                <element name="fwd_paired" ftype="fastqsanger">
+                    <assert_contents><has_line_matching expression="^@GFH7CG303GRT2I/1[ -~]+" /></assert_contents>
+                </element>
+                <element name="rev_paired" ftype="fastqsanger">
+                    <assert_contents><has_line_matching expression="^@GFH7CG303GRT2I/2[ -~]+" /></assert_contents>
+                </element>
+                <element name="fwd_single" ftype="fastqsanger">
+                    <assert_contents><has_line_matching expression="^@GFH7CG303GGIBL/1[ -~]+" /></assert_contents>
+                </element>
+                <element name="rev_single" ftype="fastqsanger">
+                    <assert_contents><has_line_matching expression="^@GFH7CG303GSASH/2[ -~]+" /></assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="table_size" value="1000"/>
+            <param name="fastq1" value="left.fastq.gz"/>
+            <param name="fastq2" value="right.fastq.gz"/>
+            <param name="compress" value="true"/>
+            <output_collection name="compressed" type="list" count="4">
+                <element name="fwd_paired_gz" ftype="fastqsanger.gz">
+                    <assert_contents><has_size value="16000" delta="1600"/></assert_contents>
+                </element>
+                <element name="rev_paired_gz" ftype="fastqsanger.gz">
+                    <assert_contents><has_size value="16000" delta="1600"/></assert_contents>
+                </element>
+                <element name="fwd_single_gz" ftype="fastqsanger.gz">
+                    <assert_contents><has_size value="62000" delta="6200"/></assert_contents>
+                </element>
+                <element name="rev_single_gz" ftype="fastqsanger.gz">
+                    <assert_contents><has_size value="8500" delta="850"/></assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+=======================================================
+Paired-end fastq pairer (fastq_pair)
+=======================================================
+
+Match up paired end fastq files quickly and efficiently. 
+This galaxy tool can accept gzipped fastq files.
+
+OPTIONS
+-t table size (default 100003)
+-p print the number of elements in each bucket in the table
+-v verbose output. This is mainly for debugging
+]]></help>
+    <citations>
+        <citation type="doi">10.1101/552885</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_test_data.sh	Thu Mar 17 01:46:15 2022 +0000
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+BASE_LINK="https://raw.githubusercontent.com/linsalrob/fastq-pair/master/test"
+
+LEFT_FILE="left.fastq"
+RIGHT_FILE="right.fastq"
+LEFTGZ_FILE="left.fastq.gz"
+RIGHTGZ_FILE="right.fastq.gz"
+
+LEFT_LINK=$BASE_LINK"/"$LEFT_FILE
+RIGHT_LINK=$BASE_LINK"/"$RIGHT_FILE
+LEFTGZ_LINK=$BASE_LINK"/"$LEFTGZ_FILE
+RIGHTGZ_LINK=$BASE_LINK"/"$RIGHTGZ_FILE
+
+function get_data {
+  local link=$1
+  local fname=$2
+
+  if [ ! -f $fname ]; then
+    echo "$fname not available locally, downloading.."
+    wget -O $fname --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 $link
+  fi
+}
+
+# Get test data
+pushd test-data
+
+get_data $LEFT_LINK $LEFT_FILE
+get_data $RIGHT_LINK $RIGHT_FILE
+get_data $LEFTGZ_LINK $LEFTGZ_FILE
+get_data $RIGHTGZ_LINK $RIGHTGZ_FILE