changeset 0:df87d29fac7f draft default tip

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/qc/fastq_utils commit 94b7fe8e2147adc32748e2921729b99669e13a1b"
author ebi-gxa
date Tue, 15 Feb 2022 16:38:56 +0000
parents
children
files fastq_filter_n.xml get_test_data.sh
diffstat 2 files changed, 96 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_filter_n.xml	Tue Feb 15 16:38:56 2022 +0000
@@ -0,0 +1,48 @@
+<tool id="fastq_filter_n" name="FASTQ filterer" profile="18.01" version="0.25.1+galaxy0">
+    <description>Discards reads with more than x% of uncalled bases (N)</description>
+    <requirements>
+        <requirement type="package" version="0.25.1">fastq_utils</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+    fastq_filter_n -n '$threshold' '$file' > '$outfile'
+    ]]></command> <!-- every substituted value, including the redirect target, is single-quoted for the shell -->
+    <inputs>
+        <param name="file" label="Input file" type="data" format="fastqsanger.gz" optional="false" help="Fastq (optional gzipped) file name"/>
+        <param name="threshold" label="Filter threshold" argument="-n" optional="true" value="0" type="integer" min="0" max="100" help="[0-100] Maximum percentage of uncalled bases in a read. Default is 0."/>
+    </inputs>
+    <outputs>
+        <data label="${tool.name} on ${on_string}: Output file" name="outfile" format="fastqsanger.gz" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="file" value="test_21_2.fastq.gz"/>
+            <param name="threshold" value="5"/>
+            <output name="outfile" md5="019ce04559162b973e13a514a5c01595"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+============================================================
+FASTQ filterer (fastq_filter_n)
+============================================================
+
+Outputs a gzipped fastq file where reads with more than the
+maximum number of allowed uncalled bases are not included.
+
+Threshold is the maximum percentage (ranging from 0 to 100)
+of uncalled bases that a read can have. Default value is 0,
+which means that a read with a single N would be discarded.
+
+     ]]></help>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @ARTICLE{Fonseca2017,
+            author = {Fonseca, N.},
+            title = {fastq_utils},
+            year = {2017},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            howpublished = {\url{https://github.com/nunofonseca/fastq_utils}},
+            commit = {c6cf3f954c5286e62fbe36bb9ffecd89d7823b07}
+            }]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_test_data.sh	Tue Feb 15 16:38:56 2022 +0000
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Names and download URLs of the fastq_utils test files: one *_FILE/*_LINK pair per file.
+BASE_LINK='https://raw.githubusercontent.com/nunofonseca/fastq_utils/master/tests'
+# File names, relative to BASE_LINK.
+BAR11_FILE='barcode_test_1.fastq.gz'
+BAR12_FILE='barcode_test_2.fastq.gz'
+BAR21_FILE='barcode_test2_1.fastq.gz'
+BAR22_FILE='barcode_test2_2.fastq.gz'
+INTER_FILE='inter.fastq.gz'
+A1_FILE='a_1.fastq.gz'
+POLYAT_FILE='poly_at.fastq.gz'
+POLYAT3_FILE='poly_at_len3.fastq.gz'
+TEST212_FILE='test_21_2.fastq.gz'
+# Full URL of each file: BASE_LINK and the file name joined by a slash.
+BAR11_LINK="${BASE_LINK}/${BAR11_FILE}"
+BAR12_LINK="${BASE_LINK}/${BAR12_FILE}"
+BAR21_LINK="${BASE_LINK}/${BAR21_FILE}"
+BAR22_LINK="${BASE_LINK}/${BAR22_FILE}"
+INTER_LINK="${BASE_LINK}/${INTER_FILE}"
+A1_LINK="${BASE_LINK}/${A1_FILE}"
+POLYAT_LINK="${BASE_LINK}/${POLYAT_FILE}"
+POLYAT3_LINK="${BASE_LINK}/${POLYAT3_FILE}"
+TEST212_LINK="${BASE_LINK}/${TEST212_FILE}"
+
+# get_data LINK FNAME: fetch LINK into FNAME unless FNAME is already a regular file.
+function get_data {
+  local link=$1 fname=$2
+  [ -f "$fname" ] && return 0
+  echo "$fname not available locally, downloading.."
+  wget -O "$fname" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link" && return 0
+  # wget -O creates/truncates FNAME up front; drop the stub so a re-run retries the download.
+  local rc=$?; rm -f -- "$fname"; return "$rc"
+}
+
+# Get test data. get_data writes to relative file names, so every file lands in
+# the current directory: abort if test-data cannot be entered rather than
+# downloading into whatever directory the script was started from.
+pushd test-data || exit 1
+
+get_data "$BAR11_LINK" "$BAR11_FILE"
+get_data "$BAR12_LINK" "$BAR12_FILE"
+get_data "$BAR21_LINK" "$BAR21_FILE"
+get_data "$BAR22_LINK" "$BAR22_FILE"
+get_data "$INTER_LINK" "$INTER_FILE"
+get_data "$A1_LINK" "$A1_FILE"
+get_data "$POLYAT_LINK" "$POLYAT_FILE"
+get_data "$TEST212_LINK" "$TEST212_FILE"
+get_data "$POLYAT3_LINK" "$POLYAT3_FILE"