Mercurial > repos > jshay > filterinfo

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterinfo/.shed.yml	Tue Aug 13 15:34:03 2019 -0400
@@ -0,0 +1,8 @@
+name: filterinfo
+owner: jashay90
+description: Report proportion of reads in one file vs. the other
+long_description: Takes two fastq files, counts the number of lines/sequences per file, and reports the fraction of reads in one file vs. the other. Can also double the read count for both files with the paired option.
+remote_repository_url: https://github.com/jashay90/galaxytools/tree/master/filterinfo
+type: unrestricted
+categories:
+  - Fastq Manipulation
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterinfo/filterinfo.sh	Tue Aug 13 15:34:03 2019 -0400
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Julie Shay
+# July 30, 2019
+# Literally just runs wc on two fastq files, assumes paired reads so it divides wc by two
+# to get total number of reads.
+# Then outputs number of reads and proportion of first vs. second input
+
+
+while getopts '1:2:o:ph' flag; do
+	case $flag in
+		1)
+			FULL=$OPTARG
+			;;
+		2)
+			FILTERED=$OPTARG
+			;;
+		o)
+			OUT=$OPTARG
+			;;
+		p)
+			divideby=2
+			;;
+		h)
+			h=1
+			;;
+	esac
+done
+
+if [[ -n "$h" || ! -f $FULL || ! -f $FILTERED ]]; then
+	echo "Usage: $0 -1 full.fq -2 filtered.fq -o outfile [-p]"
+	echo "Where full.fq contains unfiltered sequences (either R1 or R2)"
+	echo "and filtered.fq filtered sequences."
+	echo "Use -p to specify that inputs are part of a set of paired reads"
+	echo "The script will just print the number of sequences (* 2 with -p option) and the proportion filtered/full"
+else
+	if [ -z "$divideby" ]; then
+		divideby=4
+	fi
+	# print % reads passing filter to an outfile
+	if [[ $FILTERED == *.gz ]]; then
+		pass=$(gunzip -c $FILTERED | wc -l | awk '{print $1}')
+	else
+		pass=$(wc -l $FILTERED | awk '{print $1}')
+	fi
+	pass=$(expr $pass / $divideby)
+	if [[ $FULL == *.gz ]]; then
+		total=$(gunzip -c $FULL | wc -l | awk '{print $1}')
+	else
+		total=$(wc -l $FULL | awk '{print $1}')
+	fi
+	total=$(expr $total / $divideby)
+	prop=$(echo "scale=5; $pass/$total" | bc -l)
+	echo -e "Input Reads\tReads Passing Filter\tFraction Reads Passing Filter" > $OUT
+	echo -e ${total}"\t"${pass}"\t"$prop >> $OUT
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterinfo/filterinfo.xml	Tue Aug 13 15:34:03 2019 -0400
@@ -0,0 +1,83 @@
+<tool id="filterinfo" name="Report proportion of reads in one file vs. the other" version="0.1.0">
+    <requirements>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+	'$__tool_directory__/filterinfo.sh' -1
+#if str( $orig_cond.orig_type ) == "one_of_pair"
+	'$orig_cond.orig'
+#else
+	'$orig_cond.orig.forward'
+#end if
+	-2
+#if str( $filtered_cond.filtered_type ) == "one_of_pair"
+	'$filtered_cond.filt'
+#else
+	'$filtered_cond.filt.forward'
+#end if
+#if $ispaired
+	'-p'
+#end if
+	-o '$out'
+    ]]></command>
+    <inputs>
+	    <conditional name="orig_cond">
+		<param name="orig_type" type="select" label="Original Input Type">
+		    <option value="one_of_pair">Fastq File</option>
+                    <option value="paired_collection">Paired Collection</option>
+		</param>
+		<when value="one_of_pair">
+                    <param name="orig" type="data" format="fastqsanger" label="Fastq sequences before filtering" help="Specify a single fastq file (either unpaired or half of a pair)"/>
+		</when>
+		<when value="paired_collection">
+		    <param name="orig" format="fastqsanger" type="data_collection" collection_type="paired" label="Fastq sequences before filtering" help="Specify paired dataset collection containing paired reads"/>
+		</when>
+            </conditional>
+	    <conditional name="filtered_cond"> <!-- selects the filtered file passed to filterinfo.sh as -2 -->
+		<param name="filtered_type" type="select" label="Filtered Input Type">
+		    <option value="one_of_pair">Fastq File</option>
+                    <option value="paired_collection">Paired Collection</option>
+		</param>
+		<when value="one_of_pair">
+                    <param name="filt" type="data" format="fastqsanger" label="Fastq sequences after filtering" help="Specify a single fastq file (either unpaired or half of a pair)"/>
+		</when>
+		<when value="paired_collection">
+		    <param name="filt" format="fastqsanger" type="data_collection" collection_type="paired" label="Fastq sequences after filtering" help="Specify paired dataset collection containing paired reads"/>
+		</when>
+            </conditional>
+            <param name="ispaired" type="boolean" label="Do the input fastq files come from sets of paired reads?"/>
+    </inputs>
+    <outputs>
+	    <data name="out" format="tabular" />
+    </outputs>
+    <tests>
+	    <test>
+		    <param name="orig_type" value="paired_collection"/>
+		    <param name="orig">
+			<collection type="paired">
+			    <element name="forward" value="full.fq" />
+			    <element name="reverse" value="full2.fq" />
+			</collection>
+		    </param>
+		    <param name="filtered_type" value="one_of_pair"/>
+		    <param name="filt" value="filtered.fq"/>
+                    <param name="ispaired" value="true"/>
+		    <output name="out" file="prop.tsv"/>
+	    </test>
+    </tests>
+    <help><![CDATA[
+	Usage: ./filterinfo.sh -1 full.fq -2 filtered.fq -o out.tsv [-p]
+
+]]></help>
+    <citations>
+	     <citation type="bibtex">
+@misc{githubfilterinfo,
+  author = {Shay, Julie},
+  year = {2019},
+  title = {filterinfo},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/jashay90/galaxytools},
+}</citation>
+    </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterinfo/test-data/filtered.fq	Tue Aug 13 15:34:03 2019 -0400
@@ -0,0 +1,12 @@
+@totally
+A
++
+F
+@fastq
+C
++
+F
+@file
+G
++
+F
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterinfo/test-data/full.fq	Tue Aug 13 15:34:03 2019 -0400
@@ -0,0 +1,16 @@
+@totally
+A
++
+F
+@real
+T
++
+F
+@fastq
+C
++
+F
+@file
+G
++
+F
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterinfo/test-data/full2.fq	Tue Aug 13 15:34:03 2019 -0400
@@ -0,0 +1,16 @@
+@totally
+A
++
+F
+@real
+T
++
+F
+@fastq
+C
++
+F
+@file
+G
++
+F
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterinfo/test-data/prop.tsv	Tue Aug 13 15:34:03 2019 -0400
@@ -0,0 +1,2 @@
+Input Reads	Reads Passing Filter	Fraction Reads Passing Filter
+8	6	.75000