Mercurial > repos > jshay > filterinfo
changeset 0:e40ccd3eab44 draft
Uploaded
author | jshay |
---|---|
date | Tue, 13 Aug 2019 15:34:03 -0400 |
parents | |
children | ce6806e0539f |
files | filterinfo/.shed.yml filterinfo/filterinfo.sh filterinfo/filterinfo.xml filterinfo/test-data/filtered.fq filterinfo/test-data/full.fq filterinfo/test-data/full2.fq filterinfo/test-data/prop.tsv |
diffstat | 7 files changed, 192 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/.shed.yml Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,8 @@ +name: filterinfo +owner: jashay90 +description: Report proportion of reads in one file vs. the other +long_description: Takes two fastq files, counts the number of lines/sequences per file, and reports the fraction of reads in one file vs. the other. Can also double the read count for both files with the paired option. +remote_repository_url: https://github.com/jashay90/galaxytools/tree/master/filterinfo +type: unrestricted +categories: + - Fastq Manipulation
#!/bin/bash
# filterinfo/filterinfo.sh
# Julie Shay
# July 30, 2019
#
# Counts the lines of two FASTQ files with wc -l and converts the line counts
# into read counts:
#   * default: lines / 4  (one FASTQ record is four lines)
#   * with -p: lines / 2  (each input is one half of a paired set, so the
#                          read count is doubled)
# It then writes a two-line, tab-separated report to the -o file containing
# the read count of the full input, the read count of the filtered input and
# the fraction filtered/full.
#
# Inputs whose name ends in .gz are decompressed with gunzip before counting.
# NOTE: wc -l counts newline characters, so inputs are assumed to end with a
# trailing newline.
#
# Exit status: 0 on success (and for -h), 2 for a usage error (missing or
# non-existent input, missing -o, unknown option), 1 if an input cannot be read
# or the report cannot be written. The non-zero statuses matter because the
# Galaxy wrapper detects failures by exit code.

#######################################
# Print the usage text to stdout.
# Arguments: none
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 -1 full.fq -2 filtered.fq -o outfile [-p]" \
    "Where full.fq contains unfiltered sequences (either R1 or R2)" \
    "and filtered.fq filtered sequences." \
    "Use -p to specify that inputs are part of a set of paired reads" \
    "The script will just print the number of sequences (* 2 with -p option) and the proportion filtered/full"
}

#######################################
# Print the number of lines in a (possibly gzip-compressed) file.
# Runs in a subshell so that pipefail stays local to this function.
# Arguments: $1 - path to the file; a .gz suffix selects gunzip
# Outputs:   the line count on stdout
# Returns:   non-zero if the file cannot be read or decompressed
#######################################
count_lines() (
  set -o pipefail
  if [[ $1 == *.gz ]]; then
    n=$(gunzip -c -- "$1" | wc -l) || exit 1
  else
    n=$(wc -l < "$1") || exit 1
  fi
  # Arithmetic expansion strips the padding some wc implementations add.
  printf '%s\n' "$(( n ))"
)

#######################################
# Print num/den truncated to five decimal places, in the format that
# `echo "scale=5; num/den" | bc -l` produces: no leading zero below 1
# (".75000"), a plain "0" when the truncated value is zero.
# Arguments: $1 - numerator (non-negative integer)
#            $2 - denominator (positive integer)
#######################################
format_fraction() {
  local -i num=$1 den=$2
  local -i scaled=$(( num * 100000 / den ))
  local -i whole=$(( scaled / 100000 ))
  local -i frac=$(( scaled % 100000 ))
  if (( scaled == 0 )); then
    printf '0\n'
  elif (( whole == 0 )); then
    printf '.%05d\n' "$frac"
  else
    printf '%d.%05d\n' "$whole" "$frac"
  fi
}

#######################################
# Parse the command line, count reads and write the report.
# Arguments: -1 FILE  unfiltered FASTQ
#            -2 FILE  filtered FASTQ
#            -o FILE  report to write
#            -p       inputs are halves of paired sets (divide lines by 2)
#            -h       print usage and stop
# Returns:   0 on success, 2 on usage errors, 1 on I/O errors
#######################################
main() {
  local full='' filtered='' out='' flag
  local -i divideby=4 show_help=0
  local -i full_lines filtered_lines total pass
  local prop
  # getopts keeps its position in OPTIND; reset it so main can be re-invoked.
  local OPTARG
  local OPTIND=1

  while getopts '1:2:o:ph' flag; do
    case "$flag" in
      1) full=$OPTARG ;;
      2) filtered=$OPTARG ;;
      o) out=$OPTARG ;;
      p) divideby=2 ;;
      h) show_help=1 ;;
      *)
        usage >&2
        return 2
        ;;
    esac
  done

  if (( show_help )); then
    usage
    return 0
  fi

  if [[ ! -f $full || ! -f $filtered ]]; then
    printf '%s: both -1 and -2 must name existing files\n' "$0" >&2
    usage >&2
    return 2
  fi

  if [[ -z $out ]]; then
    printf '%s: an output file must be given with -o\n' "$0" >&2
    usage >&2
    return 2
  fi

  if ! filtered_lines=$(count_lines "$filtered"); then
    printf '%s: could not read %s\n' "$0" "$filtered" >&2
    return 1
  fi
  if ! full_lines=$(count_lines "$full"); then
    printf '%s: could not read %s\n' "$0" "$full" >&2
    return 1
  fi

  pass=$(( filtered_lines / divideby ))
  total=$(( full_lines / divideby ))

  if (( total == 0 )); then
    # No input reads: the fraction is undefined, so report NA rather than
    # dividing by zero.
    printf '%s: %s contains no reads; fraction reported as NA\n' "$0" "$full" >&2
    prop='NA'
  else
    prop=$(format_fraction "$pass" "$total")
  fi

  # print % reads passing filter to an outfile
  {
    printf 'Input Reads\tReads Passing Filter\tFraction Reads Passing Filter\n'
    printf '%s\t%s\t%s\n' "$total" "$pass" "$prop"
  } > "$out" || return 1
}

# main is the last command, so its return status is the script's exit status.
main "$@"
<!-- filterinfo/filterinfo.xml
     Galaxy tool wrapper around filterinfo.sh. It passes one FASTQ dataset for
     the original reads (-1) and one for the filtered reads (-2); when a paired
     collection is selected, only its forward element is passed. -->
<tool id="filterinfo" name="Report proportion of reads in one file vs. the other" version="0.1.0">
    <requirements>
    </requirements>
    <!-- Failures are detected from the script's exit code only. -->
    <command detect_errors="exit_code"><![CDATA[
        '$__tool_directory__/filterinfo.sh' -1
#if str( $orig_cond.orig_type ) == "one_of_pair"
        '$orig_cond.orig'
#else
        '$orig_cond.orig.forward'
#end if
        -2
#if str( $filtered_cond.filtered_type ) == "one_of_pair"
        '$filtered_cond.filt'
#else
        '$filtered_cond.filt.forward'
#end if
#if $ispaired
        '-p'
#end if
        -o '$out'
    ]]></command>
    <inputs>
        <!-- Reads before filtering: a single FASTQ dataset or a paired collection. -->
        <conditional name="orig_cond">
            <param name="orig_type" type="select" label="Original Input Type">
                <option value="one_of_pair">Fastq File</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="one_of_pair">
                <param name="orig" type="data" format="fastqsanger" label="Fastq sequences before filtering" help="Specify a single fastq file (either unpaired or half of a pair)"/>
            </when>
            <when value="paired_collection">
                <param name="orig" format="fastqsanger" type="data_collection" collection_type="paired" label="Fastq sequences before filtering" help="Specify paired dataset collection containing paired reads"/>
            </when>
        </conditional>
        <!-- Reads after filtering: a single FASTQ dataset or a paired collection. -->
        <conditional name="filtered_cond">
            <param name="filtered_type" type="select" label="Filtered Input Type">
                <option value="one_of_pair">Fastq File</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="one_of_pair">
                <param name="filt" type="data" format="fastqsanger" label="Fastq sequences after filtering" help="Specify a single fastq file (either unpaired or half of a pair)"/>
            </when>
            <when value="paired_collection">
                <param name="filt" format="fastqsanger" type="data_collection" collection_type="paired" label="Fastq sequences after filtering" help="Specify paired dataset collection containing paired reads"/>
            </when>
        </conditional>
        <!-- When checked, -p is passed so the script divides line counts by 2 instead of 4. -->
        <param name="ispaired" type="boolean" label="Do the input fastq files come from sets of paired reads?"/>
    </inputs>
    <outputs>
        <data name="out" format="tabular" />
    </outputs>
    <tests>
        <!-- Paired collection as the original input, single file as the filtered input, with -p. -->
        <test>
            <param name="orig_type" value="paired_collection"/>
            <param name="orig">
                <collection type="paired">
                    <element name="forward" value="full.fq" />
                    <element name="reverse" value="full2.fq" />
                </collection>
            </param>
            <param name="filtered_type" value="one_of_pair"/>
            <param name="filt" value="filtered.fq"/>
            <param name="ispaired" value="true"/>
            <output name="out" file="prop.tsv"/>
        </test>
    </tests>
    <help><![CDATA[
        Usage: ./filterinfo.sh -1 full.fq -2 filtered.fq -o out.tsv

]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubfilterinfo,
  author = {LastTODO, FirstTODO},
  year = {TODO},
  title = {filterinfo},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/jashay90/galaxytools},
}</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/test-data/filtered.fq Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,12 @@ +@totally +A ++ +F +@fastq +C ++ +F +@file +G ++ +F
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/test-data/full.fq Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,16 @@ +@totally +A ++ +F +@real +T ++ +F +@fastq +C ++ +F +@file +G ++ +F