# HG changeset patch # User jshay # Date 1565724843 14400 # Node ID e40ccd3eab4483e90a9207fce146c28fb9c0b3fa Uploaded diff -r 000000000000 -r e40ccd3eab44 filterinfo/.shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/.shed.yml Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,8 @@ +name: filterinfo +owner: jashay90 +description: Report proportion of reads in one file vs. the other +long_description: Takes two fastq files, counts the number of lines/sequences per file, and reports the fraction of reads in one file vs. the other. Can also double the read count for both files with the paired option. +remote_repository_url: https://github.com/jashay90/galaxytools/tree/master/filterinfo +type: unrestricted +categories: + - Fastq Manipulation diff -r 000000000000 -r e40ccd3eab44 filterinfo/filterinfo.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/filterinfo.sh Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,55 @@ +#!/bin/bash +# Julie Shay +# July 30, 2019 +# Literally just runs wc on two fastq files, assumes paired reads so it divides wc by two +# to get total number of reads. +# Then outputs number of reads and proportion of first vs. second input + + +while getopts '1:2:o:ph' flag; do + case $flag in + 1) + FULL=$OPTARG + ;; + 2) + FILTERED=$OPTARG + ;; + o) + OUT=$OPTARG + ;; + p) + divideby=2 + ;; + h) + h=1 + ;; + esac +done + +if [[ -n "$h" || ! -f $FULL || ! -f $FILTERED ]]; then + echo "Usage: $0 -1 full.fq -2 filtered.fq -o outfile [-p]" + echo "Where full.fq contains unfiltered sequences (either R1 or R2)" + echo "and filtered.fq filtered sequences." + echo "Use -p to specify that inputs are part of a set of paired reads" + echo "The script will just print the number of sequences (* 2 with -p option) and the proportion filtered/full" +else + if [ -z "$divideby" ]; then + divideby=4 + fi + # print % reads passing filter to an outfile + if [[ $FILTERED == *.gz ]]; then + pass=$(gunzip -c $FILTERED | wc -l | awk '{print $1}') + else + pass=$(wc -l $FILTERED | awk '{print $1}') + fi + pass=$(expr $pass / $divideby) + if [[ $FULL == *.gz ]]; then + total=$(gunzip -c $FULL | wc -l | awk '{print $1}') + else + total=$(wc -l $FULL | awk '{print $1}') + fi + total=$(expr $total / $divideby) + prop=$(echo "scale=5; $pass/$total" | bc -l) + echo -e "Input Reads\tReads Passing Filter\tFraction Reads Passing Filter" > $OUT + echo -e ${total}"\t"${pass}"\t"$prop >> $OUT +fi diff -r 000000000000 -r e40ccd3eab44 filterinfo/filterinfo.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/filterinfo.xml Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +@misc{githubfilterinfo, + author = {LastTODO, FirstTODO}, + year = {TODO}, + title = {filterinfo}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/jashay90/galaxytools}, +} + + + diff -r 000000000000 -r e40ccd3eab44 filterinfo/test-data/filtered.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/test-data/filtered.fq Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,12 @@ +@totally +A ++ +F +@fastq +C ++ +F +@file +G ++ +F diff -r 000000000000 -r e40ccd3eab44 filterinfo/test-data/full.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/test-data/full.fq Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,16 @@ +@totally +A ++ +F +@real +T ++ +F +@fastq +C ++ +F +@file +G ++ +F diff -r 000000000000 -r e40ccd3eab44 filterinfo/test-data/full2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/test-data/full2.fq Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,16 @@ +@totally +A ++ +F +@real +T ++ +F +@fastq +C ++ +F +@file +G ++ +F diff -r 000000000000 -r e40ccd3eab44 filterinfo/test-data/prop.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterinfo/test-data/prop.tsv Tue Aug 13 15:34:03 2019 -0400 @@ -0,0 +1,2 @@ +Input Reads Reads Passing Filter Fraction Reads Passing Filter +8 6 .75000