annotate filterinfo/filterinfo.sh @ 1:ce6806e0539f draft

Uploaded
author jshay
date Thu, 15 Aug 2019 16:18:59 -0400
parents e40ccd3eab44
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e40ccd3eab44 Uploaded
jshay
parents:
diff changeset
#!/bin/bash
# Julie Shay
# July 30, 2019
#
# Reports how many reads survive a filtering step.
#
# Counts the lines of two FASTQ files (plain text or gzip-compressed), turns
# the line counts into read counts, and writes a tab-separated table with a
# header row and one data row: input reads, reads passing the filter, and the
# fraction passing/input (5 decimal places, as formatted by bc).
#
# Options:
#   -1 FILE  unfiltered reads (either R1 or R2)
#   -2 FILE  filtered reads
#   -o FILE  where to write the table (standard output when omitted)
#   -p       inputs are one mate of a paired set: line counts are divided by 2
#            instead of 4, so the reported counts cover both mates
#   -h       print usage and exit successfully
#
# Exit status: 0 on success, 1 when an input is missing or cannot be counted,
# 2 on an invalid option.
#
# Requires: awk, bc, gunzip, od, tr.

# No 'set -e': every command whose failure matters is checked explicitly.
set -uo pipefail

# Print the usage text to stdout (callers redirect it to stderr on errors).
usage() {
  printf '%s\n' \
    "Usage: $0 -1 full.fq -2 filtered.fq -o outfile [-p]" \
    "Where full.fq contains unfiltered sequences (either R1 or R2)" \
    "and filtered.fq filtered sequences." \
    "Use -p to specify that inputs are part of a set of paired reads" \
    "The script will just print the number of sequences (* 2 with -p option) and the proportion filtered/full"
}

# Succeed when the file named by $1 starts with the gzip magic bytes (1f 8b).
# Reading the bytes directly avoids parsing the output of file(1), which is
# ambiguous when the path itself contains whitespace.
is_gzip() {
  local magic
  magic=$(od -An -tx1 -N2 < "$1" | tr -d '[:space:]') || return 1
  [[ "$magic" == "1f8b" ]]
}

# Print the number of lines in the file named by $1, decompressing it first
# when it is gzip data. awk's NR is used instead of 'wc -l' because wc counts
# newline characters and therefore misses a final line with no trailing
# newline, which would drop one read after the integer division.
# Returns non-zero when the file cannot be read or decompressed.
count_lines() {
  local path=$1
  if is_gzip "$path"; then
    gunzip -c < "$path" | LC_ALL=C awk 'END { print NR }'
  else
    LC_ALL=C awk 'END { print NR }' < "$path"
  fi
}

# Print the report table to stdout.
# Arguments: $1 - input reads, $2 - reads passing, $3 - fraction passing.
print_table() {
  printf 'Input Reads\tReads Passing Filter\tFraction Reads Passing Filter\n'
  printf '%s\t%s\t%s\n' "$1" "$2" "$3"
}

# Parse the command line, count both inputs and emit the report.
main() {
  local full="" filtered="" out="" show_help=0
  local divideby=4 # lines per FASTQ record; -p lowers it to 2
  local flag OPTARG OPTIND=1

  while getopts '1:2:o:ph' flag; do
    case "$flag" in
      1) full=$OPTARG ;;
      2) filtered=$OPTARG ;;
      o) out=$OPTARG ;;
      p) divideby=2 ;;
      h) show_help=1 ;;
      *)
        # getopts has already reported the offending option on stderr.
        usage >&2
        return 2
        ;;
    esac
  done

  if ((show_help)); then
    usage
    return 0
  fi
  if [[ ! -f "$full" || ! -f "$filtered" ]]; then
    usage >&2
    return 1
  fi

  local pass_lines total_lines
  if ! pass_lines=$(count_lines "$filtered"); then
    printf '%s: cannot count lines in %s\n' "$0" "$filtered" >&2
    return 1
  fi
  if ! total_lines=$(count_lines "$full"); then
    printf '%s: cannot count lines in %s\n' "$0" "$full" >&2
    return 1
  fi

  local pass=$((pass_lines / divideby))
  local total=$((total_lines / divideby))

  local prop
  if ((total == 0)); then
    # An empty input has no defined fraction; avoid bc's divide-by-zero
    # error, which used to leave the column blank.
    prop=NA
  elif ! prop=$(bc -l <<< "scale=5; $pass/$total"); then
    printf '%s: bc failed to compute the fraction\n' "$0" >&2
    return 1
  fi

  # print % reads passing filter to an outfile
  if [[ -n "$out" ]]; then
    print_table "$total" "$pass" "$prop" > "$out"
  else
    print_table "$total" "$pass" "$prop"
  fi
}

main "$@"