0
|
1 #!/bin/bash
|
|
2 # Julie Shay
|
|
3 # July 30, 2019
|
|
4 # Literally just runs wc on two fastq files, assumes paired reads so it divides wc by two
|
|
5 # to get total number of reads.
|
|
6 # Then outputs number of reads and proportion of first vs. second input
|
|
7
|
|
8
|
|
9 while getopts '1:2:o:ph' flag; do
|
|
10 case $flag in
|
|
11 1)
|
|
12 FULL=$OPTARG
|
|
13 ;;
|
|
14 2)
|
|
15 FILTERED=$OPTARG
|
|
16 ;;
|
|
17 o)
|
|
18 OUT=$OPTARG
|
|
19 ;;
|
|
20 p)
|
|
21 divideby=2
|
|
22 ;;
|
|
23 h)
|
|
24 h=1
|
|
25 ;;
|
|
26 esac
|
|
27 done
|
|
28
|
|
29 if [[ -n "$h" || ! -f $FULL || ! -f $FILTERED ]]; then
|
|
30 echo "Usage: $0 -1 full.fq -2 filtered.fq -o outfile [-p]"
|
|
31 echo "Where full.fq contains unfiltered sequences (either R1 or R2)"
|
|
32 echo "and filtered.fq filtered sequences."
|
|
33 echo "Use -p to specify that inputs are part of a set of paired reads"
|
|
34 echo "The script will just print the number of sequences (* 2 with -p option) and the proportion filtered/full"
|
|
35 else
|
|
36 if [ -z "$divideby" ]; then
|
|
37 divideby=4
|
|
38 fi
|
|
39 # print % reads passing filter to an outfile
|
1
|
40 if [ $(file $FILTERED | awk '{print $2}') == "gzip" ]; then
|
0
|
41 pass=$(gunzip -c $FILTERED | wc -l | awk '{print $1}')
|
|
42 else
|
|
43 pass=$(wc -l $FILTERED | awk '{print $1}')
|
|
44 fi
|
|
45 pass=$(expr $pass / $divideby)
|
1
|
46 if [ $(file $FULL | awk '{print $2}') == "gzip" ]; then
|
0
|
47 total=$(gunzip -c $FULL | wc -l | awk '{print $1}')
|
|
48 else
|
|
49 total=$(wc -l $FULL | awk '{print $1}')
|
|
50 fi
|
|
51 total=$(expr $total / $divideby)
|
|
52 prop=$(echo "scale=5; $pass/$total" | bc -l)
|
|
53 echo -e "Input Reads\tReads Passing Filter\tFraction Reads Passing Filter" > $OUT
|
|
54 echo -e ${total}"\t"${pass}"\t"$prop >> $OUT
|
|
55 fi
|