0
|
1 #/bin/bash
|
|
2 # rDiff wrapper script to start the interpreter with the correct list of arguments
|
|
3
|
|
4 set -e
|
|
5
|
|
6
|
|
7 function usage () {
|
|
8 echo "
|
|
9 Usage: rdiff [-OPTION VALUE]
|
|
10
|
|
11 Options:
|
|
12
|
|
13 -h Display this help
|
|
14 -o The output directory for the results
|
|
15 -d Directory where the bam-files are
|
|
16 -a Comma separated list of bam-files for sample 1 (No spaces between the files: File1.bam,File2.bam,...)
|
|
17 -b Comma separated list of bam-files for sample 2 (No spaces between the files: File1.bam,File2.bam,...)
|
|
18 -g Path to GFF3 gene structure
|
|
19 -L Read length used for rDiff.parametric (Default: 75)
|
|
20 -m Method to use for testing: param for rDiff.parametric (Default)
|
|
21 nonparam for rDiff.nonparametric
|
|
22 poisson for rDiff.poisson
|
|
23 mmd for rDiff.mmd
|
|
24
|
|
25 Advanced options:
|
|
26
|
|
27 -M Minimal read length required (Default: 30)
|
|
28 -e Estimate the gene expression and counts in alternative regions (Yes (Default):1, No: 0)
|
|
29 -E Only estimate the gene expression do not perform testing (Yes:1, No (Default): 0)
|
|
30 -A Path to variance function for sample 1 (Experimental)
|
|
31 -B Path to variance function for sample 2 (Experimental)
|
|
32 -S Filename under which variance function for sample 1 will be saved (Experimental)
|
|
33 -T Filename under which variance function for sample 2 will be saved (Experimental)
|
|
34 -P Parameters of variance function for sample 1 of the form a+b*x+b*x^2 (Given as: a,b,c) (Experimental)
|
|
35 -Q Parameters of variance function for sample 2 of the form a+b*x+b*x^2 (Given as: a,b,c) (Experimental)
|
|
36 -y Use only the gene start and stop for the rDiff.nonparametric variance function estimation (Yes:1, No (Default): 0) (Experimental)
|
|
37 -s Number of reads per gene to which to downsample (Default: 10000)
|
|
38 -C Number of bases to clip from each end of each read (Default: 3)
|
|
39 -p Number of permutations performed for rDiff.nonparametric (Default: 1000)
|
|
40 -x Merge sample 1 and sample 2 for variance function estimation (Yes:1, No (Default): 0)
|
|
41
|
|
42 "
|
|
43 exit 0
|
|
44 }
|
|
45
|
|
46 CURR_DIR=`dirname $0`
|
|
47 . $CURR_DIR/rdiff_config.sh
|
|
48
|
|
49 PROG=`basename $0`
|
|
50 DIR=`dirname $0`
|
|
51
|
|
52
|
|
53 #check if arguments are given
|
|
54 [[ ! $1 ]] && { usage; }
|
|
55
|
|
56
|
|
57 #Initializing defaults
|
|
58 RDIFF_RES_DIR="."
|
|
59 RDIFF_INPUT_DIR="."
|
|
60 BAM_INPUT1=""
|
|
61 BAM_INPUT2=""
|
|
62 GFF_INPUT=""
|
|
63 READ_LENGTH="75"
|
|
64 MIN_READ_LENGTH="30"
|
|
65 EST_GENE_EXPR="1"
|
|
66 ONLY_GENE_EXPR="0"
|
|
67 VAR_PATH1=""
|
|
68 VAR_PATH2=""
|
|
69 SAVE_VAR1=""
|
|
70 SAVE_VAR2=""
|
|
71 PRED_VAR1=""
|
|
72 PRED_VAR2=""
|
|
73 ONLY_GENE_START="0"
|
|
74 SUBSAMPLE="10000"
|
|
75 CLIP="3"
|
|
76 BOOTSTRAP="1000"
|
|
77 TEST_METH_NAME="param"
|
|
78
|
|
79 #Parsing command line arguments
|
|
80 while getopts ":d:o:a:b:g:L:m:e:E:A:B:S:T:P:Q:s:C:p:m:hx:y:" opt; do
|
|
81 case $opt in
|
|
82 o ) RDIFF_RES_DIR="$OPTARG" ;;
|
|
83 d ) RDIFF_INPUT_DIR="$OPTARG";;
|
|
84 a ) BAM_INPUT1="$OPTARG";;
|
|
85 b ) BAM_INPUT2="$OPTARG";;
|
|
86 g ) GFF_INPUT="$OPTARG";;
|
|
87 L ) READ_LENGTH="$OPTARG";;
|
|
88 M ) MIN_READ_LENGTH="$OPTARG";;
|
|
89 e ) EST_GENE_EXPR="$OPTARG";;
|
|
90 E ) ONLY_GENE_EXPR="$OPTARG";;
|
|
91 A ) VAR_PATH1="$OPTARG";;
|
|
92 B ) VAR_PATH2="$OPTARG";;
|
|
93 S ) SAVE_VAR1="$OPTARG";;
|
|
94 T ) SAVE_VAR2="$OPTARG";;
|
|
95 P ) PRED_VAR1="$OPTARG";;
|
|
96 Q ) PRED_VAR2="$OPTARG";;
|
|
97 y ) ONLY_GENE_START="$OPTARG";;
|
|
98 s ) SUBSAMPLE="$OPTARG";;
|
|
99 C ) CLIP="$OPTARG";;
|
|
100 p ) BOOTSTRAP="$OPTARG";;
|
|
101 m ) TEST_METH_NAME="$OPTARG";;
|
|
102 x ) MERGE_SAMPLE="$OPTARG";;
|
|
103 h ) usage ;exit ;;
|
|
104 \?) echo "Unkown parameter: $OPTARG"
|
|
105 usage ;;
|
|
106
|
|
107 esac
|
|
108 done
|
|
109
|
|
110 #Perform baic checks of variables
|
|
111
|
|
112 errorFlag=true #No errors ecountered
|
|
113
|
|
114 if [ -z "$BAM_INPUT1" ]; then
|
|
115 echo "Please provide bamfiles for sample 1" >&2
|
|
116 errorFlag=false
|
|
117 fi
|
|
118 if [ -z "$BAM_INPUT2" ]; then
|
|
119 echo "Please provide bamfiles for sample 2" >&2
|
|
120 errorFlag=false
|
|
121 fi
|
|
122 if [ -z "$GFF_INPUT" ]; then
|
|
123 echo "Please provide genome annotation file in GFF" >&2
|
|
124 errorFlag=false
|
|
125 fi
|
|
126
|
|
127 if [ ! $errorFlag ]
|
|
128 then
|
|
129 echo "Error" >&2
|
|
130 exit 1
|
|
131 fi
|
|
132
|
|
133
|
|
134 #Generate parameter vector
|
|
135 PARAM_VECT="RDIFF_RES_DIR:$RDIFF_RES_DIR;"
|
|
136 PARAM_VECT="$PARAM_VECT""RDIFF_INPUT_DIR:$RDIFF_INPUT_DIR;"
|
|
137 PARAM_VECT="$PARAM_VECT""BAM_INPUT1:$BAM_INPUT1;"
|
|
138 PARAM_VECT="$PARAM_VECT""BAM_INPUT2:$BAM_INPUT2;"
|
|
139 PARAM_VECT="$PARAM_VECT""READ_LENGTH:$READ_LENGTH;"
|
|
140 PARAM_VECT="$PARAM_VECT""MIN_READ_LENGTH:$MIN_READ_LENGTH;"
|
|
141 PARAM_VECT="$PARAM_VECT""EST_GENE_EXPR:$EST_GENE_EXPR;"
|
|
142 PARAM_VECT="$PARAM_VECT""ONLY_GENE_EXPR:$ONLY_GENE_EXPR;"
|
|
143 PARAM_VECT="$PARAM_VECT""VAR_PATH1:$VAR_PATH1;"
|
|
144 PARAM_VECT="$PARAM_VECT""VAR_PATH2:$VAR_PATH2;"
|
|
145 PARAM_VECT="$PARAM_VECT""SAVE_VAR1:$SAVE_VAR1;"
|
|
146 PARAM_VECT="$PARAM_VECT""SAVE_VAR2:$SAVE_VAR2;"
|
|
147 PARAM_VECT="$PARAM_VECT""PRED_VAR1:$PRED_VAR1;"
|
|
148 PARAM_VECT="$PARAM_VECT""PRED_VAR2:$PRED_VAR2;"
|
|
149 PARAM_VECT="$PARAM_VECT""ONLY_GENE_START:$ONLY_GENE_START;"
|
|
150 PARAM_VECT="$PARAM_VECT""SUBSAMPLE:$SUBSAMPLE;"
|
|
151 PARAM_VECT="$PARAM_VECT""CLIP:$CLIP;"
|
|
152 PARAM_VECT="$PARAM_VECT""BOOTSTRAP:$BOOTSTRAP;"
|
|
153 PARAM_VECT="$PARAM_VECT""TEST_METH_NAME:$TEST_METH_NAME;"
|
|
154 PARAM_VECT="$PARAM_VECT""MERGE_SAMPLE:$MERGE_SAMPLE;"
|
|
155
|
|
156
|
|
157
|
|
158 echo %%%%%%%%%%%%%%%%%%%%%%%
|
|
159 echo % 1. Data preparation %
|
|
160 echo %%%%%%%%%%%%%%%%%%%%%%%
|
|
161 echo
|
|
162
|
|
163 #Check wether results directory exists
|
|
164 #if [ -d $RDIFF_RES_DIR ]
|
|
165 #then
|
|
166 # echo "Writting results to: $RDIFF_RES_DIR"
|
|
167 #else
|
|
168 # mkdir $RDIFF_RES_DIR
|
|
169 #fi
|
|
170
|
|
171
|
|
172 echo 1a. load the genome annotation in GFF3 format, create an annotation object #\[Log file in ${RDIFF_RES_DIR}}/elegans-gff2anno.log\]
|
|
173 export PYTHONPATH=$PYTHONPATH:$RDIFF_PYTHON_PATH:${SCIPY_PATH}
|
|
174 ${RDIFF_PYTHON_PATH} -W ignore::RuntimeWarning ${DIR}/../tools/ParseGFF.py ${GFF_INPUT} ${RDIFF_RES_DIR}/genes.mat #> ${RDIFF_RES_DIR}}/elegans-gff2anno.log
|
|
175 ${DIR}/../bin/genes_cell2struct ${RDIFF_RES_DIR}/genes.mat
|
|
176
|
|
177
|
|
178 PARAM_VECT="$PARAM_VECT""GFF_INPUT:${RDIFF_RES_DIR}/genes.mat;"
|
|
179
|
|
180 echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
181 echo % 2. Differential testing %
|
|
182 echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
183 echo
|
|
184
|
|
185 exec ${DIR}/start_interpreter.sh ${PROG} "$PARAM_VECT"
|
|
186
|