comparison wrapper.sh @ 0:c33d93683a09 draft

Uploaded
author davidvanzessen
date Thu, 13 Oct 2016 10:52:24 -0400
parents
children faae21ba5c63
comparison
equal deleted inserted replaced
-1:000000000000 0:c33d93683a09
#!/bin/bash
#set -e
#
# Wrapper that drives the SHM/CSR analysis and assembles the HTML report.
#
# Positional arguments:
#   $1       input                IMGT archive to unpack (Zip or XZ-compressed tar)
#   $2       method               forwarded to merge_and_filter.r
#   $3       log                  progress log; becomes the main html page at the end
#   $4       outdir               directory that receives every generated file
#   $5       title                heading of the report
#   $6       include_fr1          forwarded to shm_csr.r and shm_csr.py
#   $7       functionality        forwarded to merge_and_filter.r
#   $8       unique               forwarded to merge_and_filter.r
#   $9-$11   naive_output_ca/cg/cm  copy destinations for the IGA/IGG/IGM archives
#   $12      filter_unique        forwarded to merge_and_filter.r
#   $13      class_filter         forwarded to merge_and_filter.r; the value
#                                 "101_101" reduces the SHM table to one column
#   $14      empty_region_filter  forwarded to merge_and_filter.r

# Directory holding this script and its helper scripts.
dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
log=$3 #becomes the main html page at the end
outdir=$4
output="$outdir/index.html" #copied to $log location at the end
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# -p: do not fail when the output directory already exists.
mkdir -p "$outdir"

# Unpack the bundled style archive next to the generated report.
tar -xzf "$dir/style.tar.gz" -C "$outdir"
echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"

echo "unpacking IMGT file"

# Pick the extraction tool from what file(1) reports about the input.
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $PWD/files/$title"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  # Nothing is unpacked in this case; say so instead of failing later without a hint.
  echo "Unrecognised input archive type: $type" >&2
fi

# Concatenate every unpacked file whose name starts with "<number>_" into one
# working file per IMGT table. find -exec is used instead of cat `find ...`
# so that an empty match yields an empty file (a bare cat would wait on stdin)
# and paths containing whitespace stay intact.
for imgt_part in "1:summary" "3:sequences" "5:aa" "6:junction" "7:mutationanalysis" "8:mutationstats" "10:hotspots"; do
  find "$PWD/files/" -name "${imgt_part%%:*}_*" -exec cat {} + > "$PWD/${imgt_part#*:}.txt"
done

# A BLASTN_DIR of at least five characters from the environment is kept;
# anything shorter falls back to the development default.
if [[ ${#BLASTN_DIR} -ge 5 ]]; then
  echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
else
  BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
  echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
fi
echo "---------------- class identification ----------------"
echo "---------------- class identification ----------------<br />" >> "$log"

python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"

echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Every argument is quoted so that an empty option keeps its position instead
# of shifting the options that follow it.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" \
  "$PWD/sequences.txt" \
  "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" \
  "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" \
  "$outdir/unmatched.txt" \
  "$method" \
  "$functionality" \
  "$unique" \
  "${filter_unique}" \
  "${class_filter}" \
  "${empty_region_filter}" 2>&1
echo "---------------- creating new IMGT zips ----------------"
echo "---------------- creating new IMGT zips ----------------<br />" >> "$log"

# Classes that each get their own copy of the IMGT tables and their own archive.
imgt_classes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM)

# Names of the ten IMGT tables; the number before the first "_" selects the
# unpacked source files.
imgt_tables=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)

mkdir -p "$outdir/new_IMGT"

# find -exec instead of cat `find ...`: no match gives an empty table rather
# than a cat that blocks on stdin, and whitespace in paths is preserved.
for imgt_table in "${imgt_tables[@]}"; do
  find "$PWD/files/" -name "${imgt_table%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/$imgt_table"
done

# Seed every per-class directory with the complete tables. All copies are made
# before new_imgt.r runs on any of the directories.
for imgt_class in "${imgt_classes[@]}"; do
  mkdir -p "$outdir/new_IMGT_${imgt_class}"
  cp "$outdir/new_IMGT/"* "$outdir/new_IMGT_${imgt_class}"
done

# "-" is passed for the directory that is not tied to a single class.
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1

for imgt_class in "${imgt_classes[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "$imgt_class" 2>&1
done

# Each archive is created from inside its directory so members are stored
# without a leading directory. The subshell keeps the caller's working
# directory untouched, which also makes a relative $outdir work.
for imgt_dir in new_IMGT "${imgt_classes[@]/#/new_IMGT_}"; do
  (
    cd "$outdir/$imgt_dir" || {
      echo "Cannot enter $outdir/$imgt_dir" >&2
      exit 1
    }
    tar -cJf "../${imgt_dir}.txz" *
  )
done
echo "---------------- shm_csr.r ----------------"
echo "---------------- shm_csr.r ----------------<br />" >> "$log"

# Comma separated class list handed to the R and Python analysis scripts.
classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,unmatched"
echo "R mutation analysis"
Rscript "$dir/shm_csr.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1

echo "---------------- shm_csr.py ----------------"
echo "---------------- shm_csr.py ----------------<br />" >> "$log"

python "$dir/shm_csr.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "IGA,IGG,IGM" "$outdir/" 2>&1

# When a histogram with an empty class suffix exists, drop the trailing
# underscore from its name (image and data file).
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
# Classes that get their own column / section in the report.
genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM)

# Only the "sum" summary is rendered; an earlier "sum mean median" list was
# immediately overwritten and has been removed as dead code.
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir -p "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Start base_overview.html with one table row per line of ntoverview.txt.
# The closing </table> is appended near the end of the script.
# read -r keeps backslashes literal; the "|| [[ -n ... ]]" part also processes
# a final line that lacks a trailing newline.
{
  echo "<table border='1'>"
  while IFS=$'\t' read -r ID class seq A C G T || [[ -n "$ID" ]]; do
    echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
  done < "$outdir/sequence_overview/ntoverview.txt"
} > "$outdir/base_overview.html"
# Start the report page: title plus the scripts and stylesheets it loads.
{
  echo "<html><center><h1>$title</h1></center>"
  echo "<meta name='viewport' content='width=device-width, initial-scale=1'>"
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
  echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>"
} > "$output"

# Count data rows: lines of merged.txt without "unmatched" and lines of
# unmatched.txt, each minus the first line.
matched_count=$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)
unmatched_count=$(tail -n +2 "$outdir/unmatched.txt" | wc -l)
total_count=$((matched_count + unmatched_count))

# Multiply before dividing: with scale=2 bc truncates each division to two
# decimals, so "unmatched / total * 100" loses the fractional percentage
# (1/3 became 33.00 instead of 33.33). An empty data set reports 0 instead of
# dividing by zero.
if ((total_count > 0)); then
  perc_count=$(bc -l <<< "scale=2; 100 * ${unmatched_count} / ${total_count}")
else
  perc_count=0
fi

{
  echo "<center><h2>Total: ${total_count}</h2></center>"
  echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
  echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

# One SHM overview table per summary function in funcs.
for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  # The table data is the mutation summary followed by the hotspot summary.
  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "---------------- pattern_plots.r ----------------"
  echo "---------------- pattern_plots.r ----------------<br />" >> "$log"

  Rscript "$dir/pattern_plots.r" "$outdir/data_${func}.txt" "$outdir/plot1" "$outdir/plot2" "$outdir/plot3" 2>&1

  echo "<table class='pure-table pure-table-striped'>" >> "$output"
  echo "<thead><tr><th>info</th>" >> "$output"

  if [[ "${class_filter}" != "101_101" ]]; then

    # Header: one column per class, then "all", then "unmatched".
    for gene in "${genes[@]}"; do
      tmp=$(cat "$outdir/${gene}_${func}_n.txt")
      echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> "$output"
    done

    tmp=$(cat "$outdir/all_${func}_n.txt")
    echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> "$output"
    # The unmatched N shown is the row count of unmatched.txt (the linked
    # file); the header row is now closed with </tr> instead of a stray <tr>.
    echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>" >> "$output"

    # Each data line holds a name followed by x,y,z triples for every class,
    # then unmatched, then all. The row layout depends on the name.
    while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz; do
      case "$name" in
        "FR R/S (ratio)" | "CDR R/S (ratio)") #meh
          echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>"
          ;;
        "Median of Number of Mutations (%)")
          echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${allz}%</td><td>${unz}%</td></tr>"
          ;;
        *)
          echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>"
          ;;
      esac
    done < "$outdir/data_${func}.txt" >> "$output"

  else
    # Single-column layout: the "all" column is filled from the unmatched
    # count file and the unmatched (un*) fields of every data line.
    tmp=$(cat "$outdir/unmatched_${func}_n.txt")
    echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th></tr></thead>" >> "$output"

    while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz; do
      case "$name" in
        "FR R/S (ratio)" | "CDR R/S (ratio)") #meh
          echo "<tr><td>$name</td><td>${unx}/${uny}</td></tr>"
          ;;
        "Median of Number of Mutations (%)")
          echo "<tr><td>$name</td><td>${unz}%</td></tr>"
          ;;
        *)
          echo "<tr><td>$name</td><td>${unx}/${uny} (${unz}%)</td></tr>"
          ;;
      esac
    done < "$outdir/data_${func}.txt" >> "$output"

  fi
  echo "</table>" >> "$output"
  #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
done

echo "<img src='plot1.png' /><br />" >> "$output"
echo "<img src='plot2.png' /><br />" >> "$output"
echo "<img src='plot3.png' /><br />" >> "$output"

echo "</div>" >> "$output" #SHM overview tab end
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

# Everything in this group is appended to the report page.
{
  echo "<div class='tabbertab' title='SHM Frequency'>"

  # Only reference plots that were actually produced.
  for frequency_plot in scatter.png frequency_ranges.png; do
    if [[ -e "$outdir/$frequency_plot" ]]; then
      echo "<img src='${frequency_plot}'/><br />"
    fi
  done

  echo "</div>" #SHM frequency tab end

  echo "<div class='tabbertab' title='Transition tables'>"

  echo "<table border='0'>"

  # One row per class plus a final row for all sequences: heading, heatmap,
  # stacked plot and the transition counts as a nested table.
  for transition_set in "${genes[@]}" all; do
    if [[ "$transition_set" == "all" ]]; then
      transition_heading="All"
    else
      transition_heading=$transition_set
    fi

    echo "<tr>"
    echo "<td><h1>${transition_heading}</h1></td>"
    echo "<td><img src='transitions_heatmap_${transition_set}.png' /></td>"
    echo "<td><img src='transitions_stacked_${transition_set}.png' /></td>"
    echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>"
    # Single quotes around 5: double quotes here would end the shell string.
    echo "<tr><td></td><td colspan='5'><center>To</center></td></tr>"
    first="true"
    while IFS=, read -r from a c g t; do
      if [[ "$first" == "true" ]]; then
        # The first line additionally carries the "From" label spanning all rows.
        echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
        first="false"
      else
        echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
      fi
    done < "$outdir/transitions_${transition_set}_sum.txt"
    echo "</table></td>"

    echo "</tr>"
  done

  echo "</table>"

  echo "</div>" #transition tables tab end

  echo "<div class='tabbertab' title='Antigen Selection'>"

  if [[ -e "$outdir/aa_histogram.png" ]]; then
    echo "<img src='aa_histogram.png'/><br />"
    echo "<img src='aa_histogram_IGA.png'/><br />"
    echo "<img src='aa_histogram_IGG.png'/><br />"
    echo "<img src='aa_histogram_IGM.png'/><br />"
  fi

  echo "<embed src='baseline.pdf' width='700px' height='1000px'>"
  echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>"
  echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>"
  echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>"

  echo "</div>" #antigen selection tab end

  echo "<div class='tabbertab' title='CSR'>" #CSR tab

  for csr_plot in IGA.png IGG.png; do
    if [[ -e "$outdir/$csr_plot" ]]; then
      echo "<img src='${csr_plot}'/><br />"
    fi
  done

  echo "</div>" #CSR tab end
} >> "$output"
echo "---------------- change-o MakeDB ----------------"

mkdir -p "$outdir/change_o"

# Remember where we are; the Change-O helpers are run from inside change_o.
tmp="$PWD"

cd "$outdir/change_o" || echo "Cannot enter $outdir/change_o" >&2

bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT.txz" false false false "$outdir/change_o/change-o-db.txt"
bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/change_o/change-o-defined_clones-summary.txt"

# merge.r reads the defined-clones file together with merged.txt and writes
# its result back over the defined-clones file.
Rscript "$dir/merge.r" "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/merged.txt" "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" "$outdir/change_o/change-o-db-defined_clones.txt" 2>&1

# Log the command exactly as it was run (the previous message showed a
# doubled $outdir and a different argument list).
echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"

# Per-class clone definition. A class whose summary has at most one line (no
# data rows) gets placeholder files instead.
for clone_class in IGA IGG IGM; do
  if [[ $(wc -l < "$outdir/new_IMGT_${clone_class}/1_Summary.txt") -gt 1 ]]; then
    bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT_${clone_class}.txz" false false false "$outdir/change_o/change-o-db-${clone_class}.txt"
    bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db-${clone_class}.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt" "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
  else
    echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt"
    echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
  fi
done

# Actually return to the saved directory; assigning to PWD only changes the
# variable, not the working directory.
cd "$tmp" || echo "Cannot return to $tmp" >&2

echo "<div class='tabbertab' title='Clonality'>" >> "$output" #clonality tab
#######################################
# Append an HTML table of clone sizes to a file.
# The first line of the input is treated as a header and skipped; every other
# line is split on whitespace into size, number of clones and number of
# sequences (any remaining text stays in the last column).
# Arguments:
#   $1 - summary file to read
#   $2 - file the table is appended to
# Outputs:
#   Appends the table markup to $2.
#######################################
clonality_table() {
  local infile=$1
  local outfile=$2
  local header size clones seqs

  {
    printf '%s\n' "<table class='pure-table pure-table-striped'>"
    printf '%s\n' "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>"

    {
      # Consume the header line so it never becomes a table row.
      read -r header
      # -r keeps backslashes literal; the "|| [[ -n ... ]]" part also handles
      # a last line that is not terminated by a newline.
      while read -r size clones seqs || [[ -n "$size" ]]; do
        printf '%s\n' "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>"
      done
    } < "$infile"

    printf '%s\n' "</table>"
  } >> "$outfile"
}
echo "<div class='tabber'>" >> "$output"

# One clonality sub-tab per summary file; each entry is "tab title:file suffix".
for clonality_tab in "All:" "Ca:-IGA" "Cg:-IGG" "Cm:-IGM"; do
  echo "<div class='tabbertab' title='${clonality_tab%%:*}'>" >> "$output"
  clonality_table "$outdir/change_o/change-o-defined_clones-summary${clonality_tab#*:}.txt" "$output"
  echo "</div>" >> "$output"
done

echo "<div class='tabbertab' title='Overview'>" >> "$output"
cat "$outdir/sequence_overview/index.html" >> "$output"
echo "</div>" >> "$output"

echo "</div>" >> "$output" #clonality tabber end

echo "</div>" >> "$output" #clonality tab end

# Print one Downloads row: $1 is the description, $2 the file to download.
download_row() {
  echo "<tr><td>$1</td><td><a href='$2' download='$2' >Download</a></td></tr>"
}

# Print one Downloads row whose link ($2) is opened rather than downloaded.
view_row() {
  echo "<tr><td>$1</td><td><a href='$2'>View</a></td></tr>"
}

{
  echo "<div class='tabbertab' title='Downloads'>"

  echo "<table class='pure-table pure-table-striped'>"
  echo "<thead><tr><th>info</th><th>link</th></tr></thead>"
  download_row "The complete dataset" "merged.txt"
  download_row "The filtered dataset" "filtered.txt"
  download_row "The SHM Overview table as a dataset" "data_sum.txt"
  download_row "The data used to generate the first SHM Overview plot" "plot1.txt"
  download_row "The data used to generate the second SHM Overview plot" "plot2.txt"
  download_row "The data used to generate the third SHM Overview plot" "plot3.txt"
  download_row "The alignment info on the unmatched sequences" "unmatched.txt"

  download_row "The data generate the frequency scatter plot" "scatter.txt"
  download_row "The data used to generate the frequency by class plot" "frequency_ranges_classes.txt"
  download_row "The data for frequency by subclass" "frequency_ranges_subclasses.txt"

  download_row "Motif data per sequence ID" "motif_per_seq.txt"
  download_row "Mutation data per sequence ID" "mutation_by_id.txt"
  download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
  download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
  view_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"

  view_row "Base count for every sequence" "base_overview.html"

  download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
  download_row "Baseline data" "baseline.txt"
  for download_class in IGA IGG IGM; do
    download_row "Baseline ${download_class} PDF" "baseline_${download_class}.pdf"
    download_row "Baseline ${download_class} data" "baseline_${download_class}.txt"
  done

  download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
  for download_class in IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM; do
    download_row "An IMGT archive with just the matched and filtered ${download_class} sequences" "new_IMGT_${download_class}.txz"
  done

  download_row "The Change-O DB file with defined clones and subclass annotation" "change_o/change-o-db-defined_clones.txt"
  download_row "The Change-O DB defined clones summary file" "change_o/change-o-defined_clones-summary.txt"
  for download_class in IGA IGG IGM; do
    download_row "The Change-O DB file with defined clones of ${download_class}" "change_o/change-o-db-defined_clones-${download_class}.txt"
    download_row "The Change-O DB defined clones summary file of ${download_class}" "change_o/change-o-defined_clones-summary-${download_class}.txt"
  done

  echo "</table>"

  echo "</div>" #downloads tab end

  echo "</div>" #tabs end

  echo "</html>"
} >> "$output"
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"
tmp="$PWD"

mkdir -p "$outdir/baseline"

#######################################
# Run the baseline wrapper for one IMGT archive, or write a placeholder when
# the archive's summary has at most one line (no data rows).
# Globals:   dir, outdir (read)
# Arguments:
#   $1 - sub directory of $outdir/baseline to run in
#   $2 - IMGT directory/archive base name under $outdir (e.g. new_IMGT_IGA)
#   $3 - name handed to the baseline wrapper
#   $4 - base name of the resulting .pdf/.txt under $outdir
#   $5 - text written to the .txt file when there is nothing to analyse
# Note: changes the working directory; the caller restores it afterwards.
#######################################
run_baseline() {
  local workdir=$1
  local imgt_name=$2
  local run_name=$3
  local result_name=$4
  local empty_message=$5

  mkdir -p "$outdir/baseline/$workdir"
  if [[ $(wc -l < "$outdir/$imgt_name/1_Summary.txt") -gt 1 ]]; then
    cd "$outdir/baseline/$workdir" || echo "Cannot enter $outdir/baseline/$workdir" >&2
    bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/$imgt_name.txz" "$run_name" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/$result_name.pdf" "Sequence.ID" "$outdir/$result_name.txt"
  else
    echo "$empty_message" > "$outdir/$result_name.txt"
  fi
}

run_baseline "IGA_IGG_IGM" "new_IMGT" "IGA_IGG_IGM" "baseline" "No sequences"
run_baseline "IGA" "new_IMGT_IGA" "IGA" "baseline_IGA" "No IGA sequences"
# NOTE(review): the IGG run is named "cg" while the other runs use the class
# name; kept unchanged - confirm against baseline/wrapper.sh whether "IGG"
# was intended.
run_baseline "IGG" "new_IMGT_IGG" "cg" "baseline_IGG" "No IGG sequences"
run_baseline "IGM" "new_IMGT_IGM" "IGM" "baseline_IGM" "No IGM sequences"

cd "$tmp" || echo "Cannot return to $tmp" >&2
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# Copy the per-class archives to the requested destinations. The previous
# guard tested $naive_output, which is never set in this script, so it was
# always true; each destination is now checked on its own and skipped when it
# is empty or "None".
for naive_pair in "IGA:${naive_output_ca}" "IGG:${naive_output_cg}" "IGM:${naive_output_cm}"; do
  naive_dest=${naive_pair#*:}
  if [[ -n "$naive_dest" && "$naive_dest" != "None" ]]; then
    cp "$outdir/new_IMGT_${naive_pair%%:*}.txz" "$naive_dest"
  fi
done

# Close the table that was opened when base_overview.html was created.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log as log.html and reuse $log as the landing page.
mv "$log" "$outdir/log.html"

{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  # IFS is set for this read only, so nothing has to be saved and restored
  # (the old code restored IFS from $TMP instead of the saved value).
  while IFS=$'\t' read -r step seq perc; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done < "$outdir/filtering_steps.txt"
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"