Mercurial > repos > davidvanzessen > shm_csr
comparison wrapper.sh @ 0:c33d93683a09 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 13 Oct 2016 10:52:24 -0400 |
parents | |
children | faae21ba5c63 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c33d93683a09 |
---|---|
#!/bin/bash
#set -e
#
# Wrapper that unpacks an IMGT archive, runs the R/Python analysis scripts that
# live next to this file, and assembles the results into an HTML report.
#
# Positional parameters:
#   $1  input               archive to analyse (Zip or XZ-compressed tar)
#   $2  method              forwarded to merge_and_filter.r
#   $3  log                 progress log; replaced by the landing page at the end
#   $4  outdir              directory created here that receives every result file
#   $5  title               report heading; also the sub-directory used for XZ input
#   $6  include_fr1         forwarded to shm_csr.r and shm_csr.py
#   $7  functionality       forwarded to merge_and_filter.r
#   $8  unique              forwarded to merge_and_filter.r
#   $9  naive_output_ca     destination for the IGA archive
#   $10 naive_output_cg     destination for the IGG archive
#   $11 naive_output_cm     destination for the IGM archive
#   $12 filter_unique       forwarded to merge_and_filter.r
#   $13 class_filter        forwarded to merge_and_filter.r; "101_101" switches
#                           the overview table to a single column
#   $14 empty_region_filter forwarded to merge_and_filter.r

# Directory holding this script and its helper scripts / style archive.
dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
log=$3 #becomes the main html page at the end
outdir=$4
output="$outdir/index.html" #copied to $log location at the end
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# Quoted so that paths containing spaces stay a single argument.
mkdir "$outdir"

# Static assets (scripts / stylesheets referenced by the report) go next to it.
tar -xzf "$dir/style.tar.gz" -C "$outdir"
22 | |
23 echo "---------------- read parameters ----------------" | |
24 echo "---------------- read parameters ----------------<br />" > $log | |
25 | |
echo "unpacking IMGT file"

# concat_imgt PREFIX DEST
# Concatenate every entry below $PWD/files/ whose name starts with "PREFIX_"
# into DEST. Going through `find -exec` (instead of `cat $(find ...)`) keeps
# file names with spaces intact and, when nothing matches, produces an empty
# DEST rather than a bare `cat` that waits on stdin.
concat_imgt() {
  local prefix=$1
  local dest=$2
  find "$PWD/files/" -name "${prefix}_*" -exec cat {} + > "$dest"
}

# Detect the archive format from what `file` reports.
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  # The tar is unpacked into a sub-directory named after the report title.
  echo "tar -xJf $input -C $PWD/files/$title"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  echo "Unrecognised input type, nothing unpacked: $type" >&2
fi

# Collect the numbered IMGT tables used by the later analysis steps.
concat_imgt 1 "$PWD/summary.txt"
concat_imgt 3 "$PWD/sequences.txt"
concat_imgt 5 "$PWD/aa.txt"
concat_imgt 6 "$PWD/junction.txt"
concat_imgt 7 "$PWD/mutationanalysis.txt"
concat_imgt 8 "$PWD/mutationstats.txt"
concat_imgt 10 "$PWD/hotspots.txt"
47 | |
48 if [[ ${#BLASTN_DIR} -ge 5 ]] ; then | |
49 echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}" | |
50 else | |
51 BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin" | |
52 echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}" | |
53 fi | |
54 | |
55 echo "---------------- class identification ----------------" | |
56 echo "---------------- class identification ----------------<br />" >> $log | |
57 | |
58 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt | |
59 | |
60 echo "---------------- merge_and_filter.r ----------------" | |
61 echo "---------------- merge_and_filter.r ----------------<br />" >> $log | |
62 | |
63 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 | |
64 | |
echo "---------------- creating new IMGT zips ----------------"
echo "---------------- creating new IMGT zips ----------------<br />" >> "$log"

mkdir "$outdir/new_IMGT"

# Canonical names of the ten IMGT tables. The number before the first "_" is
# also the prefix used to find the matching unpacked input files.
imgt_files=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)

# `find -exec cat {} +` yields an empty table when nothing matches; a bare
# `cat $(find ...)` would instead block reading stdin.
for imgt_file in "${imgt_files[@]}"; do
  find "$PWD/files/" -name "${imgt_file%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/$imgt_file"
done

# (Sub)classes that each get their own copy of the tables.
imgt_classes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM)

# Every copy is taken before new_imgt.r is run on any directory, exactly as in
# the original statement order.
for ig_class in "${imgt_classes[@]}"; do
  mkdir "$outdir/new_IMGT_${ig_class}"
  cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${ig_class}"
done

# "-" is passed as the class argument for the combined directory.
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1

for ig_class in "${imgt_classes[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${ig_class}/" "$outdir/merged.txt" "$ig_class" 2>&1
done
121 | |
122 | |
# Pack every new_IMGT* directory into a sibling <name>.txz.
# tar is run from inside the directory on `*`, so the archive members carry no
# directory prefix. Each cd happens in a subshell: the caller's working
# directory never changes (no save/restore needed), a relative $outdir keeps
# working for every directory, and a failed cd skips that archive instead of
# packing whatever happens to be in the current directory.
for imgt_dir in new_IMGT new_IMGT_IGA new_IMGT_IGA1 new_IMGT_IGA2 \
  new_IMGT_IGG new_IMGT_IGG1 new_IMGT_IGG2 new_IMGT_IGG3 new_IMGT_IGG4 \
  new_IMGT_IGM; do
  (
    cd "$outdir/$imgt_dir/" || exit
    tar -cJf "../$imgt_dir.txz" *
  )
done
155 | |
156 echo "---------------- shm_csr.r ----------------" | |
157 echo "---------------- shm_csr.r ----------------<br />" >> $log | |
158 | |
159 classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,unmatched" | |
160 echo "R mutation analysis" | |
161 Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1 | |
162 | |
163 | |
164 echo "---------------- shm_csr.py ----------------" | |
165 echo "---------------- shm_csr.py ----------------<br />" >> $log | |
166 | |
167 python $dir/shm_csr.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt | |
168 | |
169 echo "---------------- aa_histogram.r ----------------" | |
170 echo "---------------- aa_histogram.r ----------------<br />" >> $log | |
171 | |
172 Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM" $outdir/ 2>&1 | |
173 if [ -e "$outdir/aa_histogram_.png" ]; then | |
174 mv $outdir/aa_histogram_.png $outdir/aa_histogram.png | |
175 mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt | |
176 fi | |
177 | |
178 genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM) | |
179 | |
180 funcs=(sum mean median) | |
181 funcs=(sum) | |
182 | |
183 echo "---------------- sequence_overview.r ----------------" | |
184 echo "---------------- sequence_overview.r ----------------<br />" >> $log | |
185 | |
186 mkdir $outdir/sequence_overview | |
187 | |
188 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 | |
189 | |
190 echo "<table border='1'>" > $outdir/base_overview.html | |
191 | |
192 while IFS=$'\t' read ID class seq A C G T | |
193 do | |
194 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html | |
195 done < $outdir/sequence_overview/ntoverview.txt | |
196 | |
197 echo "<html><center><h1>$title</h1></center>" > $output | |
198 echo "<meta name='viewport' content='width=device-width, initial-scale=1'>" >> $output | |
199 echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output | |
200 echo "<script type='text/javascript' src='tabber.js'></script>" >> $output | |
201 echo "<script type='text/javascript' src='script.js'></script>" >> $output | |
202 echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output | |
203 echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> $output | |
204 | |
# percent_of PART TOTAL
# Print PART as a percentage of TOTAL with two decimals (truncated, not
# rounded). Prints 0.00 when TOTAL is zero instead of dividing by zero.
# Multiplying before dividing keeps the decimals; the previous bc expression
# divided first at scale 2 and so could only ever yield whole percents.
percent_of() {
  local part=$1
  local total=$2
  local hundredths

  if (( total == 0 )); then
    echo "0.00"
    return 0
  fi

  hundredths=$(( part * 10000 / total ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}

# Count rows: lines of merged.txt not containing "unmatched", and all lines of
# unmatched.txt, each minus the first line.
matched_count=$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)
unmatched_count=$(tail -n +2 "$outdir/unmatched.txt" | wc -l)
total_count=$((matched_count + unmatched_count))
perc_count=$(percent_of "$unmatched_count" "$total_count")

echo "<center><h2>Total: ${total_count}</h2></center>" >> "$output"
echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> "$output"
echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> "$output"
215 | |
216 echo "---------------- main tables ----------------" | |
217 echo "---------------- main tables ----------------<br />" >> $log | |
218 | |
219 echo "<div class='tabber'>" >> $output | |
220 echo "<div class='tabbertab' title='SHM Overview'>" >> $output | |
221 | |
# shm_overview_row MODE LINE
# Turn one comma-separated line of data_<func>.txt into an HTML table row on
# stdout. LINE is "name" followed by x,y,z triples in the order
# IGA, IGA1, IGA2, IGG, IGG1, IGG2, IGG3, IGG4, IGM, unmatched, all.
#   MODE "unmatched" - emit only the unmatched triple (used for class filter
#                      101_101)
#   any other MODE   - emit the nine classes, then "all", then "unmatched"
# The cell layout depends on the row name: the two R/S ratio rows print no
# percent sign, the median row prints only z, every other row prints x/y (z%).
shm_overview_row() {
  local mode=$1
  local line=$2
  local -a fields groups
  local name style cell row group x y z

  IFS=, read -r -a fields <<< "$line"
  name=${fields[0]}

  case "$name" in
    "FR R/S (ratio)" | "CDR R/S (ratio)") style="ratio" ;;
    "Median of Number of Mutations (%)") style="median" ;;
    *) style="count" ;;
  esac

  # Triple indices: 0-8 are the classes, 9 is unmatched, 10 is all.
  if [[ "$mode" == "unmatched" ]]; then
    groups=(9)
  else
    groups=(0 1 2 3 4 5 6 7 8 10 9)
  fi

  row="<tr><td>$name</td>"
  for group in "${groups[@]}"; do
    x=${fields[1 + 3 * group]}
    y=${fields[2 + 3 * group]}
    z=${fields[3 + 3 * group]}
    case "$style" in
      ratio)
        if [[ "$mode" == "unmatched" ]]; then
          cell="$x/$y"
        else
          cell="$x/$y ($z)"
        fi
        ;;
      median) cell="$z%" ;;
      *) cell="$x/$y ($z%)" ;;
    esac
    row+="<td>$cell</td>"
  done
  echo "$row</tr>"
}

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "---------------- pattern_plots.r ----------------"
  echo "---------------- pattern_plots.r ----------------<br />" >> "$log"

  Rscript "$dir/pattern_plots.r" "$outdir/data_${func}.txt" "$outdir/plot1" "$outdir/plot2" "$outdir/plot3" 2>&1

  echo "<table class='pure-table pure-table-striped'>" >> "$output"
  echo "<thead><tr><th>info</th>" >> "$output"

  if [[ "${class_filter}" != "101_101" ]]; then
    for gene in "${genes[@]}"; do
      tmp=$(cat "$outdir/${gene}_${func}_n.txt")
      echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> "$output"
    done

    tmp=$(cat "$outdir/all_${func}_n.txt")
    echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> "$output"
    # The header row is now closed with </tr>; it used to emit a stray <tr>.
    echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>" >> "$output"
    overview_mode="all"
  else
    # With class filter 101_101 the single column is labelled "all" but is
    # filled from the unmatched count and the unmatched triple.
    tmp=$(cat "$outdir/unmatched_${func}_n.txt")
    echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th></tr></thead>" >> "$output"
    overview_mode="unmatched"
  fi

  while IFS= read -r line; do
    shm_overview_row "$overview_mode" "$line" >> "$output"
  done < "$outdir/data_${func}.txt"

  echo "</table>" >> "$output"
  #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
done
281 | |
282 echo "<img src='plot1.png' /><br />" >> $output | |
283 echo "<img src='plot2.png' /><br />" >> $output | |
284 echo "<img src='plot3.png' /><br />" >> $output | |
285 | |
286 echo "</div>" >> $output #SHM overview tab end | |
287 | |
288 echo "---------------- images ----------------" | |
289 echo "---------------- images ----------------<br />" >> $log | |
290 | |
291 echo "<div class='tabbertab' title='SHM Frequency'>" >> $output | |
292 | |
293 if [ -a $outdir/scatter.png ] | |
294 then | |
295 echo "<img src='scatter.png'/><br />" >> $output | |
296 fi | |
297 if [ -a $outdir/frequency_ranges.png ] | |
298 then | |
299 echo "<img src='frequency_ranges.png'/><br />" >> $output | |
300 fi | |
301 | |
302 echo "</div>" >> $output #SHM frequency tab end | |
303 | |
304 echo "<div class='tabbertab' title='Transition tables'>" >> $output | |
305 | |
306 echo "<table border='0'>" >> $output | |
307 | |
308 for gene in ${genes[@]} | |
309 do | |
310 echo "<tr>" >> $output | |
311 echo "<td><h1>${gene}</h1></td>" >> $output | |
312 echo "<td><img src='transitions_heatmap_${gene}.png' /></td>" >> $output | |
313 echo "<td><img src='transitions_stacked_${gene}.png' /></td>" >> $output | |
314 echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output | |
315 echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output | |
316 first="true" | |
317 while IFS=, read from a c g t | |
318 do | |
319 if [ "$first" == "true" ] ; then | |
320 echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
321 first="false" | |
322 else | |
323 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
324 fi | |
325 done < $outdir/transitions_${gene}_sum.txt | |
326 echo "</table></td>" >> $output | |
327 | |
328 echo "</tr>" >> $output | |
329 done | |
330 | |
331 echo "<tr>" >> $output | |
332 echo "<td><h1>All</h1></td>" >> $output | |
333 echo "<td><img src='transitions_heatmap_all.png' /></td>" >> $output | |
334 echo "<td><img src='transitions_stacked_all.png' /></td>" >> $output | |
335 echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output | |
336 echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output | |
337 first="true" | |
338 while IFS=, read from a c g t | |
339 do | |
340 if [ "$first" == "true" ] ; then | |
341 echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
342 first="false" | |
343 else | |
344 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
345 fi | |
346 done < $outdir/transitions_all_sum.txt | |
347 echo "</table></td>" >> $output | |
348 | |
349 echo "</tr>" >> $output | |
350 | |
351 echo "</table>" >> $output | |
352 | |
353 echo "</div>" >> $output #transition tables tab end | |
354 | |
355 echo "<div class='tabbertab' title='Antigen Selection'>" >> $output | |
356 | |
357 if [ -a $outdir/aa_histogram.png ] | |
358 then | |
359 echo "<img src='aa_histogram.png'/><br />" >> $output | |
360 echo "<img src='aa_histogram_IGA.png'/><br />" >> $output | |
361 echo "<img src='aa_histogram_IGG.png'/><br />" >> $output | |
362 echo "<img src='aa_histogram_IGM.png'/><br />" >> $output | |
363 fi | |
364 | |
365 echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output | |
366 echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output | |
367 echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output | |
368 echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output | |
369 | |
370 echo "</div>" >> $output #antigen selection tab end | |
371 | |
372 echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab | |
373 | |
374 if [ -a $outdir/IGA.png ] | |
375 then | |
376 echo "<img src='IGA.png'/><br />" >> $output | |
377 fi | |
378 if [ -a $outdir/IGG.png ] | |
379 then | |
380 echo "<img src='IGG.png'/><br />" >> $output | |
381 fi | |
382 | |
383 echo "</div>" >> $output #CSR tab end | |
384 | |
echo "---------------- change-o MakeDB ----------------"

mkdir "$outdir/change_o"

# Remember where we started; the change-o scripts are run from inside
# $outdir/change_o and the directory is restored at the end of this section.
tmp="$PWD"

cd "$outdir/change_o" || echo "could not enter $outdir/change_o" >&2

# All sequences: build the Change-O database, define clones, then merge the
# result with merged.txt (written back over the defined-clones file).
bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT.txz" false false false "$outdir/change_o/change-o-db.txt"
bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/change_o/change-o-defined_clones-summary.txt"

Rscript "$dir/merge.r" "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/merged.txt" "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" "$outdir/change_o/change-o-db-defined_clones.txt" 2>&1

# Log the command exactly as it was run above.
echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"

# Per class: run only when the class summary has more than one line,
# otherwise write placeholder files so the report links still resolve.
for ig_class in IGA IGG IGM; do
  if [[ $(wc -l < "$outdir/new_IMGT_${ig_class}/1_Summary.txt") -gt 1 ]]; then
    bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT_${ig_class}.txz" false false false "$outdir/change_o/change-o-db-${ig_class}.txt"
    bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db-${ig_class}.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones-${ig_class}.txt" "$outdir/change_o/change-o-defined_clones-summary-${ig_class}.txt"
  else
    echo "No ${ig_class} sequences" > "$outdir/change_o/change-o-db-defined_clones-${ig_class}.txt"
    echo "No ${ig_class} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${ig_class}.txt"
  fi
done

# Actually change back. Assigning to PWD only rewrites the variable and leaves
# the process inside $outdir/change_o.
cd "$tmp" || echo "could not return to $tmp" >&2
425 | |
426 echo "<div class='tabbertab' title='Clonality'>" >> $output #clonality tab | |
427 | |
# clonality_table INFILE OUTFILE
# Append an HTML table to OUTFILE with one row per line of INFILE.
#   INFILE  - whitespace-separated columns: clone size, number of clones,
#             number of sequences. The first line is treated as a header and
#             skipped.
#   OUTFILE - file appended to (created when missing).
# If INFILE cannot be read, only the table header and closing tag are written.
# All working variables are local, so callers' globals (e.g. `first`) are left
# alone; a last line without a trailing newline is still emitted.
clonality_table() {
  local infile=$1
  local outfile=$2
  local size clones seqs

  {
    echo "<table class='pure-table pure-table-striped'>"
    echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>"

    {
      # Consume and discard the header line.
      read -r size clones seqs
      while read -r size clones seqs || [[ -n "$size" ]]; do
        echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>"
      done
    } < "$infile"

    echo "</table>"
  } >> "$outfile"
}
448 echo "<div class='tabber'>" >> $output | |
449 | |
450 echo "<div class='tabbertab' title='All'>" >> $output | |
451 clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output | |
452 echo "</div>" >> $output | |
453 | |
454 echo "<div class='tabbertab' title='Ca'>" >> $output | |
455 clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output | |
456 echo "</div>" >> $output | |
457 | |
458 echo "<div class='tabbertab' title='Cg'>" >> $output | |
459 clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output | |
460 echo "</div>" >> $output | |
461 | |
462 echo "<div class='tabbertab' title='Cm'>" >> $output | |
463 clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output | |
464 echo "</div>" >> $output | |
465 | |
466 echo "<div class='tabbertab' title='Overview'>" >> $output | |
467 cat "$outdir/sequence_overview/index.html" >> $output | |
468 echo "</div>" >> $output | |
469 | |
470 | |
471 echo "</div>" >> $output #clonality tabber end | |
472 | |
473 echo "</div>" >> $output #clonality tab end | |
474 | |
475 echo "<div class='tabbertab' title='Downloads'>" >> $output | |
476 | |
477 echo "<table class='pure-table pure-table-striped'>" >> $output | |
478 echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output | |
479 echo "<tr><td>The complete dataset</td><td><a href='merged.txt' download='merged.txt' >Download</a></td></tr>" >> $output | |
480 echo "<tr><td>The filtered dataset</td><td><a href='filtered.txt' download='filtered.txt' >Download</a></td></tr>" >> $output | |
481 echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt' download='data_sum.txt' >Download</a></td></tr>" >> $output | |
482 echo "<tr><td>The data used to generate the first SHM Overview plot</td><td><a href='plot1.txt' download='plot1.txt' >Download</a></td></tr>" >> $output | |
483 echo "<tr><td>The data used to generate the second SHM Overview plot</td><td><a href='plot2.txt' download='plot2.txt' >Download</a></td></tr>" >> $output | |
484 echo "<tr><td>The data used to generate the third SHM Overview plot</td><td><a href='plot3.txt' download='plot3.txt' >Download</a></td></tr>" >> $output | |
485 echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output | |
486 | |
487 echo "<tr><td>The data generate the frequency scatter plot</td><td><a href='scatter.txt' download='scatter.txt' >Download</a></td></tr>" >> $output | |
488 echo "<tr><td>The data used to generate the frequency by class plot</td><td><a href='frequency_ranges_classes.txt' download='frequency_ranges_classes.txt' >Download</a></td></tr>" >> $output | |
489 echo "<tr><td>The data for frequency by subclass</td><td><a href='frequency_ranges_subclasses.txt' download='frequency_ranges_subclasses.txt' >Download</a></td></tr>" >> $output | |
490 | |
491 | |
492 echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output | |
493 echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output | |
494 echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output | |
495 echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output | |
496 echo "<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output | |
497 | |
498 echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output | |
499 | |
500 echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf' download='baseline.pdf' >Download</a></td></tr>" >> $output | |
501 echo "<tr><td>Baseline data</td><td><a href='baseline.txt' download='baseline.txt' >Download</a></td></tr>" >> $output | |
502 echo "<tr><td>Baseline IGA PDF</td><td><a href='baseline_IGA.pdf' download='baseline_IGA.pdf' >Download</a></td></tr>" >> $output | |
503 echo "<tr><td>Baseline IGA data</td><td><a href='baseline_IGA.txt' download='baseline_IGA.txt' >Download</a></td></tr>" >> $output | |
504 echo "<tr><td>Baseline IGG PDF</td><td><a href='baseline_IGG.pdf' download='baseline_IGG.pdf' >Download</a></td></tr>" >> $output | |
505 echo "<tr><td>Baseline IGG data</td><td><a href='baseline_IGG.txt' download='baseline_IGG.txt' >Download</a></td></tr>" >> $output | |
506 echo "<tr><td>Baseline IGM PDF</td><td><a href='baseline_IGM.pdf' download='baseline_IGM.pdf' >Download</a></td></tr>" >> $output | |
507 echo "<tr><td>Baseline IGM data</td><td><a href='baseline_IGM.txt' download='baseline_IGM.txt' >Download</a></td></tr>" >> $output | |
508 | |
509 echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output | |
510 echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href='new_IMGT_IGA.txz' download='new_IMGT_IGA.txz' >Download</a></td></tr>" >> $output | |
511 echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href='new_IMGT_IGA1.txz' download='new_IMGT_IGA1.txz' >Download</a></td></tr>" >> $output | |
512 echo "<tr><td>An IMGT archive with just the matched and filtered IGA2 sequences</td><td><a href='new_IMGT_IGA2.txz' download='new_IMGT_IGA2.txz' >Download</a></td></tr>" >> $output | |
513 echo "<tr><td>An IMGT archive with just the matched and filtered IGG sequences</td><td><a href='new_IMGT_IGG.txz' download='new_IMGT_IGG.txz' >Download</a></td></tr>" >> $output | |
514 echo "<tr><td>An IMGT archive with just the matched and filtered IGG1 sequences</td><td><a href='new_IMGT_IGG1.txz' download='new_IMGT_IGG1.txz' >Download</a></td></tr>" >> $output | |
515 echo "<tr><td>An IMGT archive with just the matched and filtered IGG2 sequences</td><td><a href='new_IMGT_IGG2.txz' download='new_IMGT_IGG2.txz' >Download</a></td></tr>" >> $output | |
516 echo "<tr><td>An IMGT archive with just the matched and filtered IGG3 sequences</td><td><a href='new_IMGT_IGG3.txz' download='new_IMGT_IGG3.txz' >Download</a></td></tr>" >> $output | |
517 echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href='new_IMGT_IGG4.txz' download='new_IMGT_IGG4.txz' >Download</a></td></tr>" >> $output | |
518 echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href='new_IMGT_IGM.txz' download='new_IMGT_IGM.txz' >Download</a></td></tr>" >> $output | |
519 | |
520 echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output | |
521 echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output | |
522 echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output | |
523 echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output | |
524 echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output | |
525 echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output | |
526 echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output | |
527 echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output | |
528 | |
529 echo "</table>" >> $output | |
530 | |
531 echo "</div>" >> $output #downloads tab end | |
532 | |
533 echo "</div>" >> $output #tabs end | |
534 | |
535 echo "</html>" >> $output | |
536 | |
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"
tmp="$PWD"

mkdir "$outdir/baseline"

# run_baseline SUBDIR IMGT_NAME LABEL RESULT EMPTY_MSG
# Run baseline/wrapper.sh for one sequence set from inside
# $outdir/baseline/SUBDIR.
#   SUBDIR    - working directory created below $outdir/baseline
#   IMGT_NAME - new_IMGT* directory / archive base name below $outdir
#   LABEL     - ninth argument handed to baseline/wrapper.sh
#   RESULT    - base name of the .pdf / .txt written to $outdir
#   EMPTY_MSG - text written to RESULT.txt when the summary has at most one
#               line; the wrapper is not run in that case
# The wrapper is skipped when the working directory cannot be entered, so it
# never runs in an unrelated directory.
run_baseline() {
  local subdir=$1
  local imgt_name=$2
  local label=$3
  local result=$4
  local empty_msg=$5

  mkdir "$outdir/baseline/$subdir"
  if [[ $(wc -l < "$outdir/$imgt_name/1_Summary.txt") -gt 1 ]]; then
    cd "$outdir/baseline/$subdir" || return
    bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/$imgt_name.txz" "$label" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/$result.pdf" "Sequence.ID" "$outdir/$result.txt"
  else
    echo "$empty_msg" > "$outdir/$result.txt"
  fi
}

run_baseline IGA_IGG_IGM new_IMGT "IGA_IGG_IGM" baseline "No sequences"
run_baseline IGA new_IMGT_IGA "IGA" baseline_IGA "No IGA sequences"
# NOTE(review): the IGG run passes "cg" where the other runs pass the class
# name. Kept as-is; confirm whether baseline/wrapper.sh expects "IGG" here.
run_baseline IGG new_IMGT_IGG "cg" baseline_IGG "No IGG sequences"
run_baseline IGM new_IMGT_IGM "IGM" baseline_IGM "No IGM sequences"

cd "$tmp" || echo "could not return to $tmp" >&2
577 | |
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# copy_naive_output SRC DEST
# Copy SRC to DEST unless DEST is empty or the literal "None".
# The previous guard tested $naive_output, which this script never assigns, so
# it was always true and archives were copied even to a "None" destination.
# Each destination is now checked on its own.
copy_naive_output() {
  local src=$1
  local dest=$2

  if [[ -z "$dest" || "$dest" == "None" ]]; then
    return 0
  fi
  cp "$src" "$dest"
}

copy_naive_output "$outdir/new_IMGT_IGA.txz" "${naive_output_ca}"
copy_naive_output "$outdir/new_IMGT_IGG.txz" "${naive_output_cg}"
copy_naive_output "$outdir/new_IMGT_IGM.txz" "${naive_output_cm}"
587 | |
# Close the table in base_overview.html (it is opened and filled earlier in
# the script).
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log as log.html; $log itself becomes the landing page.
mv "$log" "$outdir/log.html"

# filtering_steps_rows INFILE
# Print one HTML table row per tab-separated line (step, sequences,
# percentage) of INFILE. IFS is changed only for the `read` itself, so the
# shell-wide IFS is left alone; the earlier save/restore stored $TMP instead
# of $IFS and so "restored" the wrong value.
filtering_steps_rows() {
  local infile=$1
  local step seq perc

  while IFS=$'\t' read -r step seq perc || [[ -n "$step" ]]; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done < "$infile"
}

{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  filtering_steps_rows "$outdir/filtering_steps.txt"
  # A closing tag takes no attributes: was "</table border>".
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
612 | |
613 | |
614 | |
615 | |
616 | |
617 | |
618 | |
619 | |
620 | |
621 | |
622 | |
623 | |
624 | |
625 | |
626 | |
627 | |
628 | |
629 | |
630 | |
631 | |
632 |