# HG changeset patch # User davidvanzessen # Date 1614260390 0 # Node ID 0ef7f80ea0610169f78d9b410f8ea2273e701a49 # Parent 124b7fd92a3e7a9d9520acd1df970e7c43e8e730 Uploaded diff -r 124b7fd92a3e -r 0ef7f80ea061 .gitattributes --- a/.gitattributes Thu Feb 25 13:36:15 2021 +0000 +++ b/.gitattributes Thu Feb 25 13:39:50 2021 +0000 @@ -1,2 +1,4 @@ # Auto detect text files and perform LF normalization * text=auto +# Convert to LF line endings on checkout. +*.sh text eol=lf diff -r 124b7fd92a3e -r 0ef7f80ea061 complete.sh --- a/complete.sh Thu Feb 25 13:36:15 2021 +0000 +++ b/complete.sh Thu Feb 25 13:39:50 2021 +0000 @@ -1,72 +1,72 @@ -#!/bin/bash -set -e -inputFiles=($1) -outputDir=$3 -outputFile=$3/index.html #$1 -clonalType=$4 -species=$5 -locus=$6 -filterproductive=$7 -clonality_method=$8 - -html=$2 -dir="$(cd "$(dirname "$0")" && pwd)" -array=("$@") -echo "

Progress

" > $html -echo "" >> $html - -#mkdir $PWD/igblastdatabase -#unzip $dir/database.zip -d $PWD/igblastdatabase/ -#export IGDATA=$PWD/igblastdatabase/ - -echo "python: `which python`" -echo "R: `which R`" -echo "Rscript: `which Rscript`" - -id="" -forwardSlash="/" -mergerInput=() -echo "Before loop" -count=1 -for current in "${inputFiles[@]}" -do - if [[ "$current" != *"$forwardSlash"* ]]; then - id="$current" - mergerInput+=($id) - count=1 - continue - fi - echo "working on $current" - fileName=$(basename $current) - fileName="${fileName%.*}" - parsedFileName="$PWD/$fileName.parsed" - f=$(file $current) - zipType="Zip archive" - zxType="XZ compressed data" - echo "filetype of ${id}: $f" - if [[ "$f" == *"$zipType"* ]] || [[ "$f" == *"$zxType"* ]] - then - echo "" >> $html - fileName=$(basename $current) - bash ${dir}/imgt_loader/imgt_loader.sh $current $parsedFileName "${fileName}" - else - echo "" >> $html - bash ${dir}/igblast/igblast.sh $current "$species" $locus $parsedFileName - fi - mergerInput+=($parsedFileName) - count=$((count+1)) -done - -echo "" >> $html -echo "" >> $html - -bash $dir/experimental_design/experimental_design.sh ${mergerInput[*]} $PWD/merged.txt - -echo "" >> $html -echo "" >> $html -echo "" >> $html - -echo "after ED" - -bash $dir/report_clonality/r_wrapper.sh $PWD/merged.txt $2 $outputDir $clonalType "$species" "$locus" $filterproductive $clonality_method - +#!/bin/bash +set -e +inputFiles=($1) +outputDir=$3 +outputFile=$3/index.html #$1 +clonalType=$4 +species=$5 +locus=$6 +filterproductive=$7 +clonality_method=$8 + +html=$2 +dir="$(cd "$(dirname "$0")" && pwd)" +array=("$@") +echo "

Progress

info
-----------------------------------
Sample $count of patient $id is an archive file, using IMGT Loader
Sample $count of patient $id is not a zip file so assuming fasta/fastq, using igBLASTn
-----------------------------------
merging
done
-----------------------------------
plotting
" > $html +echo "" >> $html + +#mkdir $PWD/igblastdatabase +#unzip $dir/database.zip -d $PWD/igblastdatabase/ +#export IGDATA=$PWD/igblastdatabase/ + +echo "python: `which python`" +echo "R: `which R`" +echo "Rscript: `which Rscript`" + +id="" +forwardSlash="/" +mergerInput=() +echo "Before loop" +count=1 +for current in "${inputFiles[@]}" +do + if [[ "$current" != *"$forwardSlash"* ]]; then + id="$current" + mergerInput+=($id) + count=1 + continue + fi + echo "working on $current" + fileName=$(basename $current) + fileName="${fileName%.*}" + parsedFileName="$PWD/$fileName.parsed" + f=$(file $current) + zipType="Zip archive" + zxType="XZ compressed data" + echo "filetype of ${id}: $f" + if [[ "$f" == *"$zipType"* ]] || [[ "$f" == *"$zxType"* ]] + then + echo "" >> $html + fileName=$(basename $current) + bash ${dir}/imgt_loader/imgt_loader.sh $current $parsedFileName "${fileName}" + else + echo "" >> $html + bash ${dir}/igblast/igblast.sh $current "$species" $locus $parsedFileName + fi + mergerInput+=($parsedFileName) + count=$((count+1)) +done + +echo "" >> $html +echo "" >> $html + +bash $dir/experimental_design/experimental_design.sh ${mergerInput[*]} $PWD/merged.txt + +echo "" >> $html +echo "" >> $html +echo "" >> $html + +echo "after ED" + +bash $dir/report_clonality/r_wrapper.sh $PWD/merged.txt $2 $outputDir $clonalType "$species" "$locus" $filterproductive $clonality_method + diff -r 124b7fd92a3e -r 0ef7f80ea061 experimental_design/experimental_design.sh --- a/experimental_design/experimental_design.sh Thu Feb 25 13:36:15 2021 +0000 +++ b/experimental_design/experimental_design.sh Thu Feb 25 13:39:50 2021 +0000 @@ -1,4 +1,4 @@ - -dir="$(cd "$(dirname "$0")" && pwd)" - -Rscript --verbose $dir/experimental_design.r $@ 2>&1 + +dir="$(cd "$(dirname "$0")" && pwd)" + +Rscript --verbose $dir/experimental_design.r $@ 2>&1 diff -r 124b7fd92a3e -r 0ef7f80ea061 igblast/igblast.sh --- a/igblast/igblast.sh Thu Feb 25 13:36:15 2021 +0000 +++ b/igblast/igblast.sh Thu Feb 25 13:39:50 2021 +0000 @@ -1,28 +1,28 @@ -set -e - -dir="$(cd "$(dirname "$0")" && pwd)" - -input=$1 -species=$2 -locus=$3 -output=$4 - -declare -A speciesdict - -speciesdict=(["Rattus norvegicus functional"]="rat" ["Rattus norvegicus non-functional"]="rat" ["Oryctolagus cuniculus functional"]="rabbit" ["Oryctolagus cuniculus non-functional"]="rabbit" ["Mus musculus functional"]="mouse" ["Mus musculus non-functional"]="mouse" ["Homo sapiens functional"]="human" ["Homo sapiens non-functional"]="human" ["Macaca mulatta non-functional"]="rhesus_monkey" ["Macaca mulatta functional"]="rhesus_monkey") - -echo "Species: $species ${speciesdict[$species]}" - -species="${speciesdict[$species]}" - -if [ "$species" == "" ] -then - >&2 echo "Species not possible with igBLASTn, use IMGT" - exit 1 -fi - -echo "$input $species $locus $output" - -java -Xmx16G -jar $IGBLASTWRP/igblastwrp.jar -p 4 -S $species -R $locus ${input} $PWD/blasted_output 2>&1 - -Rscript --verbose $dir/igblast.r "$PWD/blasted_output.L2.txt" "$output" 2>&1 +set -e + +dir="$(cd "$(dirname "$0")" && pwd)" + +input=$1 +species=$2 +locus=$3 +output=$4 + +declare -A speciesdict + +speciesdict=(["Rattus norvegicus functional"]="rat" ["Rattus norvegicus non-functional"]="rat" ["Oryctolagus cuniculus functional"]="rabbit" ["Oryctolagus cuniculus non-functional"]="rabbit" ["Mus musculus functional"]="mouse" ["Mus musculus non-functional"]="mouse" ["Homo sapiens functional"]="human" ["Homo sapiens non-functional"]="human" ["Macaca mulatta non-functional"]="rhesus_monkey" ["Macaca mulatta functional"]="rhesus_monkey") + +echo "Species: $species ${speciesdict[$species]}" + +species="${speciesdict[$species]}" + +if [ "$species" == "" ] +then + >&2 echo "Species not possible with igBLASTn, use IMGT" + exit 1 +fi + +echo "$input $species $locus $output" + +java -Xmx16G -jar $IGBLASTWRP/igblastwrp.jar -p 4 -S $species -R $locus ${input} $PWD/blasted_output 2>&1 + +Rscript --verbose $dir/igblast.r "$PWD/blasted_output.L2.txt" "$output" 2>&1 diff -r 124b7fd92a3e -r 0ef7f80ea061 imgt_loader/imgt_loader.sh --- a/imgt_loader/imgt_loader.sh Thu Feb 25 13:36:15 2021 +0000 +++ b/imgt_loader/imgt_loader.sh Thu Feb 25 13:39:50 2021 +0000 @@ -1,74 +1,74 @@ -#!/bin/bash -input=$1 -output=$2 -name=$3 -dir="$(cd "$(dirname "$0")" && pwd)" -mkdir -p $PWD/$name/files -f=$(file $input) -zip7Type="7-zip archive" -tarType="tar archive" -bzip2Type="bzip2 compressed" -gzipType="gzip compressed" -zipType="Zip archive" -rarType="RAR archive" -zxType="XZ compressed data" - -if [[ "$f" == *"$zip7Type"* ]]; then - echo "7-zip" - echo "Trying: 7za e $input -o$PWD/files/" - 7za e $input -o$PWD/$name/files -fi - -if [[ "$f" == *"$tarType"* ]] -then - echo "tar archive" - echo "Trying: tar xvf $input -C $PWD/files/" - tar -xvf $input -C $PWD/$name/files -fi - -if [[ "$f" == *"$bzip2Type"* ]] -then - echo "bzip2 compressed data" - echo "Trying: tar jxf $input -C $PWD/files/" - tar -jxf $input -C $PWD/$name/files -fi - -if [[ "$f" == *"$gzipType"* ]] -then - echo "gzip compressed data" - echo "Trying: tar xvzf $input -C $PWD/files/" - tar -xvzf $input -C $PWD/$name/files -fi - -if [[ "$f" == *"$zipType"* ]] -then - echo "Zip archive" - echo "Trying: unzip $input -d $PWD/files/" - unzip $input -d $PWD/$name/files > $PWD/unziplog.log -fi - -if [[ "$f" == *"$rarType"* ]] -then - echo "RAR archive" - echo "Trying: unrar e $input $PWD/files/" - unrar e $input $PWD/$name/files -fi - -if [[ "$f" == *"$zxType"* ]] -then - echo "xz compressed data" - echo "Trying: tar -xJf $input -C $PWD/files/" - tar xJf $input -C $PWD/$name/files -fi -find $PWD/$name/files -iname "1_*" -exec cat {} + > $PWD/$name/summ.txt -find $PWD/$name/files -iname "3_*" -exec cat {} + > $PWD/$name/sequences.txt -find $PWD/$name/files -iname "4_*" -exec cat {} + > $PWD/$name/gapped_aa.txt -find $PWD/$name/files -iname "5_*" -exec cat {} + > $PWD/$name/aa.txt -find $PWD/$name/files -iname "6_*" -exec cat {} + > $PWD/$name/junction.txt - -echo "summ.txt `cat $PWD/$name/summ.txt | wc -l`" -echo "aa.txt `cat $PWD/$name/aa.txt | wc -l`" - -#python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output - -Rscript --verbose $dir/imgt_loader.r $PWD/$name/summ.txt $PWD/$name/sequences.txt $PWD/$name/aa.txt $PWD/$name/junction.txt $PWD/$name/gapped_aa.txt $output 2>&1 +#!/bin/bash +input=$1 +output=$2 +name=$3 +dir="$(cd "$(dirname "$0")" && pwd)" +mkdir -p $PWD/$name/files +f=$(file $input) +zip7Type="7-zip archive" +tarType="tar archive" +bzip2Type="bzip2 compressed" +gzipType="gzip compressed" +zipType="Zip archive" +rarType="RAR archive" +zxType="XZ compressed data" + +if [[ "$f" == *"$zip7Type"* ]]; then + echo "7-zip" + echo "Trying: 7za e $input -o$PWD/files/" + 7za e $input -o$PWD/$name/files +fi + +if [[ "$f" == *"$tarType"* ]] +then + echo "tar archive" + echo "Trying: tar xvf $input -C $PWD/files/" + tar -xvf $input -C $PWD/$name/files +fi + +if [[ "$f" == *"$bzip2Type"* ]] +then + echo "bzip2 compressed data" + echo "Trying: tar jxf $input -C $PWD/files/" + tar -jxf $input -C $PWD/$name/files +fi + +if [[ "$f" == *"$gzipType"* ]] +then + echo "gzip compressed data" + echo "Trying: tar xvzf $input -C $PWD/files/" + tar -xvzf $input -C $PWD/$name/files +fi + +if [[ "$f" == *"$zipType"* ]] +then + echo "Zip archive" + echo "Trying: unzip $input -d $PWD/files/" + unzip $input -d $PWD/$name/files > $PWD/unziplog.log +fi + +if [[ "$f" == *"$rarType"* ]] +then + echo "RAR archive" + echo "Trying: unrar e $input $PWD/files/" + unrar e $input $PWD/$name/files +fi + +if [[ "$f" == *"$zxType"* ]] +then + echo "xz compressed data" + echo "Trying: tar -xJf $input -C $PWD/files/" + tar xJf $input -C $PWD/$name/files +fi +find $PWD/$name/files -iname "1_*" -exec cat {} + > $PWD/$name/summ.txt +find $PWD/$name/files -iname "3_*" -exec cat {} + > $PWD/$name/sequences.txt +find $PWD/$name/files -iname "4_*" -exec cat {} + > $PWD/$name/gapped_aa.txt +find $PWD/$name/files -iname "5_*" -exec cat {} + > $PWD/$name/aa.txt +find $PWD/$name/files -iname "6_*" -exec cat {} + > $PWD/$name/junction.txt + +echo "summ.txt `cat $PWD/$name/summ.txt | wc -l`" +echo "aa.txt `cat $PWD/$name/aa.txt | wc -l`" + +#python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output + +Rscript --verbose $dir/imgt_loader.r $PWD/$name/summ.txt $PWD/$name/sequences.txt $PWD/$name/aa.txt $PWD/$name/junction.txt $PWD/$name/gapped_aa.txt $output 2>&1 diff -r 124b7fd92a3e -r 0ef7f80ea061 report_clonality/r_wrapper.sh --- a/report_clonality/r_wrapper.sh Thu Feb 25 13:36:15 2021 +0000 +++ b/report_clonality/r_wrapper.sh Thu Feb 25 13:39:50 2021 +0000 @@ -1,391 +1,391 @@ -#!/bin/bash - -inputFile=$1 -outputDir=$3 -outputFile=$3/index.html #$2 -clonalType=$4 -species=$5 -locus=$6 -filterproductive=$7 -clonality_method=$8 - -dir="$(cd "$(dirname "$0")" && pwd)" -useD="false" -if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then - echo "species D region in reference db" - useD="true" -fi -echo "$species" -if [[ "$species" == *"custom"* ]] ; then - loci=(${locus//;/ }) - useD="true" - echo "${loci[@]}" - if [[ "${#loci[@]}" -eq "2" ]] ; then - useD="false" - fi -fi -mkdir $3 -cp $dir/genes.txt $outputDir -Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1 -cp $dir/tabber.js $outputDir -cp $dir/style.css $outputDir -cp $dir/script.js $outputDir -cp $dir/jquery-1.11.0.min.js $outputDir -cp $dir/pure-min.css $outputDir -cp $dir/IGH_junctie_analyse.png $outputDir -samples=`cat $outputDir/samples.txt` - -echo "

Click here for the results

Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)
" > $2 -echo "
info
-----------------------------------
Sample $count of patient $id is an archive file, using IMGT Loader
Sample $count of patient $id is not a zip file so assuming fasta/fastq, using igBLASTn
-----------------------------------
merging
done
-----------------------------------
plotting
" >> $2 -echo "" >> $2 -while IFS=, read sample all productive perc_prod productive_unique perc_prod_un unproductive perc_unprod unproductive_unique perc_unprod_un - do - echo "" >> $2 - echo "" >> $2 - if [[ "$productive" != "0" ]] ; then - echo "" >> $2 - echo "" >> $2 - echo "" >> $2 - echo "" >> $2 - else - echo "" >> $2 - fi -done < $outputDir/productive_counting.txt -echo "
Donor/ReplicateAllProductiveUnique ProductiveUnproductiveUnique Unproductive
$sample$all$productive (${perc_prod}%)$productive_unique (${perc_prod_un}%)$unproductive (${perc_unprod}%)$unproductive_unique (${perc_unprod_un}%)
No productive sequences!

" >> $2 -echo "Table showing the number and percentage of (unique) productive and unproductive sequences per donor and per replicate.
" >> $2 -echo "The definition of unique sequences is based on the clonal type definition filter setting chosen. " >> $2 -echo "" >> $2 - -echo "Report on:" >> $outputFile - -mkdir $outputDir/circos -cp -R $dir/circos/* $outputDir/circos/ - -USECIRCOS="no" -path_to_circos=$(which circos) -if [ -x "$path_to_circos" ]; then - USECIRCOS="yes" -fi - -echo "Using Circos: $USECIRCOS" -sed -i "s%DATA_DIR%$outputDir/circos%" $outputDir/circos/circos.conf -for sample in $samples; do #output the samples to a file and create the circos plots with the R script output - echo " $sample" >> $outputFile - - if [[ "$USECIRCOS" != "yes" ]]; then - continue - fi - - circos_file="$outputDir/${sample}_VJ_circos.txt" - sed -i -- 's%/%:%g' $circos_file - echo -e -n "labels$(cat ${circos_file})" > ${circos_file} - echo "Circos tools command:" - echo "cat \"${circos_file}\" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/" - cat "${circos_file}" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/ - - echo "Circos command:" - echo "circos -conf $outputDir/circos/circos.conf 2>&1" - circos -conf $outputDir/circos/circos.conf 2>&1 - mv $outputDir/circos/circos.png $outputDir/circosVJ_${sample}.png - mv $outputDir/circos/circos.svg $outputDir/circosVJ_${sample}.svg - - - if [[ "$useD" == "true" ]] ; then - circos_file="$outputDir/${sample}_VD_circos.txt" - sed -i -- 's%/%:%g' $circos_file - echo -e -n "labels$(cat ${circos_file})" > ${circos_file} - cat "${circos_file}" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/ - sed -i -- 's%/%:%g' $outputDir/circos/cells.txt - circos -conf $outputDir/circos/circos.conf 2>&1 - mv $outputDir/circos/circos.png $outputDir/circosVD_${sample}.png - mv $outputDir/circos/circos.svg $outputDir/circosVD_${sample}.svg - - circos_file="$outputDir/${sample}_DJ_circos.txt" - sed -i -- 's%/%:%g' $circos_file - echo -e -n "labels$(cat ${circos_file})" > ${circos_file} - cat "${circos_file}" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/ - sed -i -- 's%/%:%g' $outputDir/circos/cells.txt - circos -conf $outputDir/circos/circos.conf 2>&1 - mv $outputDir/circos/circos.png $outputDir/circosDJ_${sample}.png - mv $outputDir/circos/circos.svg $outputDir/circosDJ_${sample}.svg - fi -done -echo "" >> $outputFile -echo "" >> $outputFile -echo "" >> $outputFile -echo "" >> $outputFile -echo "" >> $outputFile -echo "
" >> $outputFile - - -echo "" >> $outputFile -if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile -fi -echo "" >> $outputFile -if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile -fi -echo "
" >> $outputFile - -echo "" >> $outputFile - -cat $dir/naive_gene_freq.htm >> $outputFile - -echo "
" >> $outputFile - -echo "
" >> $outputFile -echo "
" >> $outputFile -echo "" >> $outputFile - - -echo "" >> $outputFile -echo "" >> $outputFile -while read Sample median -do - echo "" >> $outputFile -done < $outputDir/AAMedianBySample.txt -echo "
DonorMedian CDR3 Length
$Sample$median
" >> $outputFile - -cat $dir/naive_cdr3_char.htm >> $outputFile - -echo "
" >> $outputFile - -#Heatmaps - -count=1 -echo "
" >> $outputFile -for sample in $samples; do - echo "
" >> $outputFile - if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile - fi - echo "" >> $outputFile - if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile - fi - echo "
" >> $outputFile - count=$((count+1)) -done - -cat $dir/naive_heatmap.htm >> $outputFile - -echo "
" >> $outputFile - -echo "
" >> $outputFile -for sample in $samples; do - echo "" >> $outputFile -done -echo "
IDInclude
$sample
" >> $outputFile -echo "
" >> $outputFile -echo "
" >> $outputFile -echo "
" >> $outputFile - -cat $dir/naive_compare.htm >> $outputFile - -echo "
" >> $outputFile - - -#circos - -if [[ "$USECIRCOS" == "yes" ]]; then - - echo "
" >> $outputFile - for sample in $samples; do - echo "
" >> $outputFile - if [[ "$useD" == "true" ]] ; then - echo "
" >> $outputFile - fi - echo "" >> $outputFile - if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile - fi - echo "
V-D
V-J
D-J
" >> $outputFile - count=$((count+1)) - done - - cat $dir/naive_circos.htm >> $outputFile - - echo "
" >> $outputFile -fi -#echo "
" >> $outputFile - -hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)" -echo "$hasReplicateColumn" -#if its a 'new' merged file with replicate info -if [[ "$hasReplicateColumn" == "Yes" && "${clonality_method}" != "none" ]] ; then - if [[ "${clonality_method}" == "boyd" ]] ; then - echo "
" >> $outputFile - else - echo "
" >> $outputFile - fi - - for sample in $samples; do - echo "${clonality_method}" - - echo "
" >> $outputFile - - if [[ "${clonality_method}" == "boyd" ]] ; then - clonalityScore="$(cat $outputDir/lymphclon_clonality_${sample}.txt)" - echo "" >> $outputFile - fi - - #replicate,reads,squared - echo "" >> $outputFile - while read replicate reads squared - do - echo "" >> $outputFile - done < $outputDir/ReplicateReads_$sample.txt - - #sum of reads and reads squared - while read readsSum squaredSum - do - echo "" >> $outputFile - done < $outputDir/ReplicateSumReads_$sample.txt - - echo "" >> $outputFile - - #overview - echo "" >> $outputFile - while read type count weight weightedCount - do - if [[ "$type" -eq "1" ]]; then - echo "" >> $outputFile - else - echo "" >> $outputFile - fi - done < $outputDir/ClonalityOverView_$sample.txt - echo "
Clonality Score: $clonalityScore
Replicate IDNumber of Sequences
$replicate$reads
Sum$readsSum
Number of replicates containing the coincidenceNumber of sequences shared between replicates
$type$count
$type$count
" >> $outputFile - done - - cat $dir/naive_clonality.htm >> $outputFile - - echo "
" >> $outputFile -fi - -#hasJunctionData="$(if head -n 1 $inputFile | grep -qE '3V.REGION.trimmed.nt.nb'; then echo 'Yes'; else echo 'No'; fi)" - -#if [[ "$hasJunctionData" == "Yes" ]] ; then -if [ -a "$outputDir/junctionAnalysisProd_mean_wD.txt" ] ; then - echo "
" >> $outputFile - echo "" >> $outputFile - - echo "

Unique rearrangements with a V, D and J gene assigned

" >> $outputFile - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisProd_mean_wD.txt - echo "
Productive mean
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile - - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisUnProd_mean_wD.txt - echo "
Unproductive mean
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile - - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisProd_median_wD.txt - echo "
Productive median
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile - - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisUnProd_median_wD.txt - echo "
Unproductive median
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile - - # again for no-d - echo "

Unique rearrangements with only a V and J gene assigned

" >> $outputFile - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisProd_mean_nD.txt - echo "
Productive mean
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile - - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisUnProd_mean_nD.txt - echo "
Unproductive mean
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile - - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisProd_median_nD.txt - echo "
Productive median
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile - - echo "" >> $outputFile - while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median - do - echo "" >> $outputFile - done < $outputDir/junctionAnalysisUnProd_median_nD.txt - echo "
Unproductive median
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile - - cat $dir/naive_junction.htm >> $outputFile - - echo "
" >> $outputFile -fi - -echo "
" >> $outputFile -echo "" >> $outputFile -echo "" >> $outputFile -echo "" >> $outputFile -echo "" >> $outputFile - -echo "" >> $outputFile -if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile -fi - -echo "" >> $outputFile -if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile -fi -echo "" >> $outputFile -echo "" >> $outputFile - -echo "" >> $outputFile -echo "" >> $outputFile -echo "" >> $outputFile - -echo "" >> $outputFile -for sample in $samples; do - if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile - fi - echo "" >> $outputFile - if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile - fi -done - -echo "" >> $outputFile -for sample in $samples; do - if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile - fi - echo "" >> $outputFile - if [[ "$useD" == "true" ]] ; then - echo "" >> $outputFile - fi -done - -#echo "" >> $outputFile - -echo "" >> $outputFile -echo "" >> $outputFile -# echo "" >> $outputFile - -echo "
DescriptionLink
The filtered datasetDownload
Gene frequencies
The dataset used to generate the distribution of V gene families graphDownload
The dataset used to generate the distribution of D gene families graphDownload
The dataset used to generate the relative frequency of V gene usage graphDownload
The dataset used to generate the relative frequency of D gene usage graphDownload
The dataset used to generate the relative frequency of J gene usage graphDownload
The dataset used to generate the relative frequency of the D reading frame graphDownload
CDR3 Characteristics
The dataset used to generate the CDR3 length frequency graphDownload
The dataset used to generate the Amino Acid Composition in the CDR3 graphDownload
Heatmaps
The data used to generate the VD heatmap for $sample.Download
The data used to generate the VJ heatmap for $sample.Download
The data used to generate the DJ heatmap for $sample.Download
Circos
The data used to generate the VD Circos plots for $sample.Download
The data used to generate the VJ Circos plots for $sample.Download
The data used to generate the DJ Circos plots for $sample.Download
A frequency count of V Gene + J Gene + CDR3Download
Clonality
The dataset used to calculate clonality score (Unique based on clonaltype, $clonalType)Download
Sequences that are present in more than one replicateDownload
" >> $outputFile - -cat $dir/naive_downloads.htm >> $outputFile - -echo "
" >> $outputFile +#!/bin/bash + +inputFile=$1 +outputDir=$3 +outputFile=$3/index.html #$2 +clonalType=$4 +species=$5 +locus=$6 +filterproductive=$7 +clonality_method=$8 + +dir="$(cd "$(dirname "$0")" && pwd)" +useD="false" +if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then + echo "species D region in reference db" + useD="true" +fi +echo "$species" +if [[ "$species" == *"custom"* ]] ; then + loci=(${locus//;/ }) + useD="true" + echo "${loci[@]}" + if [[ "${#loci[@]}" -eq "2" ]] ; then + useD="false" + fi +fi +mkdir $3 +cp $dir/genes.txt $outputDir +Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1 +cp $dir/tabber.js $outputDir +cp $dir/style.css $outputDir +cp $dir/script.js $outputDir +cp $dir/jquery-1.11.0.min.js $outputDir +cp $dir/pure-min.css $outputDir +cp $dir/IGH_junctie_analyse.png $outputDir +samples=`cat $outputDir/samples.txt` + +echo "

Click here for the results

Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)
" > $2 +echo "" >> $2 +echo "" >> $2 +while IFS=, read sample all productive perc_prod productive_unique perc_prod_un unproductive perc_unprod unproductive_unique perc_unprod_un + do + echo "" >> $2 + echo "" >> $2 + if [[ "$productive" != "0" ]] ; then + echo "" >> $2 + echo "" >> $2 + echo "" >> $2 + echo "" >> $2 + else + echo "" >> $2 + fi +done < $outputDir/productive_counting.txt +echo "
Donor/ReplicateAllProductiveUnique ProductiveUnproductiveUnique Unproductive
$sample$all$productive (${perc_prod}%)$productive_unique (${perc_prod_un}%)$unproductive (${perc_unprod}%)$unproductive_unique (${perc_unprod_un}%)
No productive sequences!

" >> $2 +echo "Table showing the number and percentage of (unique) productive and unproductive sequences per donor and per replicate.
" >> $2 +echo "The definition of unique sequences is based on the clonal type definition filter setting chosen. " >> $2 +echo "
" >> $2 + +echo "Report on:" >> $outputFile + +mkdir $outputDir/circos +cp -R $dir/circos/* $outputDir/circos/ + +USECIRCOS="no" +path_to_circos=$(which circos) +if [ -x "$path_to_circos" ]; then + USECIRCOS="yes" +fi + +echo "Using Circos: $USECIRCOS" +sed -i "s%DATA_DIR%$outputDir/circos%" $outputDir/circos/circos.conf +for sample in $samples; do #output the samples to a file and create the circos plots with the R script output + echo " $sample" >> $outputFile + + if [[ "$USECIRCOS" != "yes" ]]; then + continue + fi + + circos_file="$outputDir/${sample}_VJ_circos.txt" + sed -i -- 's%/%:%g' $circos_file + echo -e -n "labels$(cat ${circos_file})" > ${circos_file} + echo "Circos tools command:" + echo "cat \"${circos_file}\" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/" + cat "${circos_file}" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/ + + echo "Circos command:" + echo "circos -conf $outputDir/circos/circos.conf 2>&1" + circos -conf $outputDir/circos/circos.conf 2>&1 + mv $outputDir/circos/circos.png $outputDir/circosVJ_${sample}.png + mv $outputDir/circos/circos.svg $outputDir/circosVJ_${sample}.svg + + + if [[ "$useD" == "true" ]] ; then + circos_file="$outputDir/${sample}_VD_circos.txt" + sed -i -- 's%/%:%g' $circos_file + echo -e -n "labels$(cat ${circos_file})" > ${circos_file} + cat "${circos_file}" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/ + sed -i -- 's%/%:%g' $outputDir/circos/cells.txt + circos -conf $outputDir/circos/circos.conf 2>&1 + mv $outputDir/circos/circos.png $outputDir/circosVD_${sample}.png + mv $outputDir/circos/circos.svg $outputDir/circosVD_${sample}.svg + + circos_file="$outputDir/${sample}_DJ_circos.txt" + sed -i -- 's%/%:%g' $circos_file + echo -e -n "labels$(cat ${circos_file})" > ${circos_file} + cat "${circos_file}" | parse-table -configfile $dir/circos/parse-table.conf 2>&1 | make-conf -dir $outputDir/circos/ + sed -i -- 's%/%:%g' $outputDir/circos/cells.txt + circos -conf $outputDir/circos/circos.conf 2>&1 + mv $outputDir/circos/circos.png $outputDir/circosDJ_${sample}.png + mv $outputDir/circos/circos.svg $outputDir/circosDJ_${sample}.svg + fi +done +echo "" >> $outputFile +echo "" >> $outputFile +echo "" >> $outputFile +echo "" >> $outputFile +echo "" >> $outputFile +echo "
" >> $outputFile + + +echo "" >> $outputFile +if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile +fi +echo "" >> $outputFile +if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile +fi +echo "
" >> $outputFile + +echo "" >> $outputFile + +cat $dir/naive_gene_freq.htm >> $outputFile + +echo "
" >> $outputFile + +echo "
" >> $outputFile +echo "
" >> $outputFile +echo "" >> $outputFile + + +echo "" >> $outputFile +echo "" >> $outputFile +while read Sample median +do + echo "" >> $outputFile +done < $outputDir/AAMedianBySample.txt +echo "
DonorMedian CDR3 Length
$Sample$median
" >> $outputFile + +cat $dir/naive_cdr3_char.htm >> $outputFile + +echo "
" >> $outputFile + +#Heatmaps + +count=1 +echo "
" >> $outputFile +for sample in $samples; do + echo "
" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi + echo "" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi + echo "
" >> $outputFile + count=$((count+1)) +done + +cat $dir/naive_heatmap.htm >> $outputFile + +echo "
" >> $outputFile + +echo "
" >> $outputFile +for sample in $samples; do + echo "" >> $outputFile +done +echo "
IDInclude
$sample
" >> $outputFile +echo "
" >> $outputFile +echo "
" >> $outputFile +echo "
" >> $outputFile + +cat $dir/naive_compare.htm >> $outputFile + +echo "
" >> $outputFile + + +#circos + +if [[ "$USECIRCOS" == "yes" ]]; then + + echo "
" >> $outputFile + for sample in $samples; do + echo "
" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "
" >> $outputFile + fi + echo "" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi + echo "
V-D
V-J
D-J
" >> $outputFile + count=$((count+1)) + done + + cat $dir/naive_circos.htm >> $outputFile + + echo "
" >> $outputFile +fi +#echo "
" >> $outputFile + +hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)" +echo "$hasReplicateColumn" +#if its a 'new' merged file with replicate info +if [[ "$hasReplicateColumn" == "Yes" && "${clonality_method}" != "none" ]] ; then + if [[ "${clonality_method}" == "boyd" ]] ; then + echo "
" >> $outputFile + else + echo "
" >> $outputFile + fi + + for sample in $samples; do + echo "${clonality_method}" + + echo "
" >> $outputFile + + if [[ "${clonality_method}" == "boyd" ]] ; then + clonalityScore="$(cat $outputDir/lymphclon_clonality_${sample}.txt)" + echo "" >> $outputFile + fi + + #replicate,reads,squared + echo "" >> $outputFile + while read replicate reads squared + do + echo "" >> $outputFile + done < $outputDir/ReplicateReads_$sample.txt + + #sum of reads and reads squared + while read readsSum squaredSum + do + echo "" >> $outputFile + done < $outputDir/ReplicateSumReads_$sample.txt + + echo "" >> $outputFile + + #overview + echo "" >> $outputFile + while read type count weight weightedCount + do + if [[ "$type" -eq "1" ]]; then + echo "" >> $outputFile + else + echo "" >> $outputFile + fi + done < $outputDir/ClonalityOverView_$sample.txt + echo "
Clonality Score: $clonalityScore
Replicate IDNumber of Sequences
$replicate$reads
Sum$readsSum
Number of replicates containing the coincidenceNumber of sequences shared between replicates
$type$count
$type$count
" >> $outputFile + done + + cat $dir/naive_clonality.htm >> $outputFile + + echo "
" >> $outputFile +fi + +#hasJunctionData="$(if head -n 1 $inputFile | grep -qE '3V.REGION.trimmed.nt.nb'; then echo 'Yes'; else echo 'No'; fi)" + +#if [[ "$hasJunctionData" == "Yes" ]] ; then +if [ -a "$outputDir/junctionAnalysisProd_mean_wD.txt" ] ; then + echo "
" >> $outputFile + echo "" >> $outputFile + + echo "

Unique rearrangements with a V, D and J gene assigned

" >> $outputFile + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisProd_mean_wD.txt + echo "
Productive mean
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile + + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisUnProd_mean_wD.txt + echo "
Unproductive mean
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile + + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisProd_median_wD.txt + echo "
Productive median
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile + + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisUnProd_median_wD.txt + echo "
Unproductive median
DonorNumber of sequencesV.DELP1N1P2DEL.DD.DELP3N2P4DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELD$DDEL$P3$N2$P4$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile + + # again for no-d + echo "

Unique rearrangements with only a V and J gene assigned

" >> $outputFile + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisProd_mean_nD.txt + echo "
Productive mean
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile + + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisUnProd_mean_nD.txt + echo "
Unproductive mean
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile + + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisProd_median_nD.txt + echo "
Productive median
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP$median
" >> $outputFile + + echo "" >> $outputFile + while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisUnProd_median_nD.txt + echo "
Unproductive median
DonorNumber of sequencesV.DELP1NP2DEL.JTotal.DelTotal.NTotal.PCDR3.Length
$Sample$unique$VDEL$P1$N1$P2$DELJ$TotalDel$TotalN$TotalP-
" >> $outputFile + + cat $dir/naive_junction.htm >> $outputFile + + echo "
" >> $outputFile +fi + +echo "
" >> $outputFile +echo "" >> $outputFile +echo "" >> $outputFile +echo "" >> $outputFile +echo "" >> $outputFile + +echo "" >> $outputFile +if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile +fi + +echo "" >> $outputFile +if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile +fi +echo "" >> $outputFile +echo "" >> $outputFile + +echo "" >> $outputFile +echo "" >> $outputFile +echo "" >> $outputFile + +echo "" >> $outputFile +for sample in $samples; do + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi + echo "" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi +done + +echo "" >> $outputFile +for sample in $samples; do + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi + echo "" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi +done + +#echo "" >> $outputFile + +echo "" >> $outputFile +echo "" >> $outputFile +# echo "" >> $outputFile + +echo "
DescriptionLink
The filtered datasetDownload
Gene frequencies
The dataset used to generate the distribution of V gene families graphDownload
The dataset used to generate the distribution of D gene families graphDownload
The dataset used to generate the relative frequency of V gene usage graphDownload
The dataset used to generate the relative frequency of D gene usage graphDownload
The dataset used to generate the relative frequency of J gene usage graphDownload
The dataset used to generate the relative frequency of the D reading frame graphDownload
CDR3 Characteristics
The dataset used to generate the CDR3 length frequency graphDownload
The dataset used to generate the Amino Acid Composition in the CDR3 graphDownload
Heatmaps
The data used to generate the VD heatmap for $sample.Download
The data used to generate the VJ heatmap for $sample.Download
The data used to generate the DJ heatmap for $sample.Download
Circos
The data used to generate the VD Circos plots for $sample.Download
The data used to generate the VJ Circos plots for $sample.Download
The data used to generate the DJ Circos plots for $sample.Download
A frequency count of V Gene + J Gene + CDR3Download
Clonality
The dataset used to calculate clonality score (Unique based on clonaltype, $clonalType)Download
Sequences that are present in more than one replicateDownload
" >> $outputFile + +cat $dir/naive_downloads.htm >> $outputFile + +echo "
" >> $outputFile