#!/bin/bash
# Merge several IMGT archives into one XZ-compressed tar archive.
#
# Usage: <script> OUTPUT ARCHIVE_1 ID_1 [ARCHIVE_2 ID_2 ...]
#   OUTPUT     path of the merged archive to create
#   ARCHIVE_n  IMGT archive to merge
#   ID_n       text appended to the second column of the rows taken
#              from ARCHIVE_n

# Directory containing this script.
# NOTE(review): not referenced anywhere in the visible part of the script.
dir="$(cd "$(dirname "$0")" && pwd)"

args=("$@")
output=$1
# Everything after the output path: alternating archive / id values.
inputs=("${args[@]:1}")

# All scratch directories are created below the directory the script was
# started from.
workdir="$PWD"

echo "Output: $output"
# [*] joins the array into a single word; [@] inside a larger string
# expands to several words, which only works by accident with echo.
echo "Inputs: ${inputs[*]}"

# -p keeps a pre-existing directory from being reported as an error;
# any remaining failure (e.g. permissions) makes the merge impossible.
mkdir -p "$workdir/output" || exit 1

#######################################
# Unpack an IMGT archive into a directory.
# The archive format is detected with file(1): Zip archives are extracted
# with unzip, XZ-compressed tarballs with tar.
# Arguments:
#   $1 - path to the archive
#   $2 - directory to extract into (created when missing)
# Outputs:
#   The extraction tool's output; for XZ archives the tar command line is
#   echoed first. A diagnostic goes to stderr for unsupported formats.
# Returns:
#   Exit status of the extraction tool; 1 when the target directory cannot
#   be created or the archive format is not recognised.
#######################################
function imgt_unpack {
	local imgt_zip=$1
	local outdir=$2
	local type

	mkdir -p -- "$outdir" || return 1

	# -b drops the file name from the report, so a path that happens to
	# contain "Zip archive" cannot influence the format detection.
	type="$(file -b -- "$imgt_zip")"
	if [[ "$type" == *"Zip archive"* ]]; then
		unzip "$imgt_zip" -d "$outdir"
	elif [[ "$type" == *"XZ compressed data"* ]]; then
		echo "tar -xJf $imgt_zip -C $outdir"
		tar -xJf "$imgt_zip" -C "$outdir"
	else
		echo "imgt_unpack: unsupported archive type for $imgt_zip: $type" >&2
		return 1
	fi
}

#######################################
# Append the ten numbered IMGT tables found under a directory to the
# merged tables of the same number in an output directory, tagging the
# second tab-separated column of each data row with an id.
# Arguments:
#   $1 - directory searched (recursively) for files named "<N>_*"
#   $2 - directory holding the merged "<N>_<name>.txt" tables
#   $3 - first line to keep: 1 keeps the header row, 2 skips it
#   $4 - text appended to column 2 of every data row
# Outputs:
#   Appends to the ten table files in $2 (they are created when missing).
#   A table with no matching input file contributes no rows.
# Returns:
#   Exit status of the last awk invocation.
#######################################
function concat_imgt_files {
	local indir=$1
	local outdir=$2
	local start_line=$3 # line number to start at, 2 to skip the header
	local id=$4
	local -a tables=(
		"1_Summary.txt"
		"2_IMGT-gapped-nt-sequences.txt"
		"3_Nt-sequences.txt"
		"4_IMGT-gapped-AA-sequences.txt"
		"5_AA-sequences.txt"
		"6_Junction.txt"
		"7_V-REGION-mutation-and-AA-change-table.txt"
		"8_V-REGION-nt-mutation-statistics.txt"
		"9_V-REGION-AA-change-statistics.txt"
		"10_V-REGION-mutation-hotspots.txt"
	)

	# When the header row is kept it is row 1 and must stay untouched;
	# when it has been skipped every remaining row is data.
	local first_data_row=1
	if [[ "$start_line" == "1" ]]; then
		first_data_row=2
	fi

	local table prefix
	for table in "${tables[@]}"; do
		prefix="${table%%_*}"
		# find -exec keeps paths with whitespace intact and, unlike
		# cat `find ...`, never falls back to reading stdin when no
		# file matches.
		find "$indir/" -name "${prefix}_*" -exec cat {} + \
			| tail -n "+${start_line}" \
			| awk -F $'\t' -v id="$id" -v first="$first_data_row" \
				'BEGIN { OFS = FS } NR >= first { $2 = $2 id } { print }' \
			>> "$outdir/$table"
	done
}

# --- Main flow: unpack every archive and merge its tables ---------------

# At least one archive/id pair is needed for a meaningful merge.
if (( ${#inputs[@]} < 2 )); then
	echo "Usage: $0 OUTPUT ARCHIVE_1 ID_1 [ARCHIVE_2 ID_2 ...]" >&2
	exit 2
fi

# The first archive is merged with start line 1 so that its header rows
# become the header rows of the merged tables.
echo "Unpacking IMGT file 1.."
imgt_unpack "${inputs[0]}" "$workdir/input1"

echo "Concatenating IMGT file 1..."
id=${inputs[1]}
concat_imgt_files "$workdir/input1" "$workdir/output" 1 "$id"

# The remaining arguments are consumed as (archive, id) pairs.
remaining_inputs=("${inputs[@]:2}")

i=0
while (( i < ${#remaining_inputs[@]} )); do
	input="${remaining_inputs[i]}"
	id="${remaining_inputs[i + 1]}"
	# Pairs start at index 0 and the first archive was number 1.
	file_number=$(( i / 2 + 2 ))

	echo "Unpacking IMGT file ${file_number}.."
	current_dir="$workdir/input${file_number}"
	imgt_unpack "${input}" "${current_dir}"
	echo "Concatenating IMGT file ${file_number}..."
	# Start line 2 drops the header row; it is already in the output.
	concat_imgt_files "${current_dir}" "$workdir/output" 2 "$id"
	i=$((i + 2))
done

# Show the first lines of the merged summary in the log.
head "$workdir/output/1_Summary.txt"

echo "Creating new IMGT zip"
# A relative output path would otherwise be resolved inside the directory
# that is being archived once we cd into it.
if [[ "$output" != /* ]]; then
	output="$workdir/$output"
fi
cd "$workdir/output" || exit 1
tar -cJf "$output" -- * || exit 1

# TODO: sequence numbers may repeat across the merged inputs; renumber
# them (e.g. with awk)?

echo "Done"

exit 0

# NOTE: unreachable. The script exits before this point is reached.
# NOTE(review): this looks like an earlier version of the merge loop. It
# treats every remaining argument as an archive and reuses whatever value
# $id last had. Consider deleting it.
i="1"
for input in "${remaining_inputs[@]}"
do
	echo "Unpacking IMGT file $i.."
	current_dir="$workdir/input${i}"
	imgt_unpack "${input}" "${current_dir}"
	echo "Concatenating IMGT file $i..."
	concat_imgt_files "${current_dir}" "$workdir/output" 2 "$id"
	i=$((i+1))
done
