| 
0
 | 
     1 #!/bin/bash
 | 
| 
 | 
     # Absolute path of the directory this script lives in.
     dir="$(cd "$(dirname "$0")" && pwd)"

     # Command line: <output> <imgt archive> <id> [<imgt archive> <id> ...]
     args=("$@")
     output=$1
     # Everything after the output path: alternating archive / id values.
     inputs=("${args[@]:1}")

     # Intermediate directories are created below the directory the script
     # was started from.
     workdir="$PWD"

     echo "Output: $output"
     # [*] joins the elements into one word; [@] inside a longer string would
     # hand echo several arguments instead.
     echo "Inputs: ${inputs[*]}"

     # -p: an "output" directory left over from an earlier run is not an error.
     mkdir -p "$workdir/output"
 | 
| 
 | 
    14 
 | 
| 
 | 
    #######################################
    # Unpack an IMGT archive into a directory.
    # The archive format is taken from the report of file(1): Zip archives are
    # extracted with unzip, XZ-compressed tarballs with tar.
    # Arguments:
    #   $1 - path of the archive
    #   $2 - directory to extract into (created when missing)
    # Outputs:
    #   the extraction tool's own output; for XZ tarballs the tar command
    #   line is echoed first; an error message on stderr for unknown formats
    # Returns:
    #   status of mkdir/unzip/tar, or 1 when the archive type is not recognised
    #######################################
    function imgt_unpack {
      local imgt_zip=$1
      local outdir=$2
      local file_type

      mkdir -p -- "$outdir" || return

      # -b leaves the file name out of the report, so a path that happens to
      # contain "Zip archive" cannot be mistaken for the detected type.
      file_type="$(file -b -- "$imgt_zip")"

      if [[ "$file_type" == *"Zip archive"* ]]; then
        unzip "$imgt_zip" -d "$outdir"
      elif [[ "$file_type" == *"XZ compressed data"* ]]; then
        echo "tar -xJf $imgt_zip -C $outdir"
        tar -xJf "$imgt_zip" -C "$outdir"
      else
        # Report the problem instead of returning success with an empty directory.
        echo "imgt_unpack: unsupported archive type for '$imgt_zip': ${file_type:-unknown}" >&2
        return 1
      fi
    }
 | 
| 
 | 
    30 
 | 
| 
 | 
    #######################################
    # Append the ten IMGT tables found in one unpacked archive to the combined
    # tables in the output directory, suffixing the second tab-separated
    # column of every data row with an id.
    # Arguments:
    #   $1 - directory holding the unpacked archive (searched recursively)
    #   $2 - directory holding the combined tables (appended to)
    #   $3 - first input line to copy: 1 keeps the header line, 2 skips it
    #   $4 - text appended to column 2
    # Outputs:
    #   a warning on stderr for every table missing from the archive
    #######################################
    function concat_imgt_files {
      local indir=$1
      local outdir=$2
      local start_line=$3
      local id=$4

      # When copying starts at line 1 the first line is the header and must
      # stay untouched; otherwise every copied line is a data row.
      local first_tagged=1
      if [[ "${start_line}" == "1" ]]; then
        first_tagged=2
      fi

      # Combined table names; the part before the first "_" is the prefix the
      # matching file carries inside an archive.
      local -a targets=(
        "1_Summary.txt"
        "2_IMGT-gapped-nt-sequences.txt"
        "3_Nt-sequences.txt"
        "4_IMGT-gapped-AA-sequences.txt"
        "5_AA-sequences.txt"
        "6_Junction.txt"
        "7_V-REGION-mutation-and-AA-change-table.txt"
        "8_V-REGION-nt-mutation-statistics.txt"
        "9_V-REGION-AA-change-statistics.txt"
        "10_V-REGION-mutation-hotspots.txt"
      )

      local target prefix path
      local -a found
      for target in "${targets[@]}"; do
        prefix="${target%%_*}"

        # NUL-delimited so that paths with spaces survive intact.
        found=()
        while IFS= read -r -d '' path; do
          found+=("$path")
        done < <(find "$indir/" -name "${prefix}_*" -print0)

        if (( ${#found[@]} == 0 )); then
          # A bare "cat" would block on stdin; make sure the target exists
          # and move on instead.
          echo "concat_imgt_files: no ${prefix}_* file in $indir" >&2
          : >> "$outdir/$target"
          continue
        fi

        cat -- "${found[@]}" \
          | tail -n "+${start_line}" \
          | awk -F $'\t' -v id="$id" -v first_tagged="$first_tagged" \
            'BEGIN { OFS = FS } { if (NR >= first_tagged) { $2 = $2 id } print }' \
            >> "$outdir/$target"
      done
    }
 | 
| 
 | 
    61 
 | 
| 
 | 
    # At least one archive / id pair has to follow the output path.
    if (( ${#inputs[@]} < 2 )); then
      echo "Usage: $0 <output> <imgt archive> <id> [<imgt archive> <id> ...]" >&2
      exit 1
    fi

    echo "Unpacking IMGT file 1.."
    imgt_unpack "${inputs[0]}" "$workdir/input1"

    echo "Concatenating IMGT file 1..."
    id=${inputs[1]}
    # Start line 1: the first archive contributes the header line of each table.
    concat_imgt_files "$workdir/input1" "$workdir/output" 1 "$id"

    # The rest of the arguments are further archive / id pairs.
    remaining_inputs=("${inputs[@]:2}")

    i=0
    file_no=2 # running number of the archive, used for messages and directories
    while (( i < ${#remaining_inputs[@]} )); do
      input="${remaining_inputs[i]}"
      id="${remaining_inputs[i + 1]}"

      echo "Unpacking IMGT file ${file_no}.."
      current_dir="$workdir/input${file_no}"
      imgt_unpack "$input" "$current_dir"
      echo "Concatenating IMGT file ${file_no}..."
      # Start line 2: the header line is already in the combined tables.
      concat_imgt_files "$current_dir" "$workdir/output" 2 "$id"

      i=$((i + 2))
      file_no=$((file_no + 1))
    done

    # Show the beginning of the combined summary table in the job log.
    head "$workdir/output/1_Summary.txt"

    echo "Creating new IMGT zip"
    # Without this check a failed cd would make tar pack the current directory.
    cd "$workdir/output" || {
      echo "Cannot change into $workdir/output" >&2
      exit 1
    }
    tar cfJ "$output" * || {
      echo "Failed to create $output" >&2
      exit 1
    }

    # TODO: awk to fix the sequence numbers repeating?

    echo "Done"

    exit 0
 | 
| 
 | 
   108 
 |