comparison imgt_concatenate.sh @ 1:b360a373835f draft

Uploaded
author davidvanzessen
date Thu, 24 Nov 2016 10:27:14 -0500
parents d3cf09f5a1a6
children d77d4700fd0a
comparison
equal deleted inserted replaced
0:d3cf09f5a1a6 1:b360a373835f
30 30
31 function concat_imgt_files { 31 function concat_imgt_files {
32 indir=$1 32 indir=$1
33 outdir=$2 33 outdir=$2
34 start_line=$3 #line # to start at, 2 to skip header 34 start_line=$3 #line # to start at, 2 to skip header
35 cat `find $indir/ -name "1_*"` | tail -n+${start_line} >> "$outdir/1_Summary.txt" 35 id=$4
36 cat `find $indir/ -name "2_*"` | tail -n+${start_line} >> "$outdir/2_IMGT-gapped-nt-sequences.txt" 36 cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/1_Summary.txt"
37 cat `find $indir/ -name "3_*"` | tail -n+${start_line} >> "$outdir/3_Nt-sequences.txt" 37 cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt"
38 cat `find $indir/ -name "4_*"` | tail -n+${start_line} >> "$outdir/4_IMGT-gapped-AA-sequences.txt" 38 cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt"
39 cat `find $indir/ -name "5_*"` | tail -n+${start_line} >> "$outdir/5_AA-sequences.txt" 39 cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt"
40 cat `find $indir/ -name "6_*"` | tail -n+${start_line} >> "$outdir/6_Junction.txt" 40 cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt"
41 cat `find $indir/ -name "7_*"` | tail -n+${start_line} >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" 41 cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/6_Junction.txt"
42 cat `find $indir/ -name "8_*"` | tail -n+${start_line} >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" 42 cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt"
43 cat `find $indir/ -name "9_*"` | tail -n+${start_line} >> "$outdir/9_V-REGION-AA-change-statistics.txt" 43 cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt"
44 cat `find $indir/ -name "10_*"` | tail -n+${start_line} >> "$outdir/10_V-REGION-mutation-hotspots.txt" 44 cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt"
45 cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt"
45 } 46 }
46 47
47 echo "Unpacking IMGT file 1.." 48 echo "Unpacking IMGT file 1.."
48 imgt_unpack ${inputs[0]} "$workdir/input1" 49 imgt_unpack ${inputs[0]} "$workdir/input1"
49 50
50 echo "Concatenating IMGT file 1..." 51 echo "Concatenating IMGT file 1..."
51 concat_imgt_files "$workdir/input1" "$workdir/output" 1 52 id=${inputs[1]}
53 concat_imgt_files "$workdir/input1" "$workdir/output" 1 $id
52 54
53 remaining_inputs=("${inputs[@]:1}") 55 remaining_inputs=("${inputs[@]:2}")
54 56
55 i="2" 57 i="0"
56 for input in "${remaining_inputs[@]}" 58 while [ $i -lt ${#remaining_inputs[@]} ]; do
57 do 59 j=$((i+1))
58 echo "Unpacking IMGT file $i.." 60 input="${remaining_inputs[$i]}"
61 id="${remaining_inputs[$j]}"
62
63 echo "Unpacking IMGT file $j.."
59 current_dir="$workdir/input${i}" 64 current_dir="$workdir/input${i}"
60 imgt_unpack "${input}" "${current_dir}" 65 imgt_unpack "${input}" "${current_dir}"
61 echo "Concatenating IMGT file $1..." 66 echo "Concatenating IMGT file $1..."
62 concat_imgt_files "${current_dir}" "$workdir/output" 2 67 concat_imgt_files "${current_dir}" "$workdir/output" 2 $id
63 i=$((i+1)) 68 i=$((i+2))
64 done 69 done
70
65 71
66 echo "Creating new IMGT zip" 72 echo "Creating new IMGT zip"
67 cd "$workdir/output" 73 cd "$workdir/output"
68 tar cfJ "$output" * 74 tar cfJ "$output" *
69 75
70 #awk to fix the sequence numbers repeating? 76 #awk to fix the sequence numbers repeating?
71 77
72 echo "Done" 78 echo "Done"
79
80 exit 0
81
82 i="1"
83 for input in "${remaining_inputs[@]}"
84 do
85 echo "Unpacking IMGT file $i.."
86 current_dir="$workdir/input${i}"
87 imgt_unpack "${input}" "${current_dir}"
88 echo "Concatenating IMGT file $1..."
89 concat_imgt_files "${current_dir}" "$workdir/output" 2 $id
90 i=$((i+1))
91 done
92