Mercurial > repos > davidvanzessen > imgt_concatenate
comparison imgt_concatenate.sh @ 1:b360a373835f draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 24 Nov 2016 10:27:14 -0500 |
parents | d3cf09f5a1a6 |
children | d77d4700fd0a |
comparison
equal
deleted
inserted
replaced
0:d3cf09f5a1a6 | 1:b360a373835f |
---|---|
30 | 30 |
31 function concat_imgt_files { | 31 function concat_imgt_files { |
32 indir=$1 | 32 indir=$1 |
33 outdir=$2 | 33 outdir=$2 |
34 start_line=$3 #line # to start at, 2 to skip header | 34 start_line=$3 #line # to start at, 2 to skip header |
35 cat `find $indir/ -name "1_*"` | tail -n+${start_line} >> "$outdir/1_Summary.txt" | 35 id=$4 |
36 cat `find $indir/ -name "2_*"` | tail -n+${start_line} >> "$outdir/2_IMGT-gapped-nt-sequences.txt" | 36 cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/1_Summary.txt" |
37 cat `find $indir/ -name "3_*"` | tail -n+${start_line} >> "$outdir/3_Nt-sequences.txt" | 37 cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt" |
38 cat `find $indir/ -name "4_*"` | tail -n+${start_line} >> "$outdir/4_IMGT-gapped-AA-sequences.txt" | 38 cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt" |
39 cat `find $indir/ -name "5_*"` | tail -n+${start_line} >> "$outdir/5_AA-sequences.txt" | 39 cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt" |
40 cat `find $indir/ -name "6_*"` | tail -n+${start_line} >> "$outdir/6_Junction.txt" | 40 cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt" |
41 cat `find $indir/ -name "7_*"` | tail -n+${start_line} >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" | 41 cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/6_Junction.txt" |
42 cat `find $indir/ -name "8_*"` | tail -n+${start_line} >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" | 42 cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" |
43 cat `find $indir/ -name "9_*"` | tail -n+${start_line} >> "$outdir/9_V-REGION-AA-change-statistics.txt" | 43 cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" |
44 cat `find $indir/ -name "10_*"` | tail -n+${start_line} >> "$outdir/10_V-REGION-mutation-hotspots.txt" | 44 cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt" |
| | 45 cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt" |
45 } | 46 } |
46 | 47 |
47 echo "Unpacking IMGT file 1.." | 48 echo "Unpacking IMGT file 1.." |
48 imgt_unpack ${inputs[0]} "$workdir/input1" | 49 imgt_unpack ${inputs[0]} "$workdir/input1" |
49 | 50 |
50 echo "Concatenating IMGT file 1..." | 51 echo "Concatenating IMGT file 1..." |
51 concat_imgt_files "$workdir/input1" "$workdir/output" 1 | 52 id=${inputs[1]} |
| | 53 concat_imgt_files "$workdir/input1" "$workdir/output" 1 $id |
52 | 54 |
53 remaining_inputs=("${inputs[@]:1}") | 55 remaining_inputs=("${inputs[@]:2}") |
54 | 56 |
55 i="2" | 57 i="0" |
56 for input in "${remaining_inputs[@]}" | 58 while [ $i -lt ${#remaining_inputs[@]} ]; do |
57 do | 59 j=$((i+1)) |
58 echo "Unpacking IMGT file $i.." | 60 input="${remaining_inputs[$i]}" |
| | 61 id="${remaining_inputs[$j]}" |
| | 62 |
| | 63 echo "Unpacking IMGT file $j.." |
59 current_dir="$workdir/input${i}" | 64 current_dir="$workdir/input${i}" |
60 imgt_unpack "${input}" "${current_dir}" | 65 imgt_unpack "${input}" "${current_dir}" |
61 echo "Concatenating IMGT file $1..." | 66 echo "Concatenating IMGT file $1..." |
62 concat_imgt_files "${current_dir}" "$workdir/output" 2 | 67 concat_imgt_files "${current_dir}" "$workdir/output" 2 $id |
63 i=$((i+1)) | 68 i=$((i+2)) |
64 done | 69 done |
| | 70 |
65 | 71 |
66 echo "Creating new IMGT zip" | 72 echo "Creating new IMGT zip" |
67 cd "$workdir/output" | 73 cd "$workdir/output" |
68 tar cfJ "$output" * | 74 tar cfJ "$output" * |
69 | 75 |
70 #awk to fix the sequence numbers repeating? | 76 #awk to fix the sequence numbers repeating? |
71 | 77 |
72 echo "Done" | 78 echo "Done" |
| | 79 |
| | 80 exit 0 |
| | 81 |
| | 82 i="1" |
| | 83 for input in "${remaining_inputs[@]}" |
| | 84 do |
| | 85 echo "Unpacking IMGT file $i.." |
| | 86 current_dir="$workdir/input${i}" |
| | 87 imgt_unpack "${input}" "${current_dir}" |
| | 88 echo "Concatenating IMGT file $1..." |
| | 89 concat_imgt_files "${current_dir}" "$workdir/output" 2 $id |
| | 90 i=$((i+1)) |
| | 91 done |
| | 92 |