0
|
1 #!/usr/bin/env bash
|
|
2
|
|
3 input_file="$1"
|
|
4 output_file="$2"
|
|
5 chromInfo_file="$3"
|
|
6 chrom="$4"
|
|
7 score_col="$5"
|
|
8 hilbert_curve_level="$6"
|
|
9 summarization_mode="$7"
|
|
10 chrom_col="$8"
|
|
11 start_col="$9"
|
|
12 end_col="${10}"
|
|
13 strand_col="${11}"
|
|
14
|
|
15 ## use first sequence if chrom filed is empty
|
|
16 if [ -z "$chrom" ]; then
|
|
17 chrom=$( head -n 1 "$input_file" | cut -f$chrom_col )
|
|
18 fi
|
|
19
|
|
20 ## get sequence length
|
|
21 if [ ! -r "$chromInfo_file" ]; then
|
|
22 echo "Unable to read chromInfo_file $chromInfo_file" 1>&2
|
|
23 exit 1
|
|
24 fi
|
|
25
|
|
26 chrom_len=$( awk '$1 == chrom {print $2}' chrom=$chrom $chromInfo_file )
|
|
27
|
|
28 ## error if we can't find the chrom_len
|
|
29 if [ -z "$chrom_len" ]; then
|
|
30 echo "Can't find length for sequence \"$chrom\" in chromInfo_file $chromInfo_file" 1>&2
|
|
31 exit 1
|
|
32 fi
|
|
33
|
|
34 ## make sure chrom_len is positive
|
|
35 if [ $chrom_len -le 0 ]; then
|
|
36 echo "sequence \"$chrom\" length $chrom_len <= 0" 1>&2
|
|
37 exit 1
|
|
38 fi
|
|
39
|
|
40 ## modify R script depending on the inclusion of a score column, strand information
|
|
41 input_cols="\$${start_col}, \$${end_col}"
|
|
42 col_types='beg=0, end=0, strand=""'
|
|
43
|
|
44 # if strand_col == 0 (strandCol metadata is not set), assume everything's on the plus strand
|
|
45 if [ $strand_col -ne 0 ]; then
|
|
46 input_cols="${input_cols}, \$${strand_col}"
|
|
47 else
|
|
48 input_cols="${input_cols}, \\\"+\\\""
|
|
49 fi
|
|
50
|
|
51 # set plot value (either from data or use a constant value)
|
|
52 if [ $score_col -eq -1 ]; then
|
|
53 value=1
|
|
54 else
|
|
55 input_cols="${input_cols}, \$${score_col}"
|
|
56 col_types="${col_types}, score=0"
|
|
57 value='chunk$score[i]'
|
|
58 fi
|
|
59
|
|
60 R --vanilla &> /dev/null <<endOfCode
|
|
61 library(HilbertVis);
|
|
62
|
|
63 chrom_len <- ${chrom_len};
|
|
64 chunk_size <- 1000;
|
|
65 interval_count <- 0;
|
|
66 invalid_strand <- 0;
|
|
67
|
|
68 awk_cmd <- paste(
|
|
69 "awk 'BEGIN{FS=\"\t\";OFS=\"\t\"}",
|
|
70 "\$${chrom_col} == \"${chrom}\"",
|
|
71 "{print ${input_cols}}' ${input_file}"
|
|
72 );
|
|
73
|
|
74 col_types <- list(${col_types});
|
|
75 vec <- vector(mode="numeric", length=chrom_len);
|
|
76 conn <- pipe(description=awk_cmd, open="r");
|
|
77
|
|
78 repeat {
|
|
79 chunk <- scan(file=conn, what=col_types, sep="\t", nlines=chunk_size, quiet=TRUE);
|
|
80
|
|
81 if ((rows <- length(chunk\$beg)) == 0)
|
|
82 break;
|
|
83
|
|
84 interval_count <- interval_count + rows;
|
|
85
|
|
86 for (i in 1:rows) {
|
|
87 if (chunk\$strand[i] == '+') {
|
|
88 start <- chunk\$beg[i] + 1;
|
|
89 stop <- chunk\$end[i];
|
|
90 } else if (chunk\$strand[i] == '-') {
|
|
91 start <- chrom_len - chunk\$end[i] - 1;
|
|
92 stop <- chrom_len - chunk\$beg[i];
|
|
93 } else {
|
|
94 invalid_strand <- invalid_strand + 1;
|
|
95 interval_count <- interval_count - 1;
|
|
96 next;
|
|
97 }
|
|
98 vec[start:stop] <- ${value};
|
|
99 }
|
|
100 }
|
|
101
|
|
102 close(conn);
|
|
103
|
|
104 hMat <- hilbertImage(vec, level=$hilbert_curve_level, mode="$summarization_mode");
|
|
105 pdf(file="$output_file", onefile=TRUE, width=8, height=10.5, paper="letter");
|
|
106 showHilbertImage(hMat);
|
|
107 dev.off();
|
|
108 endOfCode
|
|
109
|