Mercurial > repos > xuebing > sharplabtool
comparison tools/human_genome_variation/hilbertvis.sh @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env bash | |
2 | |
3 input_file="$1" | |
4 output_file="$2" | |
5 chromInfo_file="$3" | |
6 chrom="$4" | |
7 score_col="$5" | |
8 hilbert_curve_level="$6" | |
9 summarization_mode="$7" | |
10 chrom_col="$8" | |
11 start_col="$9" | |
12 end_col="${10}" | |
13 strand_col="${11}" | |
14 | |
15 ## use first sequence if chrom filed is empty | |
16 if [ -z "$chrom" ]; then | |
17 chrom=$( head -n 1 "$input_file" | cut -f$chrom_col ) | |
18 fi | |
19 | |
20 ## get sequence length | |
21 if [ ! -r "$chromInfo_file" ]; then | |
22 echo "Unable to read chromInfo_file $chromInfo_file" 1>&2 | |
23 exit 1 | |
24 fi | |
25 | |
26 chrom_len=$( awk '$1 == chrom {print $2}' chrom=$chrom $chromInfo_file ) | |
27 | |
28 ## error if we can't find the chrom_len | |
29 if [ -z "$chrom_len" ]; then | |
30 echo "Can't find length for sequence \"$chrom\" in chromInfo_file $chromInfo_file" 1>&2 | |
31 exit 1 | |
32 fi | |
33 | |
34 ## make sure chrom_len is positive | |
35 if [ $chrom_len -le 0 ]; then | |
36 echo "sequence \"$chrom\" length $chrom_len <= 0" 1>&2 | |
37 exit 1 | |
38 fi | |
39 | |
40 ## modify R script depending on the inclusion of a score column, strand information | |
41 input_cols="\$${start_col}, \$${end_col}" | |
42 col_types='beg=0, end=0, strand=""' | |
43 | |
44 # if strand_col == 0 (strandCol metadata is not set), assume everything's on the plus strand | |
45 if [ $strand_col -ne 0 ]; then | |
46 input_cols="${input_cols}, \$${strand_col}" | |
47 else | |
48 input_cols="${input_cols}, \\\"+\\\"" | |
49 fi | |
50 | |
51 # set plot value (either from data or use a constant value) | |
52 if [ $score_col -eq -1 ]; then | |
53 value=1 | |
54 else | |
55 input_cols="${input_cols}, \$${score_col}" | |
56 col_types="${col_types}, score=0" | |
57 value='chunk$score[i]' | |
58 fi | |
59 | |
60 R --vanilla &> /dev/null <<endOfCode | |
61 library(HilbertVis); | |
62 | |
63 chrom_len <- ${chrom_len}; | |
64 chunk_size <- 1000; | |
65 interval_count <- 0; | |
66 invalid_strand <- 0; | |
67 | |
68 awk_cmd <- paste( | |
69 "awk 'BEGIN{FS=\"\t\";OFS=\"\t\"}", | |
70 "\$${chrom_col} == \"${chrom}\"", | |
71 "{print ${input_cols}}' ${input_file}" | |
72 ); | |
73 | |
74 col_types <- list(${col_types}); | |
75 vec <- vector(mode="numeric", length=chrom_len); | |
76 conn <- pipe(description=awk_cmd, open="r"); | |
77 | |
78 repeat { | |
79 chunk <- scan(file=conn, what=col_types, sep="\t", nlines=chunk_size, quiet=TRUE); | |
80 | |
81 if ((rows <- length(chunk\$beg)) == 0) | |
82 break; | |
83 | |
84 interval_count <- interval_count + rows; | |
85 | |
86 for (i in 1:rows) { | |
87 if (chunk\$strand[i] == '+') { | |
88 start <- chunk\$beg[i] + 1; | |
89 stop <- chunk\$end[i]; | |
90 } else if (chunk\$strand[i] == '-') { | |
91 start <- chrom_len - chunk\$end[i] - 1; | |
92 stop <- chrom_len - chunk\$beg[i]; | |
93 } else { | |
94 invalid_strand <- invalid_strand + 1; | |
95 interval_count <- interval_count - 1; | |
96 next; | |
97 } | |
98 vec[start:stop] <- ${value}; | |
99 } | |
100 } | |
101 | |
102 close(conn); | |
103 | |
104 hMat <- hilbertImage(vec, level=$hilbert_curve_level, mode="$summarization_mode"); | |
105 pdf(file="$output_file", onefile=TRUE, width=8, height=10.5, paper="letter"); | |
106 showHilbertImage(hMat); | |
107 dev.off(); | |
108 endOfCode | |
109 |