annotate CREST.pl @ 0:acc8d8bfeb9a

Uploaded
author jjohnson
date Wed, 08 Feb 2012 16:59:24 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1 #!/usr/bin/perl -w
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
2 #use lib '/home/jwang2/AssembleTest/Detect/nonSJsrc/dev';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
3 use strict;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
4 use Carp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
5 use Getopt::Long;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
6 use English;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
7 use Pod::Usage;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
8 use Data::Dumper;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
9 use Bio::DB::Sam;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
10 use Bio::DB::Sam::Constants;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
11 use Bio::SearchIO;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
12 use Bio::SeqIO;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
13 use File::Temp qw/ tempfile tempdir /;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
14 use File::Spec;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
15 use File::Path;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
16 use File::Copy;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
17 use File::Basename;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
18 use List::MoreUtils qw/ uniq /;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
19 use Cwd;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
20 use SCValidator qw($lowqual_cutoff $min_percent_id $min_percent_hq LEFT_CLIP RIGHT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
21 use SVUtil qw($use_scratch $work_dir clip_fa_file prepare_reads parse_range get_input_bam
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
22 find_smallest_cover get_work_dir is_PCR_dup read_fa_file get_sclip_reads);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
23 use StructVar;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
24 require SVExtTools;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
25
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
26 use Transcript;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
27 use Gene;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
28 use GeneModel;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
29
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
30 my $debug = 1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
31 $min_percent_id = 90;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
32 # input/output
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
33 my ( $out_dir, $out_prefix, $range, $bam_d, $bam_g, $sclip_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
34 my $out_suffix = "predSV.txt";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
35 my $read_len = 100;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
36 my $ref_genome ;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
37
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
38 my $hetero_factor = 0.4;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
39 my $triger_p_value = 0.05;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
40
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
41 #RNASeq support
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
42 my $RNASeq = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
43 my $gene_model_file;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
44 my $gm_format = "REFFLAT";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
45 my $gm;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
46 my $max_sclip_reads = 50; # we will consider the position if we have enough sclipped reads
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
47 my $min_one_side_reads = 5;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
48 my $min_pct_sclip_reads = 5; # we require at least 5% reads have softclipping
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
49
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
50 # external programs related variable
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
51 # 1. cap3 related variables
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
52 my $cap3 = "cap3";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
53 my $cap3_options=" -h 70 -y 10 > /dev/null";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
54
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
55 # 2 blat related variables, using blat server and blat standalone
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
56 my $blat_client_exe = "gfClient";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
57 my $blat_client_options = ' -out=psl -nohead -minIdentity=95 -maxIntron=5';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
58 my $target_genome = "hg18.2bit";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
59 my $dir_2bit = "/";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
60 my $blat_server = "sjblat";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
61 my $blat_port = 50000;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
62 my $blat_exe = "blat";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
63 my $blat_options = " -tileSize=7 -stepSize=1 -out=psl -minScore=15 -noHead -maxIntron=1 ";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
64
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
65 $use_scratch = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
66 my $paired = 1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
67
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
68 my $rescue = 1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
69 # other options
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
70 my ($min_sclip_reads, $max_repetitive_cover, $min_sclip_len );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
71 my $min_hit_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
72 my $rmdup;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
73 my $min_clip_len;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
74 my ($max_score_diff, $max_num_hits);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
75 my ($min_dist_diff, $min_hit_len) = (15, 15);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
76
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
77 #SV filter related parameters
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
78 my $max_bp_dist = 15;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
79 my $min_percent_cons_of_read = 0.75;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
80 my $germline_seq_width = 100;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
81 my $germline_search_width = 50;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
82 my $rm_tandem_repeat = 1; #tandem repeat mediated events
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
83 my $tr_max_indel_size = 100; #tandem repeat mediated indel size
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
84 my $tr_min_size = 2;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
85 my $tr_max_size = 8; #tandem repeat max_size of single repeat
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
86 my $tr_min_num = 4; #tandem repeat minimum number of occurence
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
87
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
88 # verification pupuse
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
89 my $verify_file;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
90 my $sensitive;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
91 my $cluster_size;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
92
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
93 # common help options
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
94 my ( $help, $man, $version, $usage );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
95 my $optionOK = GetOptions(
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
96 # SV validation related parameters
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
97 'max_bp_dist=i' => \$max_bp_dist,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
98 'min_percent_cons_of_read=f' => \$min_percent_cons_of_read,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
99 'germline_seq_width=i' => \$germline_seq_width,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
100 'germline_search_width=i' => \$germline_search_width,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
101 # tandem repeat related parameters
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
102 'rm_tandem_repeat!' => \$rm_tandem_repeat,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
103 'tr_max_indel_size=i' => \$tr_max_indel_size,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
104 'tr_min_size=i' => \$tr_min_size,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
105 'tr_max_size=i' => \$tr_max_size,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
106 'tr_min_num=i' => \$tr_min_num,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
107 'hetero_factor=f' => \$hetero_factor,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
108 'triger_p_value=f' => \$triger_p_value,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
109
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
110 # input/output
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
111 #'s|sample=s' => \$sample,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
112 'd|input_d=s' => \$bam_d,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
113 'g|inpug_g=s' => \$bam_g,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
114 'f|sclipfile=s' => \$sclip_file,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
115 'p|prefix=s' => \$out_prefix,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
116 'o|out_dir=s' => \$out_dir,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
117 'l|read_len=i' => \$read_len,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
118 'ref_genome=s' => \$ref_genome,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
119 'v|verify_file=s' => \$verify_file,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
120 'sensitive' => \$sensitive,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
121 'paired!' => \$paired,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
122 'rescue!' => \$rescue,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
123 'cluster_size=i' => \$cluster_size,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
124 # external programs location and options
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
125 'scratch!' => \$use_scratch,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
126 'cap3=s' => \$cap3,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
127 'cap3opt=s' => \$cap3_options,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
128 'blatclient=s' => \$blat_client_exe,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
129 'blatclientopt=s' => \$blat_client_options,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
130 'blat=s' => \$blat_exe,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
131 't|target_genome=s' => \$target_genome,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
132 'blatopt=s' => \$blat_options,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
133 'blatserver=s' => \$blat_server,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
134 'blatport=i' => \$blat_port,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
135 '2bitdir=s' => \$dir_2bit,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
136 #RNAseq support
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
137 'RNASeq' => \$RNASeq,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
138 'genemodel=s' => \$gene_model_file,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
139 'gmformat=s' => \$gm_format,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
140 #other related parameters
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
141 'r|range=s' => \$range,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
142 'max_score_diff=i' => \$max_score_diff,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
143 'm|min_sclip_reads=i' => \$min_sclip_reads,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
144 'min_one_side_reads=i' => \$min_one_side_reads,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
145 'max_rep_cover=i' => \$max_repetitive_cover,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
146 'min_sclip_len=i' => \$min_sclip_len,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
147 'min_hit_len=i' => \$min_hit_len,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
148 'min_dist_diff=i' => \$min_dist_diff,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
149 'min_hit_reads=i' => \$min_hit_reads,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
150 'rmdup!' => \$rmdup,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
151
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
152 # soft_clipping related parameters (from SVDector package)
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
153 'min_percent_id=i' => \$min_percent_id,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
154 'min_percent_hq=i' => \$SCValidator::min_percent_hq,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
155 'lowqual_cutoff=i' => \$lowqual_cutoff,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
156
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
157 # common help parameters
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
158 'h|help|?' => \$help,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
159 'man' => \$man,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
160 'usage' => \$usage,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
161 'version' => \$version,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
162 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
163
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
164 pod2usage(-verbose=>2) if($man or $usage);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
165 pod2usage(1) if($help or $version );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
166
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
167 my $start_dir = getcwd;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
168
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
169 # figure out input file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
170 if(!$bam_d) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
171 pod2usage(1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
172 croak "You need specify input bam file(s)";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
173 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
174
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
175 if(!$sclip_file) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
176 pod2usage(1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
177 croak "You need to specify the softclipping file";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
178 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
179 $sclip_file = File::Spec->rel2abs($sclip_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
180
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
181 if(!$ref_genome) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
182 pod2usage(1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
183 croak "You need to specify the reference genome used by bam file";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
184 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
185
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
186 croak "The file you sepcified does not exist" unless (
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
187 -e $sclip_file && -e $bam_d && -e $ref_genome && -e $target_genome);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
188 my $input_base;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
189 $bam_d = File::Spec->rel2abs($bam_d);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
190 $input_base = fileparse($bam_d);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
191 $bam_g = File::Spec->rel2abs($bam_g) if($bam_g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
192
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
193 #RNASeq support
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
194 if($RNASeq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
195 croak "You need specify the input gene model file" unless ($gene_model_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
196 StructVar->add_RNASeq_filter();
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
197 $min_sclip_reads = 10 unless $min_sclip_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
198 $max_repetitive_cover = 5000 unless $max_repetitive_cover;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
199 $min_sclip_len = 20 unless $min_clip_len;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
200 $max_score_diff = 5 unless $max_score_diff;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
201 $max_num_hits = 3 unless $max_num_hits;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
202 $min_hit_reads = 5 unless $min_hit_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
203 $cluster_size = 5 unless $cluster_size;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
204 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
205 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
206 $min_sclip_reads = 3 unless $min_sclip_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
207 $max_repetitive_cover = 500 unless $max_repetitive_cover;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
208 $min_sclip_len = 20 unless $min_clip_len;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
209 $max_score_diff = 5 unless $max_score_diff;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
210 $max_num_hits = 10 unless $max_num_hits;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
211 $min_hit_reads = 1 unless $min_hit_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
212 $cluster_size = 1 unless $cluster_size;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
213 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
214
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
215 if($gene_model_file) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
216 $gm = GeneModel->new if($gene_model_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
217 $gm->from_file($gene_model_file, $gm_format);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
218 StructVar->gene_model($gm);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
219 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
220
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
221 # set up the external programs and validators
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
222 # Those variable will be global
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
223 my $assembler = Assembler->new(
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
224 -PRG => $cap3,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
225 -OPTIONS => $cap3_options
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
226 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
227
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
228 my $mapper = Mapper->new(
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
229 -PRG => join(' ', ($blat_client_exe, $blat_server, $blat_port)),
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
230 -OPTIONS => $blat_client_options,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
231 -BIT2_DIR => $dir_2bit,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
232 -MAX_SCORE_DIFF => $max_score_diff,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
233 -MAX_NUM_HITS => $max_num_hits,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
234 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
235
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
236 my $aligner = Aligner->new(
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
237 -PRG => $blat_exe,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
238 -OPTIONS => $blat_options,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
239 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
240
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
241 StructVar->assembler($assembler);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
242 StructVar->read_len($read_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
243 StructVar->aligner($aligner);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
244 StructVar->mapper($mapper);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
245 StructVar->RNASeq(1) if($RNASeq);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
246 StructVar->genome($target_genome);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
247 StructVar->remove_filter("tandem_repeat") unless($rm_tandem_repeat);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
248 StructVar->tr_max_indel_size($tr_max_indel_size);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
249 StructVar->tr_min_size($tr_min_size);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
250 StructVar->tr_max_size($tr_max_size);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
251 StructVar->tr_min_num($tr_min_num);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
252 StructVar->max_bp_dist($max_bp_dist) if($max_bp_dist);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
253 StructVar->germline_seq_width($germline_seq_width) if($germline_seq_width);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
254 StructVar->germline_search_width($germline_search_width) if($germline_search_width);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
255
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
256
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
257 my $validator = SCValidator->new();
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
258 $validator->remove_validator('strand_validator') if(!$paired);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
259
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
260 #setup output and working directory
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
261 $out_dir = getcwd if(!$out_dir);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
262 mkdir $out_dir if(!-e $out_dir || ! -d $out_dir);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
263 $work_dir = get_work_dir(-SCRATCH => $use_scratch);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
264 $work_dir = $out_dir if(!$work_dir);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
265 $use_scratch = undef if($work_dir eq $out_dir); # don't erase the out_dir
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
266 chdir($work_dir);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
267
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
268 # figure out output prefix
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
269 if(!$out_prefix) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
270 $out_prefix = $input_base;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
271 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
272
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
273 my $sam_d = Bio::DB::Sam->new( -bam => $bam_d, -fasta => $ref_genome);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
274 StructVar->sam_d($sam_d);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
275 my $sam_g;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
276 if($bam_g) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
277 $sam_g = Bio::DB::Sam->new( -bam => $bam_g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
278 StructVar->sam_g($sam_g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
279 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
280
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
281 #my $output_file = File::Spec->catfile($outdir, $out_prefix . $out_suffix);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
282 my $output_file;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
283 $output_file = join('.', $out_prefix, $out_suffix);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
284 $output_file = File::Spec->catfile($out_dir, $output_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
285
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
286 # the softclip file is sorted, so no need to re-sort it
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
287 open my $SCLIP, "<$sclip_file" or croak "can't open $sclip_file:$OS_ERROR";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
288 my %sclip;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
289 my $pre_p;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
290 while( my $line = <$SCLIP> ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
291 chomp $line;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
292 my ($chr, $pos, $ort, $cover, $C) = split /\t/, $line;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
293 if( ! exists($sclip{$chr})) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
294 $sclip{$chr} = [];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
295 $pre_p = $pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
296 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
297 $C = 30 unless $C;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
298 if($pos < $pre_p) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
299 print STDERR "The input file is not sorted!";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
300 exit(1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
301 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
302 push @{$sclip{$chr}}, [$pos, $ort, $cover, $C];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
303 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
304 close $SCLIP;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
305
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
306 my @final_SV;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
307 my @s_cover = @{find_smallest_cover($min_sclip_reads, $hetero_factor, $triger_p_value)};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
308 if($range) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
309 @final_SV = detect_range_SVs($sam_d, $range, \%sclip, \@s_cover);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
310 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
311 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
312 foreach my $chr (keys %sclip) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
313 my @tmpSV = detect_range_SVs($sam_d, $chr, \%sclip, \@s_cover);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
314 @final_SV = @tmpSV unless (@final_SV);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
315 foreach my $sv (@tmpSV) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
316 push @final_SV, $sv if($sv && ! is_dup_SV(\@final_SV, $sv));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
317 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
318 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
319 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
320 undef %sclip;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
321 open my $OUT, ">$output_file" or croak "can't open $output_file:$OS_ERROR";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
322 foreach my $SV (@final_SV) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
323 if($SV->filter) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
324 print $OUT $SV->to_string ;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
325 if($RNASeq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
326 print $OUT "\t", join("\t", @{$SV->get_genes});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
327 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
328 print $OUT "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
329 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
330 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
331 chdir $start_dir;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
332 exit(0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
333
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
334 sub detect_range_SVs {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
335 my ($sam_d, $range, $r_sclips, $r_scover) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
336 my ($chr, $start, $end) = parse_range($range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
337 my ($tid) = $sam_d->header->parse_region($chr);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
338 my $chr_len = $sam_d->header->target_len->[$tid];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
339 my $r_range_sclips = search_sclip($r_sclips, $range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
340 return unless $r_range_sclips;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
341 my @scover = @{$r_scover};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
342 my @rtn;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
343
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
344 my ($f_tree, $r_tree);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
345 if($RNASeq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
346 my $full_chr = $chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
347 my ($gm_chr) = $gm->get_all_chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
348 $full_chr = "chr" . $chr if($chr !~ m/chr/ && $gm_chr =~ /chr/);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
349 ($f_tree, $r_tree) = ($gm->sub_model($full_chr, "+"), $gm->sub_model($full_chr, "-")) if($RNASeq);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
350 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
351 my $n = scalar @{$r_range_sclips};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
352 push @{$r_range_sclips}, [$r_range_sclips->[$n-1][0] + $cluster_size + 1, '+', 1, 50];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
353 push @{$r_range_sclips}, [$r_range_sclips->[$n-1][0] + $cluster_size + 1, '-', 1, 50];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
354 $n = $n + 2;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
355
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
356 # try to save the 1 base off problem of soft-clipping
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
357 my (@ss1, $p1, $c1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
358 my (@ss2, $p2, $c2);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
359 my ($n1, $n2, $cover1, $cover2) = (0, 0, 0, 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
360 for( my $i = 0; $i < $n; $i++) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
361 my $s = $r_range_sclips->[$i];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
362 # print join("\t", @{$s}), "\n" if($debug);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
363 my $clip = $s->[1] eq '+' ? RIGHT_CLIP : LEFT_CLIP;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
364 next if($s->[0] >= $chr_len );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
365 if($clip == RIGHT_CLIP) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
366 $p1 = $c1 = $s->[0] unless $p1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
367 if($s->[0] < $c1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
368 print STDERR "The soft-clipping file has problem! It's either not sorted or
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
369 the file has mutliple parts for a chromsome!";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
370 last;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
371 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
372 if($s->[0] - $c1 < $cluster_size) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
373 $n1 += $s->[2];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
374 $cover1 = $s->[3] if($cover1 < $s->[3]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
375 push @ss1, $s;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
376 if($s->[0] < $c1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
377 $p1 += $s->[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
378 $c1 = $p1 / scalar @ss1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
379 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
380 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
381 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
382 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
383 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
384 $p2 = $c2 = $s->[0] unless $p2;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
385 if($s->[0] < $c2) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
386 print STDERR "The soft-clipping file has problem! It's either not sorted or
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
387 the file has mutliple parts for a chromsome!";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
388 last;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
389 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
390 if($s->[0] - $p2 < $cluster_size) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
391 $n2 += $s->[2];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
392 $cover2 = $s->[3] if($cover2 < $s->[3]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
393 push @ss2, $s;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
394 if($s->[0] < $c2) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
395 $p2 += $s->[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
396 $c2 = $p2 / scalar @ss2;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
397 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
398 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
399 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
400 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
401
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
402 my @cc = $clip == LEFT_CLIP ? @ss2 : @ss1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
403 my $pos = $clip == LEFT_CLIP ? $ss2[$#ss2]->[0] : $ss1[0]->[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
404 my ($n_s, $cover_s) = $clip == LEFT_CLIP ? ($n2, $cover2) : ($n1, $cover1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
405 my $tmp_range = $clip == LEFT_CLIP ? [$ss2[0]->[0], $ss2[$#ss2]->[0] + $cluster_size] :
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
406 [$ss1[0]->[0] - $cluster_size, $ss1[$#ss1]->[0]];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
407
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
408 if($clip == LEFT_CLIP) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
409 @ss2 = (); $p2 = $c2 = $s->[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
410 $n2 = $s->[2];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
411 $cover2 = $s->[3];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
412 push @ss2, $s;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
413 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
414 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
415 @ss1 = (); $p1 = $c1 = $s->[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
416 $n1 = $s->[2];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
417 $cover1 = $s->[3];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
418 push @ss1, $s;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
419 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
420
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
421 my @s_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
422 if($RNASeq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
423 # print $n_s, "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
424 next if($n_s < $min_sclip_reads && $cover_s > $s_cover[$n_s] ) ;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
425 next if($n_s < $max_sclip_reads && $n_s * 100 <= $cover_s * $min_pct_sclip_reads);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
426 my @f_genes = $f_tree->intersect($tmp_range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
427 my @r_genes = $r_tree->intersect($tmp_range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
428 next if(scalar @f_genes == 0 && scalar @r_genes == 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
429 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
430 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
431 if($n_s < $min_sclip_reads) { #too few covered
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
432 if($cover_s > $s_cover[$n_s]) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
433 next if(!$sensitive);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
434 foreach my $c (@cc) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
435 next if($c->[0] >= $chr_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
436 push @s_reads, get_sclip_reads(-SAM => $sam_d,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
437 -CHR =>$chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
438 -START => $c->[0],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
439 -END => $c->[0],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
440 -CLIP => $clip,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
441 -MINCLIPLEN => 0,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
442 -VALIDATOR => $validator,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
443 -PAIRED => $paired,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
444 -RMDUP => $rmdup,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
445 -EXTRA => abs($c->[0] - $pos) );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
446 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
447 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
448 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
449 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
450 if(! @s_reads) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
451 foreach my $c (@cc) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
452 next if($c->[0] >= $chr_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
453 push @s_reads, get_sclip_reads(-SAM => $sam_d,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
454 -CHR =>$chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
455 -START => $c->[0],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
456 -END => $c->[0],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
457 -CLIP => $clip,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
458 -MINCLIPLEN => 0,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
459 -VALIDATOR => $validator,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
460 -PAIRED => $paired,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
461 -RMDUP => $rmdup,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
462 -EXTRA => abs($c->[0] - $pos) );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
463 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
464 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
465 my $l = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
466 foreach my $r (@s_reads) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
467 my $len = length $r->{seq};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
468 $l = $len if($l < $len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
469 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
470 next if ($l < $min_sclip_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
471 print STDERR join("\t", ($chr, $pos, $clip == LEFT_CLIP ? "-" : "+", scalar @s_reads)), "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
472 my @SV = detect_SV(
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
473 -SAM => $sam_d,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
474 -CHR => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
475 -POS => $pos,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
476 -ORT => $clip == LEFT_CLIP ? "-" : "+",
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
477 -SCLIP => \%sclip,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
478 -COVER => $cover_s,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
479 -SREADS => \@s_reads);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
480 foreach my $sv (@SV) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
481 $sv->{type} = $sv->type;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
482 push @rtn, $sv if($sv && ! is_dup_SV(\@rtn, $sv));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
483 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
484 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
485
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
486 if($RNASeq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
487 push @rtn, find_del_SVs($sam_d, $chr, $start, $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
488 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
489 return @rtn;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
490 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
491
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
492 sub is_dup_SV {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
493 my($r_SVs, $sv) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
494 foreach my $s (@{$r_SVs}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
495 return 1
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
496 if( $s->first_bp->{pos} == $sv->second_bp->{pos} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
497 $s->first_bp->{chr} eq $sv->second_bp->{chr} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
498 $s->second_bp->{pos} == $sv->first_bp->{pos} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
499 $s->second_bp->{chr} eq $sv->first_bp->{chr} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
500 $s->{type} == $sv->{type} );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
501 return 1
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
502 if( $s->first_bp->{pos} == $sv->first_bp->{pos} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
503 $s->first_bp->{chr} eq $sv->first_bp->{chr} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
504 $s->second_bp->{pos} == $sv->second_bp->{pos} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
505 $s->second_bp->{chr} eq $sv->second_bp->{chr} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
506 $s->{type} == $sv->{type} );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
507
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
508 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
509 return;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
510 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
511
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
512 sub search_sclip {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
513 my ($r_sclip, $range) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
514 my ($chr, $start, $end) = parse_range($range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
515 return $r_sclip->{$chr} if(!$start);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
516 my $start_i = bin_search($r_sclip->{$chr}, $start);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
517 my $end_i = bin_search($r_sclip->{$chr}, $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
518 my @tmp = @{$r_sclip->{$chr}};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
519 if($start_i <= $end_i) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
520 @tmp = @tmp[$start_i .. $end_i];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
521 if($start_i == $end_i) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
522 return undef if($tmp[0]->[0] < $start || $tmp[0]->[0] > $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
523 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
524 return \@tmp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
525 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
526 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
527 return undef;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
528 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
529 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
530
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
531 sub bin_search {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
532 my ($a, $p) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
533 my ($s, $e) = (0, scalar(@{$a})-1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
534 my $m = int( ($s + $e)/2);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
535 while(1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
536 return $s if($a->[$s][0] >= $p);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
537 return $e if($a->[$e][0] <= $p);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
538 return $m if($a->[$m][0] == $p || ($a->[$m-1][0] < $p && $a->[$m+1][0] > $p));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
539 if($a->[$m][0] > $p) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
540 $e = $m;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
541 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
542 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
543 $s = $m;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
544 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
545 $m = int( ($s+$e)/2 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
546 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
547 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
548
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
549 sub count_coverage {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
550 my ($sam, $chr, $pos, $clip) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
551 if($rmdup && !$RNASeq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
552 my @pairs;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
553 my $seg = $sam->segment(-seq_id => $chr, -start => $pos, -end => $pos);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
554 my $n = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
555 return 0 unless $seg;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
556 my $itr = $seg->features(-iterator => 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
557 while( my $a = $itr->next_seq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
558 next unless($a->start && $a->end); #why unmapped reads here?
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
559 my $sclip_len = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
560 if($clip) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
561 my @cigar_array = @{$a->cigar_array};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
562 #$sclip_len = $1 if($a->cigar_str =~ m/S(\d+)$/ && $clip == RIGHT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
563 $sclip_len = $cigar_array[0]->[1] if($cigar_array[0]->[0] eq 'S' && $clip == RIGHT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
564 #$sclip_len = $1 if($a->cigar_str =~ m/^S(\d+)/ && $clip == LEFT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
565 $sclip_len = $cigar_array[$#cigar_array]->[1] if($cigar_array[$#cigar_array]->[0] eq 'S' && $clip == RIGHT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
566 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
567 next if(@pairs > 0 && is_PCR_dup($a, \@pairs, $sclip_len));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
568 $n++;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
569 return $n if( $n > $max_repetitive_cover);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
570 push @pairs, [$a->start, $a->end, $a->mate_start ? $a->mate_start : 0,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
571 $a->mate_end ? $a->mate_end : 0, $sclip_len];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
572 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
573 return $n;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
574 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
575 else{
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
576 my ($c) = $sam->features(-type => 'coverage', -seq_id=> $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
577 -start => $pos, -end => $pos);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
578 return 0 unless $c;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
579 my @c_d = $c->coverage;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
580 return $c_d[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
581 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
582 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
583
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
584 sub detect_SV {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
585 my %param = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
586 my @s_reads = @{$param{-SREADS}};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
587 my($sam, $chr, $ort, $r_sclip, $coverage) = ( $param{-SAM},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
588 $param{-CHR}, $param{-ORT}, $param{-SCLIP}, $param{-COVER});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
589 my $clip = $ort eq '+' ? RIGHT_CLIP : LEFT_CLIP;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
590 return if($coverage > $max_repetitive_cover);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
591 my $fa_name = prepare_reads(\@s_reads, $clip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
592 my($contig_file, $sclip_count, $contig_reads) = $assembler->run($fa_name);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
593 return if(!$contig_file or !(-e $contig_file));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
594
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
595 $contig_file = clip_fa_file($contig_file, $clip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
596 my $contig_seqs = read_fa_file($contig_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
597 if(scalar @s_reads == 1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
598 $contig_file = clip_fa_file($fa_name, $clip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
599 $contig_seqs = read_fa_file($contig_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
600 my @seq_names = keys %{$contig_seqs};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
601 $sclip_count->{$seq_names[0]} = 1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
602 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
603
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
604 my @SV;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
605 my $where = ($clip == LEFT_CLIP ? "right" : "left");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
606 my $mapping = $mapper->run( -QUERY => $contig_file );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
607 my (%reads, %quals, %s_lens, %pos);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
608 foreach my $r (@s_reads) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
609 $reads{$r->{name}} = $r->{full_seq};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
610 $quals{$r->{name}} = $r->{full_qual};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
611 $s_lens{$r->{name}} = length($r->{seq});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
612 $pos{$r->{name}} = $r->{pos};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
613 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
614
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
615 foreach my $s (keys(%{$mapping})) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
616 next if($sclip_count->{$s} < $min_sclip_reads);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
617
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
618 # try to find a better mapping
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
619 my @tmp_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
620 my $selected_read;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
621 my @tmp_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
622 foreach my $n (@{$contig_reads->{$s}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
623 push @tmp_pos, $pos{$n};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
624 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
625 @tmp_pos = uniq @tmp_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
626 my $real_pos = $clip == LEFT_CLIP ? $tmp_pos[$#tmp_pos] : $tmp_pos[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
627
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
628 my $n_new_SV = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
629 my $tmp_bp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
630 foreach my $t (@{$mapping->{$s}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
631 my $qort = $t->{qstrand};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
632 my $t_pos = $t->{tstart};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
633 $t_pos = $t->{tend} if( ($qort eq "+" && $clip == LEFT_CLIP) ||
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
634 ($qort eq '-' && $clip == RIGHT_CLIP));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
635 if($t->{tchr} eq $chr && (abs($t_pos - $real_pos) < $min_dist_diff)) { # the soft clipping is incorrect
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
636 @SV = @SV[0 .. ($#SV - $n_new_SV + 1)] if($n_new_SV > 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
637 last;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
638 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
639 my $first_bp = {};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
640 $first_bp->{sc_reads} = $sclip_count->{$s};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
641 $first_bp->{$where . "_chr"} = $chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
642 $first_bp->{$where . "_pos"} = $real_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
643 $first_bp->{all_pos} = \@tmp_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
644 $first_bp->{$where . "_ort"} = "+";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
645 $first_bp->{pos} = $real_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
646 $first_bp->{cover} = $coverage;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
647 $first_bp->{chr} = $chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
648 my $new_where = ($where eq "right" ? "left" : "right");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
649
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
650 $first_bp->{$new_where . "_chr"} = $t->{tchr};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
651 $first_bp->{$new_where . "_pos"} = $t_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
652 $first_bp->{$new_where . "_ort"} = $qort;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
653 $first_bp->{sc_seq} = $contig_seqs->{$s};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
654 $first_bp->{reads} = $contig_reads->{$s};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
655
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
656 $tmp_bp = $first_bp unless $tmp_bp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
657 my $second_bp = check_sclip(-SAM => $sam, -TARGET => $t, -CHR => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
658 -POS => $real_pos, -SCLIP => $r_sclip, -CLIP => $clip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
659 if(@{$second_bp}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
660 foreach my $tmp_bp (@{$second_bp}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
661 push @SV, StructVar->new(-FIRST_BP => $first_bp, -SECOND_BP => $tmp_bp);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
662 $n_new_SV++;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
663 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
664 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
665 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
666 # save some one side good soft-clipping SV
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
667 if( $rescue == 1 &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
668 $n_new_SV == 0 &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
669 $sclip_count->{$s} >= $min_one_side_reads &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
670 (scalar(@{$mapping->{$s}}) == 1 || ($mapping->{$s}[0]{perfect} == 1 && $mapping->{$s}->[1]{perfect} == 0)) &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
671 $tmp_bp->{chr} &&
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
672 length($contig_seqs->{$s}) * 0.95 < $mapping->{$s}[0]{matches} ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
673
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
674 my %second_bp = %{$tmp_bp};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
675 $second_bp{sc_reads} = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
676 $second_bp{sc_seq} = '';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
677 ($second_bp{chr}, $second_bp{pos}) = $second_bp{pos} == $second_bp{left_pos} ?
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
678 ($second_bp{right_chr}, $second_bp{right_pos}) : ($second_bp{left_chr}, $second_bp{left_pos});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
679 $second_bp{cover} = count_coverage($sam, $second_bp{chr}, $second_bp{pos});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
680 $second_bp{reads} = [];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
681 push @SV, StructVar->new(-FIRST_BP => $tmp_bp, -SECOND_BP => \%second_bp);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
682 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
683
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
684 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
685 system("rm $fa_name"); system("rm $fa_name*");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
686 return @SV;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
687 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
688
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
689 sub get_gene_range {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
690 my ($chr, $pos, $clip) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
691 my $r = $clip == LEFT_CLIP ? [$pos, $pos+5] : [$pos - 5, $pos];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
692 my @genes;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
693 my $tmp = $chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
694 $tmp = "chr" . $tmp if($tmp !~ m/chr/);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
695 my ($f_tree, $r_tree) = ($gm->sub_model($tmp, "+"), $gm->sub_model($tmp, "-"));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
696 push @genes, $f_tree->intersect($r);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
697 push @genes, $r_tree->intersect($r);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
698 my ($s, $e);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
699 if(scalar @genes == 0) { #no gene here
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
700 return [$pos - $read_len, $pos + $read_len];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
701 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
702 foreach my $g (@genes) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
703 my $start = $g->val->get_start($pos, $read_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
704 my $end = $g->val->get_end($pos, $read_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
705 $s = $start if(!$s || $s > $start);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
706 $e = $end if(!$e || $e < $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
707 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
708 return [$s, $e];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
709 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
710
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
711 sub check_sclip {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
712 my %arg = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
713 my ($sam, $chr, $pos, $target, $r_sclip, $clip) =
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
714 ( $arg{-SAM}, $arg{-CHR}, $arg{-POS}, $arg{-TARGET}, $arg{-SCLIP}, $arg{-CLIP} );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
715
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
716 my @bp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
717
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
718 # identify the searching region for partner soft cliping
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
719 my $extension;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
720 if($RNASeq) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
721 $extension = 50;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
722 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
723 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
724 $extension = $read_len - ($target->{qend} - $target->{qstart});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
725 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
726 my ($tpos, $ort) = ($target->{tend}, $target->{qstrand});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
727 $tpos = $target->{tstart} if( ($clip == LEFT_CLIP && $ort eq '-')
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
728 || ($clip == RIGHT_CLIP && $ort eq '+'));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
729 $extension = abs($tpos - $pos) - 1
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
730 if($chr eq $target->{tchr} && abs($tpos - $pos) <= $extension); # don't consider itself
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
731 my $range =$target->{tchr} . ":";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
732 if($tpos < $extension) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
733 $range .= '1';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
734 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
735 else{
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
736 $range .= $tpos - $extension;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
737 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
738
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
739 $range .= "-"; $range .= $tpos + $extension;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
740
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
741 # the orginal genome sequence where we find the soft_clipping
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
742 my $orig;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
743 my $tmp = $chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
744 #$tmp = 'chr' . $tmp if($tmp !~ m/^chr/);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
745 $orig = $target_genome . ":" . $tmp . ":";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
746 my $base_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
747 if($RNASeq) { #let's do a wild guess!
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
748 my $r = get_gene_range($tmp, $pos, $clip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
749 return \@bp unless $r;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
750 $base_pos = $r->[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
751 $orig = join("", ($orig, $r->[0], "-", $r->[1]));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
752 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
753 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
754 $base_pos = $pos - $read_len;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
755 $orig = join("", ($orig, $pos - $read_len, "-", $pos + $read_len));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
756 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
757
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
758 return \@bp if(!exists( $r_sclip->{$target->{tchr}})); #mapped to chrY or other minor chrs
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
759 # the soft clipping happens in highly repetitive region
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
760 # return \@bp if($coverage > $max_repetitive_cover);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
761
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
762 my $r_pp = search_sclip($r_sclip, $range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
763 return \@bp if(!$r_pp);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
764
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
765 my $real_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
766 my $found = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
767 my @r_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
768 my @l_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
769 foreach my $s (@{$r_pp}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
770 #next if($s->[2] < $min_hit_reads);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
771 next if($chr ne $target->{tchr} && (
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
772 ($clip == LEFT_CLIP && $ort ne $s->[1] ) ||
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
773 ($clip == RIGHT_CLIP && $ort eq $s->[1] )) );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
774
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
775 next if($chr eq $target->{tchr} && ($s->[0] < $tpos - $extension
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
776 || $s->[0] > $tpos + $extension));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
777 my $tort = $s->[1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
778 next if($s->[3] > $max_repetitive_cover);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
779 my $tclip = $tort eq '+' ? RIGHT_CLIP : LEFT_CLIP;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
780 my @reads = get_sclip_reads(
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
781 -SAM => $sam,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
782 -CHR => $target->{tchr},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
783 -START => $s->[0],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
784 -END => $s->[0],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
785 -CLIP => $tclip,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
786 -MINCLIPLEN => $min_hit_len,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
787 -VALIDATOR => $validator,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
788 -PAIRED => $paired,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
789 -RMDUP => $rmdup);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
790 next if(!@reads || scalar(@reads) == 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
791 # push @r_reads, @reads if($tclip == RIGHT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
792 # push @l_reads, @reads if($tclip == LEFT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
793 # }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
794
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
795 # foreach my $tclip ( (RIGHT_CLIP, LEFT_CLIP)) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
796 # my $s_reads = $tclip == RIGHT_CLIP ? \@r_reads : \@l_reads;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
797 # next if(scalar @{$s_reads} == 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
798 my %count;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
799 # my $fa_name = prepare_reads($s_reads, $tclip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
800 my $fa_name = prepare_reads(\@reads, $tclip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
801
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
802 my($contig_file, $sclip_count, $contig_reads, $singlet_file) = $assembler->run($fa_name);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
803 my (%reads, %quals, %s_lens, %pos);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
804 # foreach my $r (@{$s_reads}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
805 foreach my $r (@reads) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
806 $reads{$r->{name}} = $r->{full_seq};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
807 $quals{$r->{name}} = $r->{full_qual};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
808 $s_lens{$r->{name}} = length($r->{seq});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
809 $pos{$r->{name}} = $r->{pos};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
810 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
811
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
812 $contig_file = clip_fa_file($contig_file, $tclip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
813 if( -s $singlet_file ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
814 $singlet_file = clip_fa_file($singlet_file, $tclip);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
815 system("cat $singlet_file >> $contig_file");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
816 system("rm $singlet_file");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
817 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
818 my $seqs = read_fa_file($contig_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
819 my $hits = $aligner->run( -TARGET => $orig, -QUERY => $contig_file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
820 foreach my $t (keys %{$hits}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
821 my $h = $hits->{$t};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
822 my $tmp_bp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
823 my @all_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
824 my $all_reads_name;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
825 if(exists $contig_reads->{$t}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
826 foreach my $n (@{$contig_reads->{$t}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
827 push @all_pos, $pos{$n};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
828 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
829 $all_reads_name = $contig_reads->{$t};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
830 @all_pos = uniq @all_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
831 @all_pos = sort {$a <=> $b} @all_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
832
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
833 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
834 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
835 push @all_pos, $pos{$t};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
836 $all_reads_name = [$t];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
837 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
838
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
839 if(is_good_hit($h, $clip, $tclip)) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
840 my $hit_ort = ($h->strand('query') == 1 ? "+" : "-");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
841 my $real_pos = $tclip == RIGHT_CLIP ? $all_pos[0] : $all_pos[$#all_pos];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
842 if($tclip == RIGHT_CLIP ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
843 $tmp_bp = {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
844 left_ort => '+',
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
845 left_pos => $real_pos,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
846 left_chr => $target->{tchr},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
847 right_ort => $hit_ort,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
848 right_pos => ($hit_ort eq "+" ? $h->start("hit") : $h->end("hit")) + $base_pos,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
849 right_chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
850 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
851 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
852 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
853 $tmp_bp = {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
854 left_ort => $hit_ort,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
855 left_chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
856 left_pos => ($hit_ort eq "+" ? $h->end("hit") : $h->start("hit")) + $base_pos,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
857 right_ort => "+",
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
858 right_chr => $target->{tchr},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
859 right_pos => $real_pos,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
860 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
861 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
862 $tmp_bp->{chr} = $target->{tchr};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
863 $tmp_bp->{pos} = $real_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
864 $tmp_bp->{all_pos} = \@all_pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
865 $tmp_bp->{sc_seq} = $seqs->{$t};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
866 $tmp_bp->{cover} = count_coverage($sam, $target->{tchr}, $real_pos);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
867 $tmp_bp->{sc_reads} = exists $sclip_count->{$t} ? $sclip_count->{$t} : 1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
868 $tmp_bp->{reads} = exists $contig_reads->{$t} ? $contig_reads->{$t} : [$t];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
869
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
870 push @bp, $tmp_bp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
871 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
872 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
873 system("rm $fa_name"); system("rm $fa_name*");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
874 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
875 return \@bp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
876 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
877
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
878 # many more filter can be added here
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
879 sub is_good_hit {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
880 my ($hit, $clip, $tclip) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
881
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
882 return if($hit->length_aln('query') < $min_hit_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
883 return if($hit->frac_identical * 100 < $min_percent_id );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
884 return 1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
885
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
886 # do we want to do the check?
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
887 my $ort = ($hit->start('query') < $hit->end('query') ? "+" : "-");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
888 my ($dist, $t_dist, $qstart, $qend);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
889 $qstart = $ort eq '+' ? $hit->start('query') : $hit->end('query') ;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
890 $qend = $ort eq '+' ? $hit->end('query') : $hit->start('query');
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
891 $t_dist = $qstart;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
892 $t_dist = $hit->query_length - $qend if($tclip == LEFT_CLIP);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
893
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
894 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
895
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
896 # RNASeq support to identify deletions
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
897
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
898 sub cigar2hit {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
899 my ($s, @cigar_array) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
900 my @pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
901 my $p = $s;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
902 foreach my $c (@cigar_array){
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
903 my($op, $len) = @{$c};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
904 if($op eq 'N') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
905 push @pos, [$s, $p];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
906 $s = $p + $len;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
907 $p = $s;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
908 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
909 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
910 $p += $len if($op eq 'M' || $op eq 'D');
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
911 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
912 push @pos, [$s, $p];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
913 return \@pos;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
914 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
915
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
916 sub find_del_SVs {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
917 my ($sam, $chr, $start, $end) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
918 my $max_sclip_len = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
919 my $range = $chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
920 if($start && $end) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
921 $range = $chr . ":" . $start . "-" . $end;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
922 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
923 my %SV;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
924 my $tmp = $chr;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
925 $tmp = "chr" . $chr if($chr !~ m/chr/);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
926
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
927 my($f_tree, $r_tree) = ($gm->sub_model($tmp, "+"), $gm->sub_model($tmp, "-"));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
928
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
929 $sam->fetch($range, sub {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
930 my $a = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
931 my $cigar_str = $a->cigar_str;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
932 # print $a->qname, "\t", $cigar_str, "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
933 return unless $cigar_str =~ m/N/; # the read cross two genes must have N
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
934 # return unless $a->score >= 97;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
935 my $sv = identify_del_SV($f_tree, $a);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
936 if($sv){
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
937 my $tmp1 = $sv->[0] . "+" . $sv->[1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
938 if(!exists($SV{$tmp1})) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
939 $SV{$tmp1} = {};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
940 $SV{$tmp1}->{num} = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
941 $SV{$tmp1}->{left} = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
942 $SV{$tmp1}->{right} = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
943 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
944 $SV{$tmp1}->{num}++;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
945 $SV{$tmp1}->{left} = $sv->[5] if($SV{$tmp1}->{left} < $sv->[5]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
946 $SV{$tmp1}->{right} = $sv->[6] if($SV{$tmp1}->{right} < $sv->[6]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
947 $SV{$tmp1}->{gene_ort} = "+";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
948 $SV{$tmp1}->{left_gene} = $sv->[3];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
949 $SV{$tmp1}->{right_gene} = $sv->[4];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
950 push @{$SV{$tmp1}->{reads}}, $a->qname;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
951
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
952 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
953 $sv = identify_del_SV($r_tree, $a);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
954 if($sv){
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
955 my $tmp1 = $sv->[0] . "-" . $sv->[1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
956 if(!exists($SV{$tmp1})) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
957 $SV{$tmp1} = {};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
958 $SV{$tmp1}->{num} = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
959 $SV{$tmp1}->{left} = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
960 $SV{$tmp1}->{right} = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
961 $SV{$tmp1}->{reads} = [];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
962 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
963 $SV{$tmp1}->{num}++;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
964 $SV{$tmp1}->{left} = $sv->[5] if($SV{$tmp1}->{left} < $sv->[5]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
965 $SV{$tmp1}->{right} = $sv->[6] if($SV{$tmp1}->{right} < $sv->[6]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
966 $SV{$tmp1}->{gene_ort} = "-";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
967 $SV{$tmp1}->{left_gene} = $sv->[3];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
968 $SV{$tmp1}->{right_gene} = $sv->[4];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
969 push @{$SV{$tmp1}->{reads}}, $a->qname;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
970 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
971 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
972 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
973
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
974 my @rtn;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
975 foreach my $sv (keys %SV) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
976 my( $pos1, $pos2 ) = $sv =~ m/(\d+)[+|-](\d+)/;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
977 print $pos1, "\t", $pos2, "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
978 my( $cover1, $cover2) = (count_coverage($sam, $chr, $pos1 - 1), count_coverage($sam, $chr, $pos2));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
979 next if($SV{$sv}->{num} < $min_sclip_reads);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
980 my $cover = $cover1 < $cover2 ? $cover1 : $cover2;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
981 next if($SV{$sv}->{num} * 100 < $cover * $min_pct_sclip_reads && $SV{$sv} < 50);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
982 push @rtn, StructVar->new (
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
983 -FIRST_BP => { left_ort => '+',
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
984 left_pos => $pos1 - 1,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
985 left_chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
986 right_ort => '+',
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
987 right_pos => $pos2,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
988 right_chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
989 chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
990 cover => count_coverage($sam, $chr, $pos1 - 1),
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
991 pos => $pos1 - 1,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
992 sc_reads => $SV{$sv}->{num},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
993 left_len => $SV{$sv}->{left},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
994 right_len => $SV{$sv}->{right},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
995 gene_ort => $SV{$sv}->{gene_ort},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
996 left_gene => $SV{$sv}->{left_gene},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
997 right_gene => $SV{$sv}->{right_gene},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
998 reads => $SV{$sv}->{reads},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
999 },
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1000 -SECOND_BP => { left_ort => '+',
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1001 left_pos => $pos1 - 1,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1002 left_chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1003 right_ort => '+',
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1004 right_pos => $pos2,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1005 right_chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1006 chr => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1007 pos => $pos2,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1008 cover => count_coverage($sam, $chr, $pos2),
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1009 sc_reads => $SV{$sv}->{num},
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1010 reads => [],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1011 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1012 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1013 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1014 return @rtn;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1015 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1016
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1017 sub identify_del_SV {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1018 my ($tree, $a) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1019 my @cigar_array = @{$a->cigar_array};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1020 return unless($a->start && $a->end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1021 return if(scalar $tree->intersect([$a->start, $a->end]) <= 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1022
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1023 my @hits = @{ cigar2hit($a->start, @cigar_array) };
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1024 my @genes;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1025 my $last_gene;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1026 my @change;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1027 my ($left, $right) = (0, 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1028 for( my $i = 0; $i < scalar @hits; $i++) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1029 my $h = $hits[$i];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1030 my @g = $tree->intersect($h);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1031 my $g_size = scalar @g;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1032 if($g_size == 0) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1033 # print STDERR $a->qname, " is mapped outside of gene\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1034 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1035 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1036 if( scalar @g == 1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1037 push @genes, $g[0] unless ($last_gene && $last_gene->val->name eq $g[0]->val->name);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1038 if($last_gene && $last_gene->val->name ne $g[0]->val->name) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1039 $right += ($h->[1] - $h->[0] );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1040 push @change, $i;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1041 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1042 $left += ($h->[1] - $h->[0] ) if($right == 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1043 $last_gene = $g[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1044 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1045 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1046 # print STDERR $a->qname, " connect two genes into one?\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1047 return;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1048 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1049 return if(scalar @genes <= 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1050 if(scalar @genes > 2) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1051 # print STDERR $a->qname, "crossed more than 2 genes!\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1052 return;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1053 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1054 # now we down to 2 genes
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1055 my ($left_gene, $right_gene) = @genes;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1056 return unless $left_gene->val->overlap([$hits[$change[0]-1]->[0], $hits[$change[0]-1]->[1]]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1057 return unless $right_gene->val->overlap([$hits[$change[0]]->[0], $hits[$change[0]]->[1]]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1058 my $p = $left_gene->val->end;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1059 print join("\t", ( $a->score,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1060 $a->cigar_str, $left, $right, $hits[$change[0]-1]->[1], $hits[$change[0]]->[0],
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1061 $left_gene->val->name, $right_gene->val->name)), "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1062 return [$hits[$change[0]-1]->[1], $hits[$change[0]]->[0], $a, $left_gene->val, $right_gene->val, $left, $right];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1063 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1064
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1065 =head1 NAME
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1066
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1067 CREST.pl - a Structure Variation detection tools using softclipping for
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1068 whole genome sequencing.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1069
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1070
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1071 =head1 VERSION
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1072
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1073 This documentation refers to CREST.pl version 0.0.1.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1074
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1075
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1076 =head1 USAGE
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1077
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1078 This program depends on several things that need to be installed and/or
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1079 specified. The program uses BioPerl and Bio::DB::Sam module to parse
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1080 the files and bam files. Also it uses Blat software suits to do genome
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1081 mapping and alignment. To make the program efficient, it also requires
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1082 a blat server setup. And the program uses CAP3 assembler.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1083
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1084 Identify somatic SVs from input:
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1085 CREST.pl -f somatic.cover -d tumor.bam -g germline.bam
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1086 Identify SVs from a single bam compared wtih reference genome:
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1087 CREST.pl -f sclip.cover -d sample.bam
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1088 Identify SVs from a single bam on chr1 only
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1089 CREST.pl -f sclip.cover -d sample.bam -r chr1
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1090
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1091 =head1 REQUIRED ARGUMENTS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1092
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1093 To run the program, several parameter must specified.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1094 -d, --input_d: The input (diagnositic) bam file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1095 -f, --sclipfile: The soft clipping information generated by extractSClip.pl
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1096 --ref_genome: The reference genome file in fa format
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1097 -t, --target_genome: The 2bit genome file used by blat server and blat
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1098 --blatserver: The blat server name or ip address
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1099 --blatport: The blat server port
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1100
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1101 =head1 OPTIONS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1102
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1103 The options that can be used for the program.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1104 -g, --input_g The germline (paired) bam file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1105 -p, --prefix The output prefix, default is the input bam file name
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1106 -o, --out_dir Output directory, default is the working directory
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1107 -l, --read_len The read length of the sequencing data, defaut 100
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1108 --sensitive The program will generate more SVs with higher false positive rate.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1109
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1110 --(no)scratch Use the scratch space, default is off.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1111 --(no)paired Use paired reads or not, defafult is on, so change to --nopaired for unpaired reads.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1112 --cap3 CAP3 executable position, default cap3
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1113 --cap3opt CAP3 options, single quoted, Default ' > /dev/null'
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1114 --blatclient gfClient excutable position, default gfClient
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1115 --blatclientopt gfClient options, single quoted, default '-out=psl -nohead'
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1116 --blat blat executable potion, default balt
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1117 --blatopt blat options, single quoted, default '-tileSize=7 -stepSize=1 -out=psl -minScore=15'
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1118
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1119 -r, --range The range where SV will be detected, using chr1:100-200 format.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1120 --max_score_diff The maximum score difference when stopping select hit, default 10.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1121 --min_sclip_reads Minimum number of soft clipping read to triger the procedure, default 3.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1122 --max_rep_cover The min number of coverage to be called as repetitive and don't triger
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1123 the procedure, default 500.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1124 --min_sclip_len The min length of soft clipping part at a position to triger the detection,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1125 default 20.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1126 --min_hit_len Min length of a hit for genome mapping
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1127 --min_dist_diff Min distance between the mapped position and the soft clipping position, default 20.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1128 --(no)rmdup Remove PCR dumplicate. Default remove.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1129
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1130 --min_percent_id Min percentage of identity of soft clipping read mapping, default 90
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1131 --min_percent_hq Min percentage of high quality base in soft clipping reads, default 80
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1132 --lowqual_cutoff Low quality cutoff value, default 20.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1133
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1134 -h, --help, -? Help information
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1135 --man Man page.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1136 --usage Usage information.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1137 --version Software version.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1138
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1139 --(no)rm_tandem_repeat Remove tandem repeat caused SV events, default is ON. When it's on ptrfinder program
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1140 need to be on the path.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1141 --tr_max_indel_size Maximum tandem repeat mediated INDEL events, default 100
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1142 --tr_min_size Minimum tandem reapet size, default 2
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1143 --tr_max_size Maximum tandem repeat size, default 8
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1144 --tr_min_num Minimum tandem repeat number, defaut 4
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1145 --min_percent_cons_of_read Minimum percent of consensus length of read length, default 0.75
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1146 --max_bp_dist Maximum distance between two idenfitifed break points, default 15
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1147 --germline_seq_width Half window width of genomic sequence around break point for germline SV filtering,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1148 default 100
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1149 --germline_search_width Half window width for seaching soft-clipped reads around breakpoint for germline SV
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1150 filtering, default 50.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1151
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1152 --hetero_factor The factor about the SV's heterogenirity and heterozygosity, default 0.4.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1153 --triger_p_value The p-value that will triger the SV detection when number of soft-clipped reads is small,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1154 defaut 0.05.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1155
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1156 --(no)rescue Use rescue mode, when it's on, the a SV with only 1 side with enough soft-clipped reads
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1157 is considered as a valid one instead of rejecting it. Default on.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1158 --min_one_side_reads When rescure mode is on, the minimum number of soft-clipped reads on one side, default 5.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1159
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1160 --RNASeq RNAseq mode, default off
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1161 --genemodel A gene model file, currently only refFlat format (BED) is supported. Requried for RNASeq.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1162 --cluster_size The soft-clipped reads within cluster_size will be considered together, default is 3, RNAseq mode
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1163 only.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1164
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1165
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1166 =head1 DESCRIPTION
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1167
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1168 This is a program designed to identify Structure Variations (SVs) using soft
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1169 clipping reads. With the improvement of next-gen sequencing techniques,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1170 both the coverage and length of pair-end reads have been increased steady.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1171 Many SV detection software availalbe uses pair-end information due to the
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1172 limitted read length. Now 100bp is pretty common and many reads will cross
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1173 the break points. Some mapping programs ( bwa, etc ) have the ability to
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1174 identify so called soft-clipping reads. A soft-clipping read is a read that
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1175 different part can be mapped to different genomic posiiton, but the read can
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1176 be uniquely positioned using the mate-pair position and the insert length.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1177 So this program use those reads to do an assembly-mapping-assembly-alignment
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1178 procedure to identify potential structure variiations.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1179
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1180
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1181 =head1 DIAGNOSTICS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1182
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1183 A list of every error and warning message that the application can generate
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1184 (even the ones that will "never happen"), with a full explanation of each
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1185 problem, one or more likely causes, and any suggested remedies. If the
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1186 application generates exit status codes (e.g., under Unix), then list the exit
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1187 status associated with each error.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1188
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1189 =head1 CONFIGURATION AND ENVIRONMENT
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1190
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1191 The program is designed to use under a cluster or high performance computing
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1192 environment since it's dealing with over 100G input data. The program can be
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1193 used as highly parallellized. And the program is developped under linux/unix.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1194
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1195
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1196 =head1 DEPENDENCIES
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1197
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1198 The program depend on several packages:
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1199 1. Bioperl perl module.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1200 2. Bio::DB::Sam, version 1.5 or later, it requires samtools lib installed.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1201 3. blat suits, include blat, gfClient, gfServer.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1202 4. CAP3 assembly program.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1203
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1204
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1205 =head1 BUGS AND LIMITATIONS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1206
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1207 There are no known bugs in this module, but the method is limitted to bam file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1208 that has soft-clipping cigar string generated.Please report problems to
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1209 Jianmin Wang (Jianmin.Wang@stjude.org)
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1210 Patches are welcome.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1211
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1212 =head1 AUTHOR
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1213
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1214 Jianmin Wang (Jianmin.Wang@stjude.org)
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1215
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1216
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1217 =head1 LICENCE AND COPYRIGHT
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1218
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1219 Copyright (c) 2010 by St. Jude Children's Research Hospital.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1220
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1221 This program is free software: you can redistribute it and/or modify
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1222 it under the terms of the GNU General Public License as published by
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1223 the Free Software Foundation, either version 2 of the License, or
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1224 (at your option) any later version.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1225
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1226 This program is distributed in the hope that it will be useful,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1227 but WITHOUT ANY WARRANTY; without even the implied warranty of
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1228 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1229 GNU General Public License for more details.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1230
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1231 You should have received a copy of the GNU General Public License
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1232 along with this program. If not, see <http://www.gnu.org/licenses/>.