annotate PsiCLASS-1.0.2/psiclass @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #!/usr/bin/env perl
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 use strict ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 use warnings ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 use Cwd 'cwd' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 use Cwd 'abs_path' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 use File::Basename;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 use threads ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 use threads::shared ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 die "Usage: ./psiclass [OPTIONS]\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 "Required:\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 "\t-b STRING: paths to the alignment BAM files. Use comma to separate multiple BAM files\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 "\t\tor\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 "\t--lb STRING: path to the file listing the alignments BAM files\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 "Optional:\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 "\t-s STRING: path to the trusted splice file (default: not used)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 "\t-o STRING: prefix of output files (default: ./psiclass)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 "\t-p INT: number of processes/threads (default: 1)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 "\t-c FLOAT: only use the subexons with classifier score <= than the given number (default: 0.05)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 "\t--sa FLOAT: the minimum average number of supported read for retained introns (default: 0.5)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 "\t--vd FLOAT : the minimum average coverage depth of a transcript to be reported (defaults: 1.0)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 "\t--maxDpConstraintSize: the number of subexons a constraint can cover in DP. (default: 7. -1 for inf)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 "\t--bamGroup STRING: path to the file listing the group id of BAMs in the --lb file (default: not used)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 "\t--primaryParalog: use primary alignment to retain paralog genes (default: use unique alignments)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 #"\t--mateIdx INT: the read id has suffix such as .1, .2 for a mate pair. (default: auto)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 "\t--version: print version and exit\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 "\t--stage INT: (default: 0)\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 "\t\t0-start from beginning - building splice sites for each sample\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 "\t\t1-start from building subexon files for each sample\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 "\t\t2-start from combining subexon files across samples\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 "\t\t3-start from assembling the transcripts for each sample\n".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 "\t\t4-start from voting the consensus transcripts across samples\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 if ( @ARGV == 0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 my $WD = dirname( abs_path( $0 ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 my $i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 my $cmd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 my $prefix = "psiclass_" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 my $numThreads = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 sub system_call
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 print $_[0], "\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 system( $_[0] ) == 0 or die "Terminated\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 # Process the arguments
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 my @bamFiles : shared ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 my $spliceFile = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 my $bamFileList = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 my $stage = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 my $classesOpt = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 my $juncOpt = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 my $trustSpliceOpt = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 my $voteOpt = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 my $outdir = "." ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 my $mateIdx = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 my $spliceAvgSupport = 0.5 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 my $bamGroup = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 for ( $i = 0 ; $i < @ARGV ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 if ( $ARGV[$i] eq "--lb" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 $bamFileList = $ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 open FP1, $ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 while ( <FP1> )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 chomp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 push @bamFiles, $_ ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 elsif ( $ARGV[$i] eq "-b" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 push @bamFiles, ( split /,/, $ARGV[$i + 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 elsif ( $ARGV[$i] eq "-s" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 $spliceFile = $ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 elsif ( $ARGV[$i] eq "-o" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 my $o = $ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 # Parse the -o option to get the folder and prefix
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 if ( $o =~ /\// )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 if ( substr( $o, -1 ) ne "/" ) # file prefix is in -o
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 my @cols = split /\/+/, $o ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 $prefix = $cols[-1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 $outdir = join( "/", @cols[0..($#cols-1)] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 else # -o only specifies path
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 $outdir = substr( $o, 0, length( $o ) - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 $prefix = $o ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 if ( substr( $prefix, -1 ) ne "_" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 $prefix .= "_" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 elsif ( $ARGV[$i] eq "--stage" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 $stage = $ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 elsif ( $ARGV[ $i ] eq "-p" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 $numThreads = $ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 $classesOpt .= " -p $numThreads" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 elsif ( $ARGV[ $i ] eq "-c" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 $classesOpt .= " -c ".$ARGV[ $i + 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 elsif ( $ARGV[ $i ] eq "--mateIdx" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 $mateIdx = $ARGV[ $i + 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 if ( $mateIdx == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 $classesOpt .= " --hasMateIdSuffix" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 $juncOpt .= " --hasMateIdSuffix" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 elsif ( $ARGV[$i] eq "--bamGroup" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 $bamGroup = $ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 elsif ( $ARGV[ $i ] eq "--sa" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 $trustSpliceOpt .= " -a ".$ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 elsif ( $ARGV[ $i ] eq "--vd" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 $voteOpt .= " -d ".$ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 elsif ( $ARGV[$i] eq "--primaryParalog" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 $classesOpt .= " --primaryParalog" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 elsif ( $ARGV[$i] eq "--maxDpConstraintSize" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 $classesOpt .= " --maxDpConstraintSize ".$ARGV[$i + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 ++$i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 elsif ( $ARGV[$i] eq "--version" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 die "PsiCLASS v1.0.1\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 die "Unknown argument: ", $ARGV[$i], "\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 if ( scalar( @bamFiles ) == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 die "Must use option --lb to specify the list of bam files.\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 if ( $mateIdx == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 open FPsam, "$WD/samtools-0.1.19/samtools view ".$bamFiles[0]."| head -1000 |" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 my $flag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 while ( <FPsam> )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 my @cols = split /\t/ ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 my $id = $cols[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 if ( !( $id =~ /[\.|\/][1|2]$/ ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 $flag = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 close FPsam ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 if ( $flag == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 print "Found mate read id index suffix(.1 or /1). Calling \"--mateIdx 1\" option. If this is a false calling, please use \"--mateIdx 0\".\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 $classesOpt .= " --hasMateIdSuffix" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 $juncOpt .= " --hasMateIdSuffix" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 mkdir $outdir if ( !-d $outdir ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 mkdir "$outdir/splice" if ( !-d "$outdir/splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 mkdir "$outdir/subexon" if ( !-d "$outdir/subexon" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 my $threadLock : shared ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 my @sharedFiles : shared ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 my @threads ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 for ( $i = 0 ; $i < $numThreads ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 push @threads, $i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 sub threadRunSplice
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 my $tid = threads->tid() - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 my $i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 for ( $i = 0 ; $i < scalar( @bamFiles ) ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 next if ( ( $i % $numThreads ) != $tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 system_call( "$WD/junc ".$bamFiles[$i]." -a $juncOpt > $outdir/splice/${prefix}bam_$i.raw_splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 # Generate the splice file for each bam file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 if ( $stage <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 if ( $numThreads == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 system_call( "$WD/junc ".$bamFiles[$i]." -a $juncOpt > $outdir/splice/${prefix}bam_$i.raw_splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 #if ( $spliceFile ne "" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 #{
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 # system_call( "perl $WD/ManipulateIntronFile.pl $spliceFile ${prefix}bam_$i.raw_splice > ${prefix}bam_$i.splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 #}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 #else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 #{
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 #system_call( "awk \'{if (\$6>1) print;}\' ${prefix}bam_$i.raw_splice > ${prefix}bam_$i.splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 # system_call( "mv ${prefix}bam_$i.raw_splice ${prefix}bam_$i.splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 #}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 foreach ( @threads )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 $_ = threads->create( \&threadRunSplice ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 foreach ( @threads )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 $_->join() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 if ( $bamGroup eq "" || $spliceFile ne "" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 open FPls, ">$outdir/splice/${prefix}splice.list" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 print FPls "$outdir/splice/${prefix}bam_$i.raw_splice\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 close FPls ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 if ( $spliceFile ne "" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 system_call( "perl $WD/FilterSplice.pl $outdir/splice/${prefix}bam_$i.raw_splice $spliceFile > $outdir/splice/${prefix}bam_$i.splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 system_call( "$WD/trust-splice $outdir/splice/${prefix}splice.list ". $bamFiles[0] ." $trustSpliceOpt > $outdir/splice/${prefix}bam.trusted_splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 system_call( "perl $WD/FilterSplice.pl $outdir/splice/${prefix}bam_$i.raw_splice $outdir/splice/${prefix}bam.trusted_splice > $outdir/splice/${prefix}bam_$i.splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 my @bamToGroupId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 my %groupNameToId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 open FPbg, $bamGroup ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 my $groupUsed = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 while (<FPbg>)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 chomp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 if ( !defined $groupNameToId{$_} )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 $groupNameToId{$_} = $groupUsed ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 ++$groupUsed ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 push @bamToGroupId, $groupNameToId{$_} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 close FPbg ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 # Process each group one by one
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 my $group ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 for ( $group = 0 ; $group < $groupUsed ; ++$group )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 open FPls, ">$outdir/splice/${prefix}splice_$group.list" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 next if ( $bamToGroupId[$i] != $group ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 print FPls "$outdir/splice/${prefix}bam_$i.raw_splice\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 close FPls ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 system_call( "$WD/trust-splice $outdir/splice/${prefix}splice_$group.list ". $bamFiles[0] ." $trustSpliceOpt > $outdir/splice/${prefix}bam_$group.trusted_splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 next if ( $bamToGroupId[$i] != $group ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 system_call( "perl $WD/FilterSplice.pl $outdir/splice/${prefix}bam_$i.raw_splice $outdir/splice/${prefix}bam_$group.trusted_splice > $outdir/splice/${prefix}bam_$i.splice" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 # Get subexons from each bam file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 sub threadRunSubexonInfo
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 my $tidAdjust = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 $tidAdjust = $numThreads if ( $stage < 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 my $tid = threads->tid() - $tidAdjust - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 my $i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 for ( $i = 0 ; $i < scalar( @bamFiles ) ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 next if ( ( $i % $numThreads ) != $tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 system_call( "$WD/subexon-info ".$bamFiles[$i]." $outdir/splice/${prefix}bam_$i.splice > $outdir/subexon/${prefix}subexon_$i.out" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 if ( $stage <= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 if ( $numThreads == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 system_call( "$WD/subexon-info ".$bamFiles[$i]." $outdir/splice/${prefix}bam_$i.splice > $outdir/subexon/${prefix}subexon_$i.out" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 #print "hi ", scalar( @threads ), " ", scalar( @bamFiles), "\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 foreach ( @threads )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 $_ = threads->create( \&threadRunSubexonInfo ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 foreach ( @threads )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 $_->join() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 open FPls, ">$outdir/subexon/${prefix}subexon.list" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 print FPls "$outdir/subexon/${prefix}subexon_$i.out\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 close FPls ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 # combine the subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 if ( $stage <= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 $cmd = "$WD/combine-subexons --ls $outdir/subexon/${prefix}subexon.list > $outdir/subexon/${prefix}subexon_combined.out" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 system_call( "$cmd" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 # Run classes
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 if ( $stage <= 3 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 my $trimPrefix = substr( $prefix, 0, -1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 my $bamPath = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 if ( $bamFileList ne "" )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 $bamPath = " --lb $bamFileList " ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 foreach my $b (@bamFiles)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 $bamPath .= " -b $b " ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 $cmd = "$WD/classes $classesOpt $bamPath -s $outdir/subexon/${prefix}subexon_combined.out -o $outdir/${trimPrefix} > $outdir/${prefix}classes.log" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 system_call( "$cmd" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 # Run voting
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 if ( $stage <= 4 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 open FPgtflist, ">$outdir/${prefix}gtf.list" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 for ( $i = 0 ; $i < @bamFiles ; ++$i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 print FPgtflist "$outdir/${prefix}sample_${i}.gtf\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 close FPgtflist ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 $cmd = "$WD/vote-transcripts --lg $outdir/${prefix}gtf.list $voteOpt > $outdir/${prefix}vote.gtf" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 system_call( "$cmd" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411