|
0
|
1 #!/project/bioperl/perl-5.10.1-sles11/bin/perl -w
|
|
|
2 #
|
|
|
3 #------------------------------------------------------------------------------
|
|
|
4 # University of Minnesota
|
|
|
5 # Copyright 2010 - 2011, Regents of the University of Minnesota
|
|
|
6 #------------------------------------------------------------------------------
|
|
|
7 # Author:
|
|
|
8 #
|
|
|
9 # Jesse Erdmann
|
|
|
10 #
|
|
|
11 # POD documentation
|
|
|
12 #------------------------------------------------------------------------------
|
|
|
13 =pod BEGIN
|
|
|
14
|
|
|
15 =head1 NAME
|
|
|
16
|
|
|
17 tapdance_runner.pl - TAPDANCE wrapper that provides a single interface to all
|
|
|
18 TAPDANCE functionality.
|
|
|
19
|
|
|
20 =head1 SYNOPSIS
|
|
|
21
|
|
|
22 tapdance_runner.pl [-help]
|
|
|
23
|
|
|
24 See http://sf.net/p/tapdancebio for full documentation
|
|
|
25
|
|
|
26 =head1 OPTIONS
|
|
|
27
|
|
|
28 =over 6
|
|
|
29
|
|
|
30 =item B<-help>
|
|
|
31
|
|
|
32 Print this usage summary.
|
|
|
33
|
|
|
34 =item B<-seqFile sequence_file>
|
|
|
35
|
|
|
36 The sequences to be processed for insertions.
|
|
|
37
|
|
|
38 =item B<-bar2libFile barcode_to_library_mapping_file>
|
|
|
39
|
|
|
40 A tab delimited file where each line contains the barcode and name of a
|
|
|
41 library. Additionally, columns after the second column will be treated as
|
|
|
42 metadata tags to be associated with the library.
|
|
|
43
|
|
|
44 =item B<-baseConfig custom_config>
|
|
|
45
|
|
|
46 OPTIONAL. If there is a custom tapdance_base_config.txt to be used in
|
|
|
47 special cases, use this parameter to specify it's use. An example where
|
|
|
48 this might be useful is the case where distinct groups of users are using
|
|
|
49 the same TAPDANCE installation, but separate mutagens.
|
|
|
50
|
|
|
51 =item B<-config predefined_config_file>
|
|
|
52
|
|
|
53 A configuration file may be used rather than specify options on
|
|
|
54 the command line. Any options specified in the base config file
|
|
|
55 will be overriden by values specified in this config file.
|
|
|
56
|
|
|
57 =item B<-db_config database_configuration_file>
|
|
|
58
|
|
|
59 Use this option if the database configuration needs to be kept separate
|
|
|
60 from other configuration information. This is most useful in Galaxy
|
|
|
61 where the end user should not have the database user credentials
|
|
|
62 exposed to them.
|
|
|
63
|
|
|
64 =item B<-bowtieIdx reference_genome>
|
|
|
65
|
|
|
66 The name of the bowtie index to use for aligning individual sequences. This
|
|
|
67 is only used during the first phase of TAPDANCE. It is important to note that
|
|
|
68 the index name is not a single file. For instance, the mm9 index has several
|
|
|
69 files name mm9.[0-9].ebwt and mm9.rev.[0-9].ebwt. However, the correct
|
|
|
70 value for this parameter would be /my/path/to/indexes/mm9
|
|
|
71
|
|
|
72 =item B<-mutagen mutagen_sequence>
|
|
|
73
|
|
|
74 The sequence to match determining whether the mutagen of interest is present.
|
|
|
75 Any sequences not matching this sequence will not be used in the analysis
|
|
|
76 while those that do will have the mutagen trimmed prior to alignment. If, for
|
|
|
77 instance the mutagen for a particular project has a sequence of ACTG, but the
|
|
|
78 user also wanted to remove up two bases following the mutagen sequence the
|
|
|
79 wildcard character '_' can be used to specify a mutagen sequence of ACTG__.
|
|
|
80
|
|
|
81 Any number of mutagen sequences may be specified by entering multiple -mutagen
|
|
|
82 entries on the command line. E.G. perl tapdance_runner.pl -mutagen ACGT
|
|
|
83 -mutagen TGCA. This is useful when a mutagen has more than one common captured
|
|
|
84 sequence in the data.
|
|
|
85
|
|
|
86 =item B<-projectName project_name>
|
|
|
87
|
|
|
88 A name for the project, up to 255 chars.
|
|
|
89
|
|
|
90 =item B<-omittedChromosomes chromosomes_to_omit>
|
|
|
91
|
|
|
92 It can be useful to remove the chromosome of the donor concatamer from the
|
|
|
93 calulations to remove the effects of local hopping for some projects. The
|
|
|
94 chromosomes can be specified as a comma delimited list and must match the
|
|
|
95 names used in the reference genome. E.G -omittedChromosomes chr1
|
|
|
96 -omittedChromosomes chr2
|
|
|
97
|
|
|
98 =item B<-output_dir location_to_write_to>
|
|
|
99
|
|
|
100 The location where execution will be performed
|
|
|
101
|
|
|
102 DEFAULT:'./'
|
|
|
103
|
|
|
104 =item B<-metadata library_metadata>
|
|
|
105
|
|
|
106 OPTIONAL. To specify metadata on libraries outside of the barcode to library
|
|
|
107 mapping file, this parameter may be used. The file should contain the name of
|
|
|
108 the library in one column and the metadata tag to affiliate with it in the
|
|
|
109 second. Each library may have as many entries as needed.
|
|
|
110
|
|
|
111 =item B<-lib_pct library_percent>
|
|
|
112
|
|
|
113 =item B<-CIS_tot_p CIS_total_pvalue>
|
|
|
114
|
|
|
115 =item B<-CIS_lib_p CIS_library_pvalue>
|
|
|
116
|
|
|
117 =item B<-CIS_reg_p CIS_region_pvalue>
|
|
|
118
|
|
|
119 =item B<-coCIS_thresh cocis_threshold>
|
|
|
120
|
|
|
121 =item B<-merge merge>
|
|
|
122
|
|
|
123 Specify projects to be merged as the new project specified with -project_name.
|
|
|
124 E.G. -merge my_first_project -merge my_second_project -project_name
|
|
|
125 my_merged_project.
|
|
|
126
|
|
|
127 =item B<-annotation annotation_file>
|
|
|
128
|
|
|
129 Specify the bed file to annotate CISes with. The default feature set is USCS's
|
|
|
130 mm9 refSeq genes.
|
|
|
131
|
|
|
132 =item B<-no_cis>
|
|
|
133
|
|
|
134 To generate a list of inserts only, specify no_cis. This is useful in cases
|
|
|
135 where a new set of data needs to be merged with a previous set of data. Use
|
|
|
136 this option as a first step to prepare the new data. Use -merge to combine
|
|
|
137 the resulting projects and call CISes on the new project.
|
|
|
138
|
|
|
139 =item B<-seqType seqfile_format>
|
|
|
140
|
|
|
141 OPTIONAL. If not specified, TAPDANCE will attempt to identify the input file
|
|
|
142 type on it's own. Valid options are 'tab', 'fasta' and 'fastq'.
|
|
|
143
|
|
|
144 =item B<-debug>
|
|
|
145
|
|
|
146 OPTIONAL.
|
|
|
147
|
|
|
148 =back
|
|
|
149
|
|
|
150 =cut
|
|
|
151
|
|
|
152 #### END of POD documentation.
|
|
|
153 #-----------------------------------------------------------------------------
|
|
|
154
|
|
|
155 use strict;
|
|
|
156 use Cwd;
|
|
|
157 use Getopt::Long;
|
|
|
158 use File::Copy;
|
|
|
159 use File::Find;
|
|
|
160 use File::Temp qw/ tempfile tempdir /;
|
|
|
161 use Pod::Usage;
|
|
|
162
|
|
|
163 #tapdance_runner.pl -s $seqs -b $bar2lib -g $genomeIdx -pn $projName -o $omitChrom -pb $projBed -ps $projSum -cb $cisBed -cs $cisSum -bc $baseConfig
|
|
|
164
|
|
|
165 my $dbh;
|
|
|
166 my $path = $0;
|
|
|
167 $path =~ s/\/\w*\.pl$//g;
|
|
|
168 require "$path/lib/tapdance_base_config.pl";
|
|
|
169 require "$path/util.pl";
|
|
|
170
|
|
|
171 #Universal variables
|
|
|
172 my ($seqFile, $seqType, $bar2libFile, $bowtieIdx, $bwaIdx, $projName, @omitChrom, $baseConfig, @mutagens_array, $metadata, $merge, $preconfig_file, $library_percent, $CIS_total_pvalue, $CIS_library_pvalue, $CIS_region_pvalue, $cocis_threshold, $annotation_file, $db_config);
|
|
|
173 my $no_cis = 0;
|
|
|
174
|
|
|
175 #CMD line variables
|
|
|
176 my ($debug, $output_dir, $noUnlink, $help_flag);
|
|
|
177
|
|
|
178 #Galaxy variables
|
|
|
179 my ($index, $index_id, $index_path, $projBed, $projBedId, $projSum, $projSumId, $projVis, $projVisId, $cisWig, $cisWigId, $cisWigPath, $cisSum, $cisSumId, $tmpDir);
|
|
|
180
|
|
|
181
|
|
|
182 my %options = (
|
|
|
183 #Universal Variables
|
|
|
184 "seqFile|s=s" => \$seqFile,
|
|
|
185 "seqType|st=s" => \$seqType,
|
|
|
186 "bar2libFile|b=s" => \$bar2libFile,
|
|
|
187 "bowtieIdx=s" => \$bowtieIdx,
|
|
|
188 #"bwaIdx=s" => \$bwaIdx,
|
|
|
189 "projectName|pn=s" => \$projName,
|
|
|
190 "omittedChromosomes|o=s" => \@omitChrom,
|
|
|
191 "baseConfig|bc=s" => \$baseConfig,
|
|
|
192 "metadata|m=s" => \$metadata,
|
|
|
193 "mutagen=s" => \@mutagens_array,
|
|
|
194 "lib_pct=f" => \$library_percent,
|
|
|
195 "CIS_tot_p=f" => \$CIS_total_pvalue,
|
|
|
196 "CIS_lib_p=f" => \$CIS_library_pvalue,
|
|
|
197 "CIS_reg_p=f" => \$CIS_region_pvalue,
|
|
|
198 "coCIS_thresh=f" => \$cocis_threshold,
|
|
|
199 "merge=s" => \$merge,
|
|
|
200 "annotation=s" => \$annotation_file,
|
|
|
201 "config=s" => \$preconfig_file,
|
|
|
202 "db_config=s" => \$db_config,
|
|
|
203 "no_cis" => \$no_cis,
|
|
|
204
|
|
|
205 #CMD Line Variables
|
|
|
206 "help" => \$help_flag,
|
|
|
207 "output_dir=s" => \$output_dir,
|
|
|
208 "debug|d" => \$debug,
|
|
|
209
|
|
|
210 #Galaxy Variables
|
|
|
211 "index=s" => \$index,
|
|
|
212 "index_id=s" => \$index_id,
|
|
|
213 "index_path=s" => \$index_path,
|
|
|
214 "projectBed|pb=s" => \$projBed,
|
|
|
215 "projectBedId=s" => \$projBedId,
|
|
|
216 "cisWig|cw=s" => \$cisWig,
|
|
|
217 "cisWigId|cwid=s" => \$cisWigId,
|
|
|
218 "cisWigPath|cwpath=s" => \$cisWigPath,
|
|
|
219 "tmp_dir|t=s" => \$tmpDir,
|
|
|
220 "no_unlink" => \$noUnlink
|
|
|
221 );
|
|
|
222
|
|
|
223 GetOptions(%options) or pod2usage(2);
|
|
|
224 pod2usage(1) if $help_flag;
|
|
|
225
|
|
|
226 $projName = &sanitize_project($projName);
|
|
|
227 my $meta_gen = 0;
|
|
|
228
|
|
|
229 my $envDirN;
|
|
|
230 if (defined($output_dir)) {
|
|
|
231 $envDirN = $output_dir;
|
|
|
232 unless (-d $output_dir) {
|
|
|
233 mkdir ($output_dir);
|
|
|
234 }
|
|
|
235 }
|
|
|
236 elsif (defined($tmpDir)) {
|
|
|
237 if ($noUnlink) {
|
|
|
238 $envDirN = tempdir(DIR => $tmpDir);
|
|
|
239 }
|
|
|
240 else {
|
|
|
241 $envDirN = tempdir(DIR => $tmpDir, UNLINK => 1);
|
|
|
242 }
|
|
|
243 }
|
|
|
244 else {
|
|
|
245 if ($noUnlink) {
|
|
|
246 $envDirN = tempdir();
|
|
|
247 }
|
|
|
248 else {
|
|
|
249 $envDirN = tempdir(UNLINK => 1);
|
|
|
250 }
|
|
|
251 }
|
|
|
252 if ($debug) { print "EnvDir = $envDirN\n"; }
|
|
|
253
|
|
|
254 if (!defined($baseConfig)) { $baseConfig = "$path/lib"; }
|
|
|
255 open(my $baseConfigH, "<", $baseConfig . "/tapdance_base_config.pl") || die "Unable to open $baseConfig: $!\n";
|
|
|
256 open(my $envConfigH, ">", $envDirN . "/config.pl") || die "Unable to open environment $envDirN/config.pl: $!\n";
|
|
|
257
|
|
|
258 if (defined($db_config)) {
|
|
|
259 print $envConfigH "require '" . $envDirN . "/" . $db_config . "';\n";
|
|
|
260 }
|
|
|
261
|
|
|
262 # Copy system defaults first, overwrite as needed
|
|
|
263 while (<$baseConfigH>) {
|
|
|
264 print $envConfigH $_;
|
|
|
265 }
|
|
|
266 close($baseConfigH);
|
|
|
267
|
|
|
268 if (defined($preconfig_file)) {
|
|
|
269 open(my $preConfigH, "<", $preconfig_file) || die "Unable to open input configuration file $preconfig_file. $!\n";
|
|
|
270 }
|
|
|
271 else {
|
|
|
272 print $envConfigH "#Project specific custom values, will override values set above\n";
|
|
|
273 print $envConfigH "\$proj = '$projName';\n";
|
|
|
274 print $envConfigH "\$envDir = '$envDirN';\n";
|
|
|
275 if (defined($library_percent)) {
|
|
|
276 print $envConfigH "\$library_percent = '$library_percent';\n";
|
|
|
277 }
|
|
|
278 if (defined($CIS_total_pvalue)) {
|
|
|
279 print $envConfigH "\$CIS_total_pvalue = '$CIS_total_pvalue';\n";
|
|
|
280 }
|
|
|
281 if (defined($CIS_library_pvalue)) {
|
|
|
282 print $envConfigH "\$CIS_library_pvalue = '$CIS_library_pvalue';\n";
|
|
|
283 }
|
|
|
284 if (defined($CIS_region_pvalue)) {
|
|
|
285 print $envConfigH "\$CIS_region_pvalue = '$CIS_region_pvalue';\n";
|
|
|
286 }
|
|
|
287 if (defined($cocis_threshold)) {
|
|
|
288 print $envConfigH "\$cocis_threshold = '$cocis_threshold';\n";
|
|
|
289 }
|
|
|
290 if (defined($bowtieIdx && $bwaIdx)) {
|
|
|
291 print $envConfigH "\$bwa_exe = 'bwa';\n";
|
|
|
292 print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
|
|
|
293 print $envConfigH "\$bwa_idx = '$bwaIdx';\n";
|
|
|
294 print $envConfigH "\$bowtie_idx = '$bowtieIdx';\n";
|
|
|
295 print $envConfigH "\$aligner = 'bow_bwa';\n";
|
|
|
296 }
|
|
|
297 elsif (defined($bowtieIdx)) {
|
|
|
298 print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
|
|
|
299 print $envConfigH "\$bowtie_idx = '$bowtieIdx';\n";
|
|
|
300 print $envConfigH "\$aligner = 'bowtie';\n";
|
|
|
301 }
|
|
|
302 elsif (defined($bwaIdx)) {
|
|
|
303 print $envConfigH "\$bwa_exe = 'bwa';\n";
|
|
|
304 print $envConfigH "\$bwa_idx = '$bwaIdx';\n";
|
|
|
305 print $envConfigH "\$aligner = 'bwa';\n";
|
|
|
306 }
|
|
|
307
|
|
|
308 if ($#mutagens_array >= 0 && length($mutagens_array[0]) > 0) {
|
|
|
309 print $envConfigH "\$mutagens = '" . join(",", @mutagens_array) . "';\n";
|
|
|
310 }
|
|
|
311 if (defined($annotation_file)) {
|
|
|
312 print $envConfigH "\$annotation_file ='" . $annotation_file . "';\n";
|
|
|
313 }
|
|
|
314 if (!defined($seqType) && defined($seqFile)) {
|
|
|
315 $seqType = &determine_seq_input_type(\$seqFile, \$envConfigH);
|
|
|
316 }
|
|
|
317 }
|
|
|
318 print $envConfigH "return 1;\n";
|
|
|
319 close($envConfigH);
|
|
|
320
|
|
|
321 my ($output, $orig_dir);
|
|
|
322 mkdir ("$envDirN/data"); # || die "Unable to create data dir, $envDirN/data. $!\n";
|
|
|
323 mkdir("$envDirN/lib"); # || die "Unable to create lib. $!\n";
|
|
|
324 my @lib_source = ($path . "/lib/");
|
|
|
325 find(\&lib_copy, @lib_source);
|
|
|
326
|
|
|
327 my $copy_ins_files = 0;
|
|
|
328 my $indexH;
|
|
|
329 if (defined($index)) {
|
|
|
330 open ($indexH, ">", $index) || die "Unable to open $index for writing: $!\n";
|
|
|
331 print $indexH "<HTML>\n<HEAD>\n<TITLE>$projName Results</TITLE>\n</HEAD>\n<BODY>\n<H1>$projName</H1>\n";
|
|
|
332 if (defined($index_path)) {
|
|
|
333 unless (-d $index_path) {
|
|
|
334 mkdir($index_path);
|
|
|
335 }
|
|
|
336 }
|
|
|
337 }
|
|
|
338
|
|
|
339 ###
|
|
|
340 # Phase 1, sequences through mapping to insert list
|
|
|
341 ###
|
|
|
342 if (defined($seqFile)) {
|
|
|
343 my $seqOutFn = "$envDirN/data/seqs.tab";
|
|
|
344 &pre_process_seqs(\$seqType, \$seqFile, \$seqOutFn, \$debug);
|
|
|
345 copy("$bar2libFile", "$envDirN/data/barcode2lib.txt") || die "Unable to link barcode to library file in execution environment. $!\n";
|
|
|
346 $orig_dir = &cwd;
|
|
|
347 if ($debug) { print "Starting dir: $orig_dir.\n"; }
|
|
|
348 chdir($envDirN);
|
|
|
349 if ($debug) { print "Current dir: " . &cwd . "\n"; }
|
|
|
350 open($output, "perl $envDirN/lib/TAPDANCE.pl |") || die "Unable to run TAPDANCE.pl. $!\n";
|
|
|
351 if ($debug) { while (<$output>) { print "$_"; } }
|
|
|
352 close($output);
|
|
|
353 chdir($orig_dir);
|
|
|
354 if (defined($index)) {
|
|
|
355 print $indexH "<H3>Insertion Analysis</H3>\n<P>To visualize the insertions in this project use the \"Non Redundant Inserts BED\" file in the history.\n<UL>\n";
|
|
|
356 #if (defined($projSum)) {
|
|
|
357 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_InsertsVis_hidden.pdf", "pdf", "QC graphs of inserts", 0) . "</LI>\n";
|
|
|
358 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_InsertsSummary_hidden.txt", "txt", "Summary of all inserts", 0) . "</LI>\n";
|
|
|
359 copy("$envDirN/results/summary_$projName.txt", $index_path . "/primary_" . $index_id . "_InsertsSummary_hidden.txt") || die "Unable to retrieve project summary, $envDirN/results/summary_$projName.txt. $!\n";
|
|
|
360 system("Rscript --vanilla $envDirN/lib/insert_vis.R --args $envDirN/results/lib_stats_$projName.txt $envDirN/results/region_stats_$projName.txt " . $index_path . "/primary_" . $index_id . "_InsertsVis_hidden.pdf");
|
|
|
361 #copy("$envDirN/results/summary_$projName.txt", "$projSum") || die "Unable to retrieve project summary, $envDirN/results/summary_$projName.txt. $!\n";
|
|
|
362 print $indexH "</UL>\n</P>\n";
|
|
|
363 }
|
|
|
364 #if (defined($projVis)) {
|
|
|
365 #system("Rscript --vanilla $envDirN/lib/insert_vis.R --args $envDirN/results/lib_stats_$projName.txt $envDirN/results/region_stats_$projName.txt $projVis");
|
|
|
366 #}
|
|
|
367 $copy_ins_files=1;
|
|
|
368 if ($debug) { print "TAPDANCE.pl done.\n"; }
|
|
|
369 }
|
|
|
370
|
|
|
371 ###
|
|
|
372 # Phase 4, merge projects
|
|
|
373 ###
|
|
|
374 if (defined($merge)) {
|
|
|
375 open(my $meta_tab, ">", "$envDirN/data/meta.tab") || die "Unable to write project merge list to $envDirN/data/meta.tab: $!\n";
|
|
|
376 my @merge_projs = split(',', $merge);
|
|
|
377 foreach my $merge_proj (@merge_projs) {
|
|
|
378 print $meta_tab "$merge_proj\n";
|
|
|
379 }
|
|
|
380 close($meta_tab);
|
|
|
381 $orig_dir = &cwd;
|
|
|
382 if ($debug) { print "Starting dir: $orig_dir.\n" }
|
|
|
383 chdir($envDirN);
|
|
|
384 open($output, "perl $envDirN/lib/TAP4.pl |") || die "Unable to run TAP4.pl. $!\n";
|
|
|
385 if ($debug) { while (<$output>) { print "$_"; } }
|
|
|
386 close($output);
|
|
|
387 chdir($orig_dir);
|
|
|
388 $copy_ins_files=1;
|
|
|
389 if ($debug) { print "TAP4.pl done.\n"; }
|
|
|
390 }
|
|
|
391
|
|
|
392 ###
|
|
|
393 # Copy insert files
|
|
|
394 ###
|
|
|
395 if ($copy_ins_files) {
|
|
3
|
396 if (defined($projBed)) {
|
|
|
397 copy("$envDirN/results/raw_$projName.BED", "$projBed") || die "Unable to retrieve project BED, $envDirN/results/raw_$projName.BED. $!\n";
|
|
|
398 }
|
|
0
|
399 if (defined($index)) {
|
|
|
400 #print $indexH "<A HREF=\"primary_" . $index_id . "_InsertsBED_hidden_bed?preview=true\">A BED containing all inserts</A><BR>\n";
|
|
|
401 copy("$envDirN/results/raw_$projName.BED", $index_path . "/primary_" . $index_id . "_InsertsBED_visible_bed") || die "Unable to retrieve project BED, $envDirN/results/raw_$projName.BED. $!\n";
|
|
|
402 }
|
|
|
403 if ($debug) { print "Files copied.\n"; }
|
|
|
404 }
|
|
|
405
|
|
|
406 ###
|
|
|
407 # Phase 2, calculate CISes
|
|
|
408 ###
|
|
|
409 if (!$no_cis) { #defined($cisWig) && defined($cisSum)) {
|
|
|
410 #Fill chromo tab for phase two
|
|
|
411 open(my $chromoTabH, ">", $envDirN . "/data/chromo.tab") || die "Unable to open chromo tab $envDirN/data/chromo.tab: $!\n";
|
|
|
412 if (defined($metadata)) {
|
|
|
413 copy("$metadata", "$envDirN/data/metadata.tab") || die "Unable to copy provided metadata, $metadata. $!\n";
|
|
|
414 }
|
|
|
415 foreach (@omitChrom) { print $chromoTabH "$_\n"; }
|
|
|
416 close ($chromoTabH);
|
|
|
417 if ($debug) { print "Omitted chromosomes written.\n"; }
|
|
|
418
|
|
|
419 my ($metadataTabH, $barcodeInH);
|
|
|
420 my %metadata_attrs = ();
|
|
|
421 my @map;
|
|
|
422 if (!defined($metadata) && defined($bar2libFile)) {
|
|
|
423 open($metadataTabH, ">", $envDirN . "/data/metadata.tab") || die "Unable to open chromo tab $envDirN/data/metadata.tab: $!\n";
|
|
|
424 open($barcodeInH, "<", "$envDirN/data/barcode2lib.txt") || die "Unable to open barcode to library mapping, $bar2libFile: $!\n";
|
|
|
425 my ($idx, $lib_name);
|
|
|
426 while (<$barcodeInH>) {
|
|
|
427 chomp;
|
|
|
428 @map = split("\t", $_);
|
|
|
429 $map[1] =~ s/^\s+//;
|
|
|
430 $map[1] =~ s/\s+$//;
|
|
|
431 $map[1] =~ m/(.*)-[L|R]/;
|
|
|
432 $lib_name = $1;
|
|
|
433 print $metadataTabH join("\t", $lib_name, "all", "cis") . "\n";
|
|
|
434 for ($idx = 3; $idx <= $#map; $idx++) {
|
|
|
435 $map[$idx] =~ s/^\s+//;
|
|
|
436 $map[$idx] =~ s/\s+$//;
|
|
|
437 print $metadataTabH join("\t", $lib_name, $map[$idx], "cis") . "\n";
|
|
|
438 $metadata_attrs{$map[$idx]} = 1;
|
|
|
439 }
|
|
|
440 }
|
|
|
441 close($barcodeInH);
|
|
|
442 close($metadataTabH);
|
|
|
443 $meta_gen = 1;
|
|
|
444 if ($debug) { print "Metadata written.\n"; }
|
|
|
445 }
|
|
|
446 else {
|
|
|
447 open(my $metadata_file, "<", $envDirN . "/data/metadata.tab") || die "Unable to open meta, $!\n";
|
|
|
448 while(<$metadata_file>) {
|
|
|
449 chomp;
|
|
|
450 @map = split("\t", $_);
|
|
|
451 if (uc $map[2] eq "CIS" && uc $map[1] ne "ALL") {
|
|
|
452 $metadata_attrs{$map[1]} = 1;
|
|
|
453 }
|
|
|
454 }
|
|
|
455 close($metadata_file);
|
|
|
456 }
|
|
|
457
|
|
|
458 #mkdir("$envDirN/CIS"); # || die "Unable to create lib. $!\n";
|
|
|
459
|
|
|
460 #if ($debug) { print "Created $envDirN/CIS\n"; }
|
|
|
461
|
|
|
462 $orig_dir = &cwd;
|
|
|
463 if ($debug) { print "Starting dir: $orig_dir.\n" }
|
|
|
464 chdir($envDirN);
|
|
|
465 open($output, "perl ./lib/TAP2.pl |") || die "Unable to run TAP2.pl. $!\n";
|
|
|
466 if ($debug) { while (<$output>) { print "$_"; } }
|
|
|
467 close($output);
|
|
|
468 chdir($orig_dir);
|
|
|
469 if ($debug) { print "TAP2.pl run.\n"; }
|
|
|
470
|
|
|
471 if ($debug) { print "TAP2.pl done.\n"; }
|
|
|
472 if (defined($index)) {
|
|
|
473 print $indexH "<H3>CIS calls</H3>\n<P>To Visualize the CIS Calls, use the \"CIS WIG\" history entry. Each metadata tag that generated it's own CIS calls has it's own WIG file in the history as \"CIS WIG (tag)\".\n<UL>\n";
|
|
|
474 #if (defined($cisSum)) {
|
|
|
475 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_CISSummary_hidden.txt", "txt", "Summary of all CIS calls", 0) . "</LI>\n";
|
|
|
476 copy("$envDirN/results/summary_CIS_$projName.txt", $index_path . "/primary_" . $index_id . "_CISSummary_hidden.txt") || die "Unable to retrieve CIS summary, $envDirN/results/cis_summary.txt. $!\n";
|
|
|
477 #print $indexH "<A HREF=\"primary_" . $index_id . "_CISWIG_visible_wig\">WIG of all CIS calls</A><BR>\n";
|
|
|
478 #copy($envDirN . "/results/all/plot_all-nr-" . $projName . "-" . $library_percent . ".wig", $index_path . "/primary_" . $index_id . "_CISWIG_visible_wig") || die "Unable to retrieve CIS WIG, $envDirN/results/all/plot_all-nr-$projName-$library_percent.wig. $!\n";
|
|
|
479 copy($envDirN . "/results/all/plot_all-nr-" . $projName . "-" . $library_percent . ".wig", $cisWig) || die "Unable to retrieve CIS WIG, $envDirN/results/all/plot_all-nr-$projName-$library_percent.wig. $!\n";
|
|
|
480 print $indexH "<UL>\n";
|
|
|
481 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_Ann_hidden.txt", "txt", "Ann.txt", 0) . "</LI>\n";
|
|
|
482 copy("$envDirN/results/Assoc/Ann.txt", $index_path . "/primary_" . $index_id . "_Ann_hidden.txt") || die "Unable to retrieve Ann.txt. $!\n";
|
|
|
483 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_Cis_hidden.txt", "txt", "Cis.txt", 0) . "</LI>\n";
|
|
|
484 copy("$envDirN/results/Assoc/Cis.txt", $index_path . "/primary_" . $index_id . "_Cis_hidden.txt") || die "Unable to retrieve Cis.txt. $!\n";
|
|
|
485 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_AnnAnnTable_hidden.txt", "txt", "Ann_Ann_table.txt", 0) . "</LI>\n";
|
|
|
486 copy("$envDirN/results/Assoc/Ann_Ann_table.xls", $index_path . "/primary_" . $index_id . "_AnnAnnTable_hidden.txt") || die "Unable to retrieve Ann_Ann_table.xls. $!\n";
|
|
|
487 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_AnnAnnMatrix_hidden.txt", "txt", "Ann_ann_matrix.txt", 0) . "</LI>\n";
|
|
|
488 copy("$envDirN/results/Assoc/Ann_ann_matrix.txt", $index_path . "/primary_" . $index_id . "_AnnAnnMatrix_hidden.txt") || die "Unable to retrieve Ann_ann_matrix.txt. $!\n";
|
|
|
489 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_AnnCisTable_hidden.txt", "txt", "Ann_cis_table.xls", 0) . "</LI>\n";
|
|
|
490 copy("$envDirN/results/Assoc/Ann_cis_table.xls", $index_path . "/primary_" . $index_id . "_AnnCisTable_hidden.txt") || die "Unable to retrieve Ann_cis_table.xls. $!\n";
|
|
|
491 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_AnnCisMatrix_hidden.txt", "txt", "Ann_cis_matrix.txt", 0) . "</LI>\n";
|
|
|
492 copy("$envDirN/results/Assoc/Ann_cis_matrix.txt", $index_path . "/primary_" . $index_id . "_AnnCisMatrix_hidden.txt") || die "Unable to retrieve Ann_cis_matrix.txt. $!\n";
|
|
|
493 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_CisCisTable_hidden.txt", "txt", "Cis_cis_table.xls", 0) . "</LI>\n";
|
|
|
494 copy("$envDirN/results/Assoc/Cis_cis_table.xls", $index_path . "/primary_" . $index_id . "_CisCisTable_hidden.txt") || die "Unable to retrieve Cis_cis_table.xls. $!\n";
|
|
|
495 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_CisCisMatrix_hidden.txt", "txt", "Cis_cis_matrix.txt", 0) . "</LI>\n";
|
|
|
496 copy("$envDirN/results/Assoc/Cis_cis_matrix.txt", $index_path . "/primary_" . $index_id . "_CisCisMatrix_hidden.txt") || die "Unable to retrieve Cis_cis_matrix.txt. $!\n";
|
|
|
497 print $indexH "</UL>\n</UL>\n</P>\n";
|
|
|
498 #copy("$envDirN/results/summary_CIS_$projName.txt", "$cisSum") || die "Unable to retrieve CIS summary, $envDirN/results/cis_summary.txt. $!\n";
|
|
|
499 #}
|
|
|
500 #if (defined($cisWig)) {
|
|
|
501 #copy("$envDirN/results/all/plot_all-nr-$projName-$library_percent.wig", "$cisWig") || die "Unable to retrieve CIS WIG, $envDirN/results/all/plot_all-nr-$projName-$library_percent.wig. $!\n";
|
|
|
502 #}
|
|
|
503 #if (defined($cisWigId) && defined($cisWigPath)) {
|
|
|
504 #my $count;
|
|
|
505 my $filesize;
|
|
|
506 foreach my $tag (keys %metadata_attrs) {
|
|
|
507 #open(FILE, "< $envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig") or die "can't open $envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig: $!";
|
|
|
508 #for ($count=0; <FILE>; $count++) { }
|
|
|
509 #if ($count > 1) {
|
|
|
510 if (-e "$envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig") {
|
|
|
511 $filesize = -s "$envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig";
|
|
|
512 if ($filesize > 0) {
|
|
|
513 #print $indexH "<A HREF=\"primary_" . $index_id . "_" . $tag . "_visible_wig\">Summary of CIS calls for libraries with the " . $tag . " label</A><BR>\n";
|
|
|
514 #copy("$envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig", $index_path . "/primary_" . $index_id . "_" . $tag . "_visible_wig") || die "Unable to retrieve CIS WIG, $envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig. $!\n";
|
|
|
515 copy("$envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig", "$cisWigPath/primary_" . $cisWigId . "_" . $tag . "_visible_wig") || die "Unable to retrieve CIS WIG, $envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig. $!\n";
|
|
|
516 }
|
|
|
517 }
|
|
|
518 }
|
|
|
519 }
|
|
|
520 if ($debug) { print "Files copied.\n"; }
|
|
|
521 }
|
|
|
522
|
|
|
523 if (defined($index)) {
|
|
|
524 print $indexH "<P>To add files to your history for further processing in Galaxy, right-click the link and select \"Copy Link URL\". Open the \"Get Data\" menu in the \"Tools\" sidebar and open the \"Upload File\" link. Paste the copied URL in the \"URL/Text\" box.</P>\n.";
|
|
|
525 print $indexH "<H3>Generated configuration files</H3>\n<UL>\n";
|
|
|
526 print $indexH "<LI>" . &link_file("primary_" . $index_id . "_ConfigPl_hidden.txt", "txt", "Project configuration", 0) . "</LI>\n";
|
|
|
527 copy("$envDirN/config.pl", $index_path . "/primary_" . $index_id . "_ConfigPl_hidden.txt") || die "Unable to retrieve config.pl. $!\n";
|
|
|
528 if ($meta_gen) {
|
|
|
529 print $indexH "<LI>" . &link_file("config.pl", "txt", "Project configuration", 0) . "</LI>\n";
|
|
|
530 copy("$envDirN/config.pl", $index_path . "/primary_" . $index_id . "_ConfigPl_hidden.txt") || die "Unable to retrieve config.pl. $!\n";
|
|
|
531 }
|
|
|
532 print $indexH "</UL>\n</BODY>\n</HTML>\n";
|
|
|
533 close($indexH);
|
|
|
534 }
|
|
|
535
|
|
|
536 exit(0);
|
|
|
537
|
|
|
538 sub determine_seq_input_type {
|
|
|
539 my ($input_fn_ref, $config_fh_ref) = @_;
|
|
|
540 open(INPUT, "<", ${$input_fn_ref}) || die "Unable to open input file ${$input_fn_ref}, $!\n";
|
|
|
541 my $first_line = <INPUT>;
|
|
|
542 close (INPUT);
|
|
|
543 if ($first_line=~/^@/) {
|
|
|
544 print ${$config_fh_ref} sprintf("\$quality = 1;\n");
|
|
|
545 return "fastq";
|
|
|
546 }
|
|
|
547 elsif ($first_line=~/^>/) {
|
|
|
548 print ${$config_fh_ref} sprintf("\$quality = 0;\n");
|
|
|
549 return "fasta";
|
|
|
550 }
|
|
|
551 else {
|
|
|
552 my @split_array = split("\t", $first_line);
|
|
|
553 if ($#split_array > 0) {
|
|
|
554 return "tab";
|
|
|
555 }
|
|
|
556 else {
|
|
|
557 die "Unable to determine sequence input value type (fastq|fasta|tabular)\n";
|
|
|
558 }
|
|
|
559 }
|
|
|
560 }
|
|
|
561
|
|
|
562 sub pre_process_seqs {
|
|
|
563 my ($seq_type_ref, $in_file_ref, $out_fn_ref, $debug_ref) = @_;
|
|
|
564 #FASTQ
|
|
|
565 if (${$seq_type_ref} eq "fastq") {
|
|
|
566 if (${$debug_ref}) { print "FASTQ\n"; }
|
|
|
567 open (my $out_fh, ">", ${$out_fn_ref}) || die "Unable to open ${$out_fn_ref}, $!\n";
|
|
|
568 &process_fastq($in_file_ref, \&fastq_entry, \$out_fh);
|
|
|
569 close($out_fh);
|
|
|
570 }
|
|
|
571
|
|
|
572 #FASTA
|
|
|
573 elsif (${$seq_type_ref} eq "fasta") {
|
|
|
574 if (${$debug_ref}) { print "FASTA\n"; }
|
|
|
575 open (my $out_fh, ">", ${$out_fn_ref}) || die "Unable to open ${$out_fn_ref}, $!\n";
|
|
|
576 &process_fasta($in_file_ref, \&fasta_entry, \$out_fh);
|
|
|
577 close($out_fh);
|
|
|
578 }
|
|
|
579
|
|
|
580 #Tab, no quality info
|
|
|
581 else {
|
|
|
582 if (${$debug_ref}) { print "TABULAR\n"; }
|
|
|
583 copy (${$in_file_ref}, ${$out_fn_ref}) || die "Unable to copy seq file to execution environment. $!\n";
|
|
|
584 #open($output, "ln -s ${$in_file_ref} ${$out_fn_ref} |") || die "Unable to link seq file in execution environment. $!\n";
|
|
|
585 #if (${$debug_ref}) { while (<$output>) { print "$_"; } }
|
|
|
586 #close($output);
|
|
|
587 }
|
|
|
588 }
|
|
|
589
|
|
|
590 sub fasta_entry {
|
|
|
591 my ($seq_id_ref, $seq_ref, $array_ref) = @_;
|
|
|
592 print "fasta_entry(" . join(",", ${$seq_id_ref}, ${$seq_ref}) . ")\n";
|
|
|
593 #print ${$array_ref->[0]} join("\t", ${$seq_id_ref}, "", ${$seq_ref}) . "\n";
|
|
|
594 #my $seq_qual = "";
|
|
|
595 #for(my $i = 0; $i < length(${$seq_ref}); $i++) { $seq_qual = $seq_qual . 'h'; }
|
|
|
596 #print sprintf("Fasta_entry: length of sequence:%s length of quality:%s", length(${$seq_id_ref}), length($seq_qual));
|
|
|
597 print ${$array_ref->[0]} join("\t", ${$seq_id_ref}, "", ${$seq_ref});
|
|
|
598 ${$seq_ref} =~ s/[A,C,T,G]/I/g;
|
|
|
599 ${$seq_ref} =~ s/N/!/g;
|
|
|
600 print ${$array_ref->[0]} sprintf("\t%s\n", ${$seq_ref});
|
|
|
601 }
|
|
|
602
|
|
|
603 sub fastq_entry {
|
|
|
604 my ($seq_id_ref, $seq_ref, $seq_qual, $array_ref) = @_;
|
|
|
605 #print "fastq_entry(" . join(",", ${$seq_id_ref}, ${$seq_ref}, ${$seq_qual}) . ")\n";
|
|
|
606 print ${$array_ref->[0]} join("\t", ${$seq_id_ref}, "", ${$seq_ref}, ${$seq_qual}) . "\n";
|
|
|
607 }
|
|
|
608
|
|
|
609 sub lib_copy {
|
|
|
610 unless(-d $File::Find::name) {
|
|
|
611 copy($File::Find::name, "$envDirN/lib") || die "Unable to copy $File::Find::name to $envDirN/lib. $!\n";
|
|
|
612 }
|
|
|
613 }
|
|
|
614
|
|
|
615 sub link_file {
|
|
|
616 my ($file_name, $file_type, $link_text, $download) = @_;
|
|
|
617 my $out = "<A HREF=\"" . $file_name . "\">" . $link_text . "</A>";
|
|
|
618 if ($download) {
|
|
|
619 $out = $out . " [<A HREF=\"" . $file_name . "/display?to_ext=" . $file_type . "\">Download</A>]";
|
|
|
620 }
|
|
|
621 return $out;
|
|
|
622 }
|