annotate tapdance_runner.pl @ 3:17ce4f3bffa2 default tip

Uploaded
author jesse-erdmann
date Tue, 24 Jan 2012 18:33:41 -0500
parents 1437a2df99c0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
1 #!/project/bioperl/perl-5.10.1-sles11/bin/perl -w
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
2 #
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
3 #------------------------------------------------------------------------------
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
4 # University of Minnesota
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
5 # Copyright 2010 - 2011, Regents of the University of Minnesota
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
6 #------------------------------------------------------------------------------
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
7 # Author:
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
8 #
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
9 # Jesse Erdmann
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
10 #
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
11 # POD documentation
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
12 #------------------------------------------------------------------------------
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
13 =pod BEGIN
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
14
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
15 =head1 NAME
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
16
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
17 tapdance_runner.pl - TAPDANCE wrapper that provides a single interface to all
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
18 TAPDANCE functionality.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
19
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
20 =head1 SYNOPSIS
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
21
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
22 tapdance_runner.pl [-help]
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
23
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
24 See http://sf.net/p/tapdancebio for full documentation
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
25
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
26 =head1 OPTIONS
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
27
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
28 =over 6
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
29
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
30 =item B<-help>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
31
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
32 Print this usage summary.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
33
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
34 =item B<-seqFile sequence_file>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
35
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
36 The sequences to be processed for insertions.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
37
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
38 =item B<-bar2libFile barcode_to_library_mapping_file>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
39
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
40 A tab delimited file where each line contains the barcode and name of a
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
41 library. Additionally, columns after the second column will be treated as
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
42 metadata tags to be associated with the library.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
43
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
44 =item B<-baseConfig custom_config>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
45
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
46 OPTIONAL. If there is a custom tapdance_base_config.txt to be used in
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
47 special cases, use this parameter to specify its use. An example where
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
48 this might be useful is the case where distinct groups of users are using
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
49 the same TAPDANCE installation, but separate mutagens.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
50
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
51 =item B<-config predefined_config_file>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
52
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
53 A configuration file may be used rather than specify options on
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
54 the command line. Any options specified in the base config file
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
55 will be overridden by values specified in this config file.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
56
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
57 =item B<-db_config database_configuration_file>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
58
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
59 Use this option if the database configuration needs to be kept separate
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
60 from other configuration information. This is most useful in Galaxy
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
61 where the end user should not have the database user credentials
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
62 exposed to them.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
63
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
64 =item B<-bowtieIdx reference_genome>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
65
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
66 The name of the bowtie index to use for aligning individual sequences. This
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
67 is only used during the first phase of TAPDANCE. It is important to note that
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
68 the index name is not a single file. For instance, the mm9 index has several
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
69 files named mm9.[0-9].ebwt and mm9.rev.[0-9].ebwt. However, the correct
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
70 value for this parameter would be /my/path/to/indexes/mm9
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
71
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
72 =item B<-mutagen mutagen_sequence>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
73
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
74 The sequence to match determining whether the mutagen of interest is present.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
75 Any sequences not matching this sequence will not be used in the analysis
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
76 while those that do will have the mutagen trimmed prior to alignment. If, for
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
77 instance the mutagen for a particular project has a sequence of ACTG, but the
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
78 user also wanted to remove up to two bases following the mutagen sequence, the
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
79 wildcard character '_' can be used to specify a mutagen sequence of ACTG__.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
80
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
81 Any number of mutagen sequences may be specified by entering multiple -mutagen
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
82 entries on the command line. E.G. perl tapdance_runner.pl -mutagen ACGT
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
83 -mutagen TGCA. This is useful when a mutagen has more than one common captured
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
84 sequence in the data.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
85
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
86 =item B<-projectName project_name>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
87
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
88 A name for the project, up to 255 chars.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
89
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
90 =item B<-omittedChromosomes chromosomes_to_omit>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
91
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
92 It can be useful to remove the chromosome of the donor concatamer from the
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
93 calculations to remove the effects of local hopping for some projects. The
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
94 chromosomes can be specified as a comma delimited list and must match the
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
95 names used in the reference genome. E.G -omittedChromosomes chr1
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
96 -omittedChromosomes chr2
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
97
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
98 =item B<-output_dir location_to_write_to>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
99
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
100 The location where execution will be performed
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
101
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
102 DEFAULT:'./'
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
103
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
104 =item B<-metadata library_metadata>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
105
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
106 OPTIONAL. To specify metadata on libraries outside of the barcode to library
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
107 mapping file, this parameter may be used. The file should contain the name of
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
108 the library in one column and the metadata tag to affiliate with it in the
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
109 second. Each library may have as many entries as needed.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
110
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
111 =item B<-lib_pct library_percent>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
112
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
113 =item B<-CIS_tot_p CIS_total_pvalue>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
114
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
115 =item B<-CIS_lib_p CIS_library_pvalue>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
116
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
117 =item B<-CIS_reg_p CIS_region_pvalue>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
118
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
119 =item B<-coCIS_thresh cocis_threshold>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
120
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
121 =item B<-merge merge>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
122
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
123 Specify projects to be merged as the new project specified with -projectName.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
124 E.G. -merge my_first_project -merge my_second_project -projectName
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
125 my_merged_project.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
126
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
127 =item B<-annotation annotation_file>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
128
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
129 Specify the bed file to annotate CISes with. The default feature set is UCSC's
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
130 mm9 refSeq genes.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
131
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
132 =item B<-no_cis>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
133
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
134 To generate a list of inserts only, specify no_cis. This is useful in cases
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
135 where a new set of data needs to be merged with a previous set of data. Use
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
136 this option as a first step to prepare the new data. Use -merge to combine
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
137 the resulting projects and call CISes on the new project.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
138
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
139 =item B<-seqType seqfile_format>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
140
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
141 OPTIONAL. If not specified, TAPDANCE will attempt to identify the input file
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
142 type on its own. Valid options are 'tab', 'fasta' and 'fastq'.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
143
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
144 =item B<-debug>
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
145
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
146 OPTIONAL.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
147
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
148 =back
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
149
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
150 =cut
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
151
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
152 #### END of POD documentation.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
153 #-----------------------------------------------------------------------------
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
154
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
155 use strict;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
156 use Cwd;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
157 use Getopt::Long;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
158 use File::Copy;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
159 use File::Find;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
160 use File::Temp qw/ tempfile tempdir /;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
161 use Pod::Usage;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
162
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
163 #tapdance_runner.pl -s $seqs -b $bar2lib -g $genomeIdx -pn $projName -o $omitChrom -pb $projBed -ps $projSum -cb $cisBed -cs $cisSum -bc $baseConfig
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
164
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# File-scoped handle variable (presumably a database handle, judging by the
# name); it is only declared here and not assigned in this part of the script.
my $dbh;

# Directory that contains this script.  The bundled base configuration and
# the helper library are loaded relative to it.
#
# dirname() is used instead of trimming "/<name>.pl" with a regex: the regex
# left $0 unchanged when the script was started without a directory part
# (e.g. "perl tapdance_runner.pl"), which produced a require path such as
# "tapdance_runner.pl/lib/...".  dirname() yields "." in that case.
use File::Basename qw(dirname);
my $path = dirname($0);

require "$path/lib/tapdance_base_config.pl";
require "$path/util.pl";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
170
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# ---------------------------------------------------------------------------
# Command-line handling: declare every option variable, describe the options
# to Getopt::Long, parse @ARGV, and show usage on error or on -help.
# ---------------------------------------------------------------------------

# Universal variables
my ($seqFile, $seqType, $bar2libFile);
my ($bowtieIdx, $bwaIdx);    # -bwaIdx is commented out of the option table below
my ($projName, $baseConfig, $metadata, $merge, $preconfig_file, $db_config);
my (@omitChrom, @mutagens_array);
my ($library_percent, $CIS_total_pvalue, $CIS_library_pvalue,
    $CIS_region_pvalue, $cocis_threshold);
my $annotation_file;
my $no_cis = 0;

# CMD line variables
my ($debug, $output_dir, $noUnlink, $help_flag);

# Galaxy variables
my ($index, $index_id, $index_path);
my ($projBed, $projBedId, $projSum, $projSumId, $projVis, $projVisId);
my ($cisWig, $cisWigId, $cisWigPath, $cisSum, $cisSumId);
my $tmpDir;

# Getopt::Long specification => destination.  Options bound to an array
# reference may be repeated on the command line.
# NOTE(review): -merge is bound to a scalar, so only the last -merge value is
# kept, although the POD shows it being given more than once -- confirm.
my %options = (
    # Universal Variables
    'seqFile|s=s'            => \$seqFile,
    'seqType|st=s'           => \$seqType,
    'bar2libFile|b=s'        => \$bar2libFile,
    'bowtieIdx=s'            => \$bowtieIdx,
    #'bwaIdx=s'              => \$bwaIdx,
    'projectName|pn=s'       => \$projName,
    'omittedChromosomes|o=s' => \@omitChrom,
    'baseConfig|bc=s'        => \$baseConfig,
    'metadata|m=s'           => \$metadata,
    'mutagen=s'              => \@mutagens_array,
    'lib_pct=f'              => \$library_percent,
    'CIS_tot_p=f'            => \$CIS_total_pvalue,
    'CIS_lib_p=f'            => \$CIS_library_pvalue,
    'CIS_reg_p=f'            => \$CIS_region_pvalue,
    'coCIS_thresh=f'         => \$cocis_threshold,
    'merge=s'                => \$merge,
    'annotation=s'           => \$annotation_file,
    'config=s'               => \$preconfig_file,
    'db_config=s'            => \$db_config,
    'no_cis'                 => \$no_cis,

    # CMD Line Variables
    'help'                   => \$help_flag,
    'output_dir=s'           => \$output_dir,
    'debug|d'                => \$debug,

    # Galaxy Variables
    'index=s'                => \$index,
    'index_id=s'             => \$index_id,
    'index_path=s'           => \$index_path,
    'projectBed|pb=s'        => \$projBed,
    'projectBedId=s'         => \$projBedId,
    'cisWig|cw=s'            => \$cisWig,
    'cisWigId|cwid=s'        => \$cisWigId,
    'cisWigPath|cwpath=s'    => \$cisWigPath,
    'tmp_dir|t=s'            => \$tmpDir,
    'no_unlink'              => \$noUnlink,
);

# A parse failure prints the usage text and exits with status 2; an explicit
# -help request prints it and exits with status 1.
unless (GetOptions(%options)) {
    pod2usage(2);
}
if ($help_flag) {
    pod2usage(1);
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
225
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# Clean up the project name.  sanitize_project() is not defined in the visible
# part of this script; presumably it comes from one of the required files.
$projName = sanitize_project($projName);
my $meta_gen = 0;

# Pick the directory the run executes in ($envDirN):
#   * -output_dir given : use it, creating it when it does not exist yet;
#   * otherwise         : create a temporary directory, below -tmp_dir when
#                         that option was supplied.
my $envDirN;
if (defined($output_dir)) {
    $envDirN = $output_dir;
    unless (-d $output_dir) {
        # Fail here with a precise message rather than later, when the first
        # file is written into a directory that could not be created.
        mkdir($output_dir)
            or die "Unable to create output directory $output_dir: $!\n";
    }
}
else {
    my @tempdir_args;
    push @tempdir_args, (DIR => $tmpDir) if defined($tmpDir);
    # NOTE(review): File::Temp documents CLEANUP (not UNLINK) as the tempdir()
    # option that removes the directory at exit.  UNLINK is passed exactly as
    # before so behaviour is unchanged -- confirm what -no_unlink should do.
    push @tempdir_args, (UNLINK => 1) unless $noUnlink;
    $envDirN = tempdir(@tempdir_args);
}
if ($debug) { print "EnvDir = $envDirN\n"; }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
253
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# ---------------------------------------------------------------------------
# Write "$envDirN/config.pl", the Perl configuration file for this run.
#
# Layout of the generated file:
#   1. optional "require" of the separate database configuration (-db_config)
#   2. a verbatim copy of tapdance_base_config.pl (system defaults)
#   3. either the contents of the -config file, or the project-specific
#      values assembled from the command line
#   4. a final "return 1;"
# ---------------------------------------------------------------------------
if (!defined($baseConfig)) { $baseConfig = "$path/lib"; }

# Render a value as a single-quoted Perl literal.  Backslashes and single
# quotes are escaped so that a path or name containing them cannot break (or
# inject code into) the generated file; other values come out unchanged.
my $perl_quote = sub {
    my ($value) = @_;
    $value = '' unless defined($value);
    $value =~ s/([\\'])/\\$1/g;
    return "'" . $value . "'";
};

open(my $baseConfigH, "<", $baseConfig . "/tapdance_base_config.pl")
    or die "Unable to open $baseConfig: $!\n";
open(my $envConfigH, ">", $envDirN . "/config.pl")
    or die "Unable to open environment $envDirN/config.pl: $!\n";

if (defined($db_config)) {
    print $envConfigH "require " . $perl_quote->($envDirN . "/" . $db_config) . ";\n";
}

# Copy system defaults first, overwrite as needed
while (my $line = <$baseConfigH>) {
    print $envConfigH $line;
}
close($baseConfigH);

if (defined($preconfig_file)) {
    # The predefined configuration replaces the command-line values.  The
    # file used to be opened and then ignored; its lines are now appended
    # after the defaults so that they override them, as the POD describes.
    open(my $preConfigH, "<", $preconfig_file)
        or die "Unable to open input configuration file $preconfig_file. $!\n";
    # Leading newline guards against a base config without a final newline.
    print $envConfigH "\n#Predefined configuration values, will override values set above\n";
    while (my $line = <$preConfigH>) {
        print $envConfigH $line;
    }
    close($preConfigH);
}
else {
    print $envConfigH "#Project specific custom values, will override values set above\n";
    print $envConfigH "\$proj = " . $perl_quote->($projName) . ";\n";
    print $envConfigH "\$envDir = " . $perl_quote->($envDirN) . ";\n";

    # Optional numeric thresholds: written only when given on the command line.
    my @thresholds = (
        [ library_percent    => $library_percent ],
        [ CIS_total_pvalue   => $CIS_total_pvalue ],
        [ CIS_library_pvalue => $CIS_library_pvalue ],
        [ CIS_region_pvalue  => $CIS_region_pvalue ],
        [ cocis_threshold    => $cocis_threshold ],
    );
    for my $threshold (@thresholds) {
        my ($name, $value) = @{$threshold};
        next unless defined($value);
        print $envConfigH "\$$name = " . $perl_quote->($value) . ";\n";
    }

    # Aligner selection.  Each index is tested on its own: the previous
    # defined($bowtieIdx && $bwaIdx) tested the result of "&&", which is
    # defined (but false) for an empty or "0" bowtie index even when no bwa
    # index exists.
    if (defined($bowtieIdx) && defined($bwaIdx)) {
        print $envConfigH "\$bwa_exe = 'bwa';\n";
        print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
        print $envConfigH "\$bwa_idx = " . $perl_quote->($bwaIdx) . ";\n";
        print $envConfigH "\$bowtie_idx = " . $perl_quote->($bowtieIdx) . ";\n";
        print $envConfigH "\$aligner = 'bow_bwa';\n";
    }
    elsif (defined($bowtieIdx)) {
        print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
        print $envConfigH "\$bowtie_idx = " . $perl_quote->($bowtieIdx) . ";\n";
        print $envConfigH "\$aligner = 'bowtie';\n";
    }
    elsif (defined($bwaIdx)) {
        print $envConfigH "\$bwa_exe = 'bwa';\n";
        print $envConfigH "\$bwa_idx = " . $perl_quote->($bwaIdx) . ";\n";
        print $envConfigH "\$aligner = 'bwa';\n";
    }

    # All -mutagen values are stored as one comma-separated string.
    if (@mutagens_array && length($mutagens_array[0]) > 0) {
        print $envConfigH "\$mutagens = " . $perl_quote->(join(",", @mutagens_array)) . ";\n";
    }
    if (defined($annotation_file)) {
        print $envConfigH "\$annotation_file =" . $perl_quote->($annotation_file) . ";\n";
    }
    # No -seqType given: let the helper work it out.  It receives references
    # to the sequence file name and to the still-open config handle; what it
    # does with the handle is not visible from this part of the script.
    if (!defined($seqType) && defined($seqFile)) {
        $seqType = determine_seq_input_type(\$seqFile, \$envConfigH);
    }
}
print $envConfigH "return 1;\n";
# Buffered write errors only surface at close, so check it.
close($envConfigH)
    or die "Unable to finish writing $envDirN/config.pl: $!\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
320
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# ---------------------------------------------------------------------------
# Prepare the execution environment: working sub-directories, a copy of the
# bundled lib directory, and (when -index is given) the start of the HTML
# index page.
# ---------------------------------------------------------------------------
my ($output, $orig_dir);

# Directory creation stays best-effort, as before, but an existing directory
# is no longer treated as a failed mkdir and a real failure is reported
# instead of being swallowed.
for my $subdir ("$envDirN/data", "$envDirN/lib") {
    next if -d $subdir;
    mkdir($subdir) or warn "Unable to create $subdir: $!\n";
}

# Walk the installation's lib directory, calling lib_copy for every entry.
# lib_copy is not defined in the visible part of this script.
my @lib_source = ($path . "/lib/");
find(\&lib_copy, @lib_source);

my $copy_ins_files = 0;
my $indexH;
if (defined($index)) {
    open($indexH, ">", $index) or die "Unable to open $index for writing: $!\n";
    print $indexH "<HTML>\n<HEAD>\n<TITLE>$projName Results</TITLE>\n</HEAD>\n<BODY>\n<H1>$projName</H1>\n";
    if (defined($index_path) && !-d $index_path) {
        mkdir($index_path) or warn "Unable to create $index_path: $!\n";
    }
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
338
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
339 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
340 # Phase 1, sequences through mapping to insert list
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
341 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
if (defined($seqFile)) {
    # Convert the input sequences into $envDirN/data/seqs.tab and place the
    # barcode-to-library mapping next to it.
    my $seqOutFn = "$envDirN/data/seqs.tab";
    pre_process_seqs(\$seqType, \$seqFile, \$seqOutFn, \$debug);
    copy("$bar2libFile", "$envDirN/data/barcode2lib.txt") || die "Unable to link barcode to library file in execution environment. $!\n";

    # Run TAPDANCE.pl with the environment directory as working directory.
    $orig_dir = cwd();
    if ($debug) { print "Starting dir: $orig_dir.\n"; }
    chdir($envDirN) || die "Unable to change directory to $envDirN: $!\n";
    if ($debug) { print "Current dir: " . cwd() . "\n"; }
    # List-form piped open: no shell is involved, so unusual characters in
    # $envDirN cannot alter the command.
    open($output, "-|", "perl", "$envDirN/lib/TAPDANCE.pl") || die "Unable to run TAPDANCE.pl. $!\n";
    # Always read the child's output to EOF, even when it is not echoed.
    # Closing the read end while the child is still writing would deliver
    # SIGPIPE to it.
    while (my $line = <$output>) {
        print $line if $debug;
    }
    # A false close on a pipe means the child exited non-zero. Only report it
    # in debug mode so that non-debug runs emit nothing new.
    if (!close($output) && $debug) {
        print "TAPDANCE.pl exited with wait status $?.\n";
    }
    chdir($orig_dir) || die "Unable to change directory back to $orig_dir: $!\n";

    if (defined($index)) {
        my $summary_src  = "$envDirN/results/summary_$projName.txt";
        my $summary_name = "primary_" . $index_id . "_InsertsSummary_hidden.txt";
        my $vis_name     = "primary_" . $index_id . "_InsertsVis_hidden.pdf";

        print $indexH "<H3>Insertion Analysis</H3>\n<P>To visualize the insertions in this project use the \"Non Redundant Inserts BED\" file in the history.\n<UL>\n";
        print $indexH "<LI>" . link_file($vis_name, "pdf", "QC graphs of inserts", 0) . "</LI>\n";
        print $indexH "<LI>" . link_file($summary_name, "txt", "Summary of all inserts", 0) . "</LI>\n";
        copy($summary_src, $index_path . "/" . $summary_name) || die "Unable to retrieve project summary, $summary_src. $!\n";

        # Render the QC graphs. List-form system() bypasses the shell, so the
        # project name cannot be interpreted as shell syntax.
        my $r_status = system(
            "Rscript", "--vanilla", "$envDirN/lib/insert_vis.R", "--args",
            "$envDirN/results/lib_stats_$projName.txt",
            "$envDirN/results/region_stats_$projName.txt",
            $index_path . "/" . $vis_name
        );
        if ($r_status != 0 && $debug) {
            print "insert_vis.R exited with wait status $r_status.\n";
        }
        print $indexH "</UL>\n</P>\n";
    }

    # The insert files produced here are copied out by the copy step below.
    $copy_ins_files = 1;
    if ($debug) { print "TAPDANCE.pl done.\n"; }
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
370
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
371 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
372 # Phase 4, merge projects
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
373 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
if (defined($merge)) {
    # $merge is a comma-separated list; write one entry per line to meta.tab.
    open(my $meta_tab, ">", "$envDirN/data/meta.tab") || die "Unable to write project merge list to $envDirN/data/meta.tab: $!\n";
    foreach my $merge_proj (split(',', $merge)) {
        print $meta_tab "$merge_proj\n";
    }
    # Buffered write errors only surface at close.
    close($meta_tab) || die "Unable to write project merge list to $envDirN/data/meta.tab: $!\n";

    # Run TAP4.pl with the environment directory as working directory.
    $orig_dir = cwd();
    if ($debug) { print "Starting dir: $orig_dir.\n" }
    chdir($envDirN) || die "Unable to change directory to $envDirN: $!\n";
    # List-form piped open: no shell is involved.
    open($output, "-|", "perl", "$envDirN/lib/TAP4.pl") || die "Unable to run TAP4.pl. $!\n";
    # Always read the child's output to EOF, even when it is not echoed.
    # Closing the read end while the child is still writing would deliver
    # SIGPIPE to it.
    while (my $line = <$output>) {
        print $line if $debug;
    }
    # A false close on a pipe means the child exited non-zero. Only report it
    # in debug mode so that non-debug runs emit nothing new.
    if (!close($output) && $debug) {
        print "TAP4.pl exited with wait status $?.\n";
    }
    chdir($orig_dir) || die "Unable to change directory back to $orig_dir: $!\n";

    # The merged insert files are copied out by the copy step below.
    $copy_ins_files = 1;
    if ($debug) { print "TAP4.pl done.\n"; }
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
391
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
392 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
393 # Copy insert files
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
394 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
if ($copy_ins_files) {
    # The raw project BED is delivered to up to two places: the explicit BED
    # output (when requested) and the index directory (when an index is built).
    my $raw_bed   = "$envDirN/results/raw_$projName.BED";
    my $bed_error = "Unable to retrieve project BED, $raw_bed. ";

    if (defined($projBed)) {
        copy($raw_bed, "$projBed") or die $bed_error . "$!\n";
    }

    if (defined($index)) {
        my $index_bed = $index_path . "/primary_" . $index_id . "_InsertsBED_visible_bed";
        copy($raw_bed, $index_bed) or die $bed_error . "$!\n";
    }

    print "Files copied.\n" if $debug;
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
405
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
406 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
407 # Phase 2, calculate CISes
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
408 ###
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
if (!$no_cis) {
    # Write the omitted chromosomes, one per line, to data/chromo.tab.
    open(my $chromoTabH, ">", $envDirN . "/data/chromo.tab") || die "Unable to open chromo tab $envDirN/data/chromo.tab: $!\n";
    if (defined($metadata)) {
        copy("$metadata", "$envDirN/data/metadata.tab") || die "Unable to copy provided metadata, $metadata. $!\n";
    }
    foreach my $chrom (@omitChrom) { print $chromoTabH "$chrom\n"; }
    close($chromoTabH) || die "Unable to write chromo tab $envDirN/data/chromo.tab: $!\n";
    if ($debug) { print "Omitted chromosomes written.\n"; }

    # Tags (other than "all") that have "cis" rows in metadata.tab. Each one
    # may have its own WIG file to collect after TAP2.pl has run.
    my %metadata_attrs = ();
    if (!defined($metadata) && defined($bar2libFile)) {
        # No metadata supplied: derive metadata.tab from the barcode-to-library
        # mapping that was copied into the environment.
        my $metadata_fn = $envDirN . "/data/metadata.tab";
        my $barcode_fn  = "$envDirN/data/barcode2lib.txt";
        open(my $metadataTabH, ">", $metadata_fn) || die "Unable to open metadata tab $metadata_fn: $!\n";
        open(my $barcodeInH, "<", $barcode_fn) || die "Unable to open barcode to library mapping, $barcode_fn: $!\n";
        while (my $row = <$barcodeInH>) {
            chomp $row;
            my @map = split("\t", $row);
            # Blank or single-column rows carry no library name.
            next if @map < 2;
            $map[1] =~ s/^\s+//;
            $map[1] =~ s/\s+$//;
            # The library name is the part before the last "-L" / "-R". When
            # the name has no such suffix, use it unchanged instead of reading
            # a stale $1 left over from an earlier match.
            my $lib_name = ($map[1] =~ m/(.*)-[LR]/) ? $1 : $map[1];
            print $metadataTabH join("\t", $lib_name, "all", "cis") . "\n";
            # Columns from index 3 onwards are tags attached to the library.
            foreach my $idx (3 .. $#map) {
                $map[$idx] =~ s/^\s+//;
                $map[$idx] =~ s/\s+$//;
                print $metadataTabH join("\t", $lib_name, $map[$idx], "cis") . "\n";
                $metadata_attrs{$map[$idx]} = 1;
            }
        }
        close($barcodeInH);
        close($metadataTabH) || die "Unable to write metadata tab $metadata_fn: $!\n";
        # Remember that metadata.tab was generated rather than supplied.
        $meta_gen = 1;
        if ($debug) { print "Metadata written.\n"; }
    }
    else {
        # Metadata was supplied: collect the tags of its "cis" rows.
        open(my $metadata_file, "<", $envDirN . "/data/metadata.tab") || die "Unable to open meta, $!\n";
        while (my $row = <$metadata_file>) {
            chomp $row;
            my @map = split("\t", $row);
            # Rows with fewer than three columns cannot be "cis" rows.
            next if @map < 3;
            if (uc($map[2]) eq "CIS" && uc($map[1]) ne "ALL") {
                $metadata_attrs{$map[1]} = 1;
            }
        }
        close($metadata_file);
    }

    # Run TAP2.pl with the environment directory as working directory.
    $orig_dir = cwd();
    if ($debug) { print "Starting dir: $orig_dir.\n" }
    chdir($envDirN) || die "Unable to change directory to $envDirN: $!\n";
    # List-form piped open: no shell is involved.
    open($output, "-|", "perl", "./lib/TAP2.pl") || die "Unable to run TAP2.pl. $!\n";
    # Always read the child's output to EOF, even when it is not echoed.
    # Closing the read end while the child is still writing would deliver
    # SIGPIPE to it.
    while (my $line = <$output>) {
        print $line if $debug;
    }
    # A false close on a pipe means the child exited non-zero. Only report it
    # in debug mode so that non-debug runs emit nothing new.
    if (!close($output) && $debug) {
        print "TAP2.pl exited with wait status $?.\n";
    }
    chdir($orig_dir) || die "Unable to change directory back to $orig_dir: $!\n";
    if ($debug) { print "TAP2.pl run.\n"; }

    if ($debug) { print "TAP2.pl done.\n"; }
    if (defined($index)) {
        print $indexH "<H3>CIS calls</H3>\n<P>To Visualize the CIS Calls, use the \"CIS WIG\" history entry. Each metadata tag that generated it's own CIS calls has it's own WIG file in the history as \"CIS WIG (tag)\".\n<UL>\n";

        # Overall CIS summary, linked from the index.
        my $cis_summary = "$envDirN/results/summary_CIS_$projName.txt";
        print $indexH "<LI>" . link_file("primary_" . $index_id . "_CISSummary_hidden.txt", "txt", "Summary of all CIS calls", 0) . "</LI>\n";
        copy($cis_summary, $index_path . "/primary_" . $index_id . "_CISSummary_hidden.txt") || die "Unable to retrieve CIS summary, $cis_summary. $!\n";

        # WIG covering all libraries goes to the dedicated CIS WIG output.
        my $all_wig = $envDirN . "/results/all/plot_all-nr-" . $projName . "-" . $library_percent . ".wig";
        copy($all_wig, $cisWig) || die "Unable to retrieve CIS WIG, $all_wig. $!\n";

        # Association tables: [ name used in the published file, source file
        # under results/Assoc, link label ].
        my @assoc_files = (
            [ "Ann",          "Ann.txt",            "Ann.txt" ],
            [ "Cis",          "Cis.txt",            "Cis.txt" ],
            [ "AnnAnnTable",  "Ann_Ann_table.xls",  "Ann_Ann_table.txt" ],
            [ "AnnAnnMatrix", "Ann_ann_matrix.txt", "Ann_ann_matrix.txt" ],
            [ "AnnCisTable",  "Ann_cis_table.xls",  "Ann_cis_table.xls" ],
            [ "AnnCisMatrix", "Ann_cis_matrix.txt", "Ann_cis_matrix.txt" ],
            [ "CisCisTable",  "Cis_cis_table.xls",  "Cis_cis_table.xls" ],
            [ "CisCisMatrix", "Cis_cis_matrix.txt", "Cis_cis_matrix.txt" ],
        );
        print $indexH "<UL>\n";
        foreach my $assoc (@assoc_files) {
            my ($designation, $source, $label) = @{$assoc};
            my $dest_name = "primary_" . $index_id . "_" . $designation . "_hidden.txt";
            print $indexH "<LI>" . link_file($dest_name, "txt", $label, 0) . "</LI>\n";
            copy("$envDirN/results/Assoc/$source", $index_path . "/" . $dest_name) || die "Unable to retrieve $source. $!\n";
        }
        print $indexH "</UL>\n</UL>\n</P>\n";

        # Per-tag WIG files: publish only those that exist and are non-empty
        # (-s is false for a missing or zero-length file).
        foreach my $tag (keys %metadata_attrs) {
            my $tag_wig = "$envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $library_percent . ".wig";
            if (-s $tag_wig) {
                copy($tag_wig, "$cisWigPath/primary_" . $cisWigId . "_" . $tag . "_visible_wig") || die "Unable to retrieve CIS WIG, $tag_wig. $!\n";
            }
        }
    }
    if ($debug) { print "Files copied.\n"; }
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
522
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# Close the HTML index: usage hint, links to the generated configuration
# files, closing tags.
if (defined($index)) {
    print $indexH "<P>To add files to your history for further processing in Galaxy, right-click the link and select \"Copy Link URL\". Open the \"Get Data\" menu in the \"Tools\" sidebar and open the \"Upload File\" link. Paste the copied URL in the \"URL/Text\" box.</P>\n";
    print $indexH "<H3>Generated configuration files</H3>\n<UL>\n";
    print $indexH "<LI>" . link_file("primary_" . $index_id . "_ConfigPl_hidden.txt", "txt", "Project configuration", 0) . "</LI>\n";
    copy("$envDirN/config.pl", $index_path . "/primary_" . $index_id . "_ConfigPl_hidden.txt") || die "Unable to retrieve config.pl. $!\n";
    if ($meta_gen) {
        # $meta_gen is set when metadata.tab was generated from the barcode
        # mapping, so publish that file here.
        # NOTE(review): this branch previously repeated the config.pl entry
        # with a link to a file that is never created; confirm that publishing
        # metadata.tab under this name is the intended behaviour.
        my $metadata_name = "primary_" . $index_id . "_MetadataTab_hidden.txt";
        print $indexH "<LI>" . link_file($metadata_name, "txt", "Generated metadata", 0) . "</LI>\n";
        copy("$envDirN/data/metadata.tab", $index_path . "/" . $metadata_name) || die "Unable to retrieve metadata.tab. $!\n";
    }
    print $indexH "</UL>\n</BODY>\n</HTML>\n";
    # Buffered write errors only surface at close.
    close($indexH) || die "Unable to finish writing $index: $!\n";
}

exit(0);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
537
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# determine_seq_input_type(\$input_fn, \$config_fh)
#
# Classifies a sequence file by its first line and returns the type name:
#   first line starts with "@"  -> "fastq" (writes "$quality = 1;" to the config)
#   first line starts with ">"  -> "fasta" (writes "$quality = 0;" to the config)
#   first line has >= 2 tab-separated fields -> "tab" (nothing is written)
#
# Parameters:
#   $input_fn_ref  - reference to the input file name
#   $config_fh_ref - reference to an open, writable config filehandle
#
# Dies when the file cannot be opened, is empty, or matches none of the
# three formats.
sub determine_seq_input_type {
    my ($input_fn_ref, $config_fh_ref) = @_;

    # Lexical handle so that no package-global filehandle is clobbered.
    open(my $input_fh, "<", ${$input_fn_ref}) || die "Unable to open input file ${$input_fn_ref}, $!\n";
    my $first_line = <$input_fh>;
    close($input_fh);

    # An empty file yields undef; report it explicitly instead of falling
    # through the pattern matches on an undefined value.
    if (!defined($first_line)) {
        die "Unable to determine sequence input value type (fastq|fasta|tabular): ${$input_fn_ref} is empty\n";
    }

    if ($first_line =~ /^\@/) {
        print { ${$config_fh_ref} } "\$quality = 1;\n";
        return "fastq";
    }
    if ($first_line =~ /^>/) {
        print { ${$config_fh_ref} } "\$quality = 0;\n";
        return "fasta";
    }

    # Anything else must at least look tabular (two or more fields).
    my @split_array = split("\t", $first_line);
    if ($#split_array > 0) {
        return "tab";
    }
    die "Unable to determine sequence input value type (fastq|fasta|tabular)\n";
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
561
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# pre_process_seqs(\$seq_type, \$in_file, \$out_fn, \$debug)
#
# Produces the output file named by $out_fn_ref from the input file:
#   "fastq" - process_fastq() is run with fastq_entry as the per-record callback
#   "fasta" - process_fasta() is run with fasta_entry as the per-record callback
#   other   - the input is treated as tabular and copied unchanged
# When the debug flag is true the detected format is printed first.
# All four parameters are scalar references.
sub pre_process_seqs {
    my ($seq_type_ref, $in_file_ref, $out_fn_ref, $debug_ref) = @_;
    my $is_fastq = ${$seq_type_ref} eq "fastq";

    if ($is_fastq || ${$seq_type_ref} eq "fasta") {
        # FASTQ and FASTA share the same shape: open the output, hand it to
        # the matching reader together with its entry callback, close it.
        if (${$debug_ref}) { print $is_fastq ? "FASTQ\n" : "FASTA\n"; }
        open(my $out_fh, ">", ${$out_fn_ref}) || die "Unable to open ${$out_fn_ref}, $!\n";
        if ($is_fastq) {
            process_fastq($in_file_ref, \&fastq_entry, \$out_fh);
        }
        else {
            process_fasta($in_file_ref, \&fasta_entry, \$out_fh);
        }
        close($out_fh);
    }
    else {
        # Tab, no quality info: already in the target layout.
        if (${$debug_ref}) { print "TABULAR\n"; }
        copy(${$in_file_ref}, ${$out_fn_ref}) || die "Unable to copy seq file to execution environment. $!\n";
    }
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
589
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# fasta_entry(\$seq_id, \$seq, $array_ref)
#
# Writes one FASTA record as a tab-delimited line to the output handle:
#   <id> TAB <empty> TAB <sequence> TAB <synthetic quality> NEWLINE
# FASTA carries no quality values, so a quality string of the same length is
# synthesized: "I" for A/C/G/T (either case) and "!" for N (either case).
# Any other character is passed through unchanged.
#
# Parameters:
#   $seq_id_ref - reference to the sequence identifier
#   $seq_ref    - reference to the sequence; it is not modified
#   $array_ref  - array reference whose first element is a reference to the
#                 output filehandle
sub fasta_entry {
    my ($seq_id_ref, $seq_ref, $array_ref) = @_;
    #print "fasta_entry(" . join(",", ${$seq_id_ref}, ${$seq_ref}) . ")\n";
    my $out_fh = ${ $array_ref->[0] };

    # Build the quality string on a copy so the caller's sequence is left
    # untouched.
    (my $seq_qual = ${$seq_ref}) =~ tr/ACGTacgtNn/IIIIIIII!!/;

    print {$out_fh} join("\t", ${$seq_id_ref}, "", ${$seq_ref}, $seq_qual) . "\n";
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
602
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# fastq_entry(\$seq_id, \$seq, \$qual, $array_ref)
#
# Writes one FASTQ record as a tab-delimited line to the output handle:
#   <id> TAB <empty> TAB <sequence> TAB <quality> NEWLINE
# The first three parameters are scalar references; the first element of
# $array_ref is a reference to the output filehandle.
sub fastq_entry {
    my ($seq_id_ref, $seq_ref, $seq_qual, $array_ref) = @_;
    my $out_fh = ${ $array_ref->[0] };
    my $record = join("\t", ${$seq_id_ref}, "", ${$seq_ref}, ${$seq_qual});
    print {$out_fh} "$record\n";
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
608
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# lib_copy()
#
# File::Find "wanted" callback: copies every non-directory entry into
# "$envDirN/lib" (sub-directories are flattened into that one directory).
#
# find() is called without no_chdir, so it changes into each directory
# before calling this sub and sets $_ to the entry's name inside it.
# $File::Find::name is only resolvable from there when the start path was
# absolute, so the file test and the copy use $_; the full name is kept for
# the error message.
# NOTE(review): the destination still relies on $envDirN being resolvable
# from inside the source tree (i.e. absolute) - confirm against the caller.
sub lib_copy {
    return if -d $_;
    copy($_, "$envDirN/lib") || die "Unable to copy $File::Find::name to $envDirN/lib. $!\n";
}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
614
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# link_file($file_name, $file_type, $link_text, $download)
#
# Returns an HTML anchor pointing at $file_name with $link_text as its label.
# When $download is true, a second "[Download]" anchor is appended that
# targets "<file_name>/display?to_ext=<file_type>".
sub link_file {
    my ($file_name, $file_type, $link_text, $download) = @_;
    my @anchors = (sprintf('<A HREF="%s">%s</A>', $file_name, $link_text));
    if ($download) {
        push @anchors,
            sprintf(' [<A HREF="%s/display?to_ext=%s">Download</A>]', $file_name, $file_type);
    }
    return join("", @anchors);
}