Mercurial > repos > jesse-erdmann > tapdance
view tapdance_runner.pl @ 3:17ce4f3bffa2 default tip
Uploaded
author | jesse-erdmann |
---|---|
date | Tue, 24 Jan 2012 18:33:41 -0500 |
parents | 1437a2df99c0 |
children |
line wrap: on
line source
#!/project/bioperl/perl-5.10.1-sles11/bin/perl -w
#
#------------------------------------------------------------------------------
#                         University of Minnesota
#           Copyright 2010 - 2011, Regents of the University of Minnesota
#------------------------------------------------------------------------------
# Author:
#
#  Jesse Erdmann
#
# POD documentation
#------------------------------------------------------------------------------

=pod BEGIN

=head1 NAME

tapdance_runner.pl - TAPDANCE wrapper that provides a single interface to all
TAPDANCE functionality.

=head1 SYNOPSIS

tapdance_runner.pl [-help]

See http://sf.net/p/tapdancebio for full documentation

=head1 OPTIONS

=over 6

=item B<-help>

Print this usage summary.

=item B<-seqFile sequence_file>

The sequences to be processed for insertions.

=item B<-bar2libFile barcode_to_library_mapping_file>

A tab delimited file where each line contains the barcode and name of a
library. Additionally, columns after the second column will be treated as
metadata tags to be associated with the library.

=item B<-baseConfig custom_config>

OPTIONAL. If there is a custom tapdance_base_config.txt to be used in special
cases, use this parameter to specify its use. An example where this might be
useful is the case where distinct groups of users are using the same TAPDANCE
installation, but separate mutagens.

=item B<-config predefined_config_file>

A configuration file may be used rather than specifying options on the command
line. Any options specified in the base config file will be overridden by
values specified in this config file.

=item B<-db_config database_configuration_file>

Use this option if the database configuration needs to be kept separate from
other configuration information. This is most useful in Galaxy where the end
user should not have the database user credentials exposed to them.

=item B<-bowtieIdx reference_genome>

The name of the bowtie index to use for aligning individual sequences. This is
only used during the first phase of TAPDANCE. It is important to note that the
index name is not a single file. For instance, the mm9 index has several files
named mm9.[0-9].ebwt and mm9.rev.[0-9].ebwt. However, the correct value for
this parameter would be /my/path/to/indexes/mm9

=item B<-mutagen mutagen_sequence>

The sequence to match determining whether the mutagen of interest is present.
Any sequences not matching this sequence will not be used in the analysis while
those that do will have the mutagen trimmed prior to alignment. If, for
instance, the mutagen for a particular project has a sequence of ACTG, but the
user also wanted to remove up to two bases following the mutagen sequence, the
wildcard character '_' can be used to specify a mutagen sequence of ACTG__.
Any number of mutagen sequences may be specified by entering multiple -mutagen
entries on the command line. E.G. perl tapdance_runner.pl -mutagen ACGT
-mutagen TGCA. This is useful when a mutagen has more than one common captured
sequence in the data.

=item B<-projectName project_name>

A name for the project, up to 255 chars.

=item B<-omittedChromosomes chromosomes_to_omit>

It can be useful to remove the chromosome of the donor concatamer from the
calculations to remove the effects of local hopping for some projects. The
chromosomes can be specified as a comma delimited list and must match the names
used in the reference genome. E.G -omittedChromosomes chr1 -omittedChromosomes
chr2

=item B<-output_dir location_to_write_to>

The location where execution will be performed DEFAULT:'./'

=item B<-metadata library_metadata>

OPTIONAL. To specify metadata on libraries outside of the barcode to library
mapping file, this parameter may be used. The file should contain the name of
the library in one column and the metadata tag to affiliate with it in the
second. Each library may have as many entries as needed.

=item B<-lib_pct library_percent>

=item B<-CIS_tot_p CIS_total_pvalue>

=item B<-CIS_lib_p CIS_library_pvalue>

=item B<-CIS_reg_p CIS_region_pvalue>

=item B<-coCIS_thresh cocis_threshold>

=item B<-merge merge>

Specify projects to be merged as the new project specified with -projectName.
E.G. -merge my_first_project -merge my_second_project -projectName
my_merged_project.

=item B<-annotation annotation_file>

Specify the bed file to annotate CISes with. The default feature set is UCSC's
mm9 refSeq genes.

=item B<-no_cis>

To generate a list of inserts only, specify no_cis. This is useful in cases
where a new set of data needs to be merged with a previous set of data. Use
this option as a first step to prepare the new data. Use -merge to combine the
resulting projects and call CISes on the new project.

=item B<-seqType seqfile_format>

OPTIONAL. If not specified, TAPDANCE will attempt to identify the input file
type on its own. Valid options are 'tab', 'fasta' and 'fastq'.

=item B<-debug>

OPTIONAL.

=back

=cut

#### END of POD documentation.
#-----------------------------------------------------------------------------
# Main program.
#
# Builds an execution environment directory (config.pl, data/, lib/), then
# runs the requested TAPDANCE phases inside it:
#   Phase 1 (lib/TAPDANCE.pl) when -seqFile is given,
#   Phase 4 (lib/TAP4.pl)     when -merge is given,
#   Phase 2 (lib/TAP2.pl)     unless -no_cis is given,
# and copies the result files to the Galaxy output locations when the
# corresponding -index/-projectBed/-cisWig* options are supplied.
#-----------------------------------------------------------------------------
use strict;
use warnings;
use Cwd;
use Getopt::Long;
use File::Basename ();
use File::Copy;
use File::Find;
use File::Temp qw/ tempfile tempdir /;
use Pod::Usage;

#tapdance_runner.pl -s $seqs -b $bar2lib -g $genomeIdx -pn $projName -o $omitChrom -pb $projBed -ps $projSum -cb $cisBed -cs $cisSum -bc $baseConfig

my $dbh;

# Absolute directory containing this script. dirname() also copes with a $0
# that has no directory component, and the absolute form keeps the require
# calls independent of @INC and of the current working directory.
my $path = Cwd::abs_path(File::Basename::dirname($0));
defined($path) or die "Unable to determine the location of $0\n";
require "$path/lib/tapdance_base_config.pl";
require "$path/util.pl";

#Universal variables
my ($seqFile, $seqType, $bar2libFile, $bowtieIdx, $bwaIdx, $projName,
    @omitChrom, $baseConfig, @mutagens_array, $metadata, @merge,
    $preconfig_file, $library_percent, $CIS_total_pvalue, $CIS_library_pvalue,
    $CIS_region_pvalue, $cocis_threshold, $annotation_file, $db_config);
my $no_cis = 0;

#CMD line variables
my ($debug, $output_dir, $noUnlink, $help_flag);

#Galaxy variables
my ($index, $index_id, $index_path, $projBed, $projBedId, $projSum,
    $projSumId, $projVis, $projVisId, $cisWig, $cisWigId, $cisWigPath,
    $cisSum, $cisSumId, $tmpDir);

my %options = (
    #Universal Variables
    "seqFile|s=s"            => \$seqFile,
    "seqType|st=s"           => \$seqType,
    "bar2libFile|b=s"        => \$bar2libFile,
    "bowtieIdx=s"            => \$bowtieIdx,
    #"bwaIdx=s"              => \$bwaIdx,
    "projectName|pn=s"       => \$projName,
    "omittedChromosomes|o=s" => \@omitChrom,
    "baseConfig|bc=s"        => \$baseConfig,
    "metadata|m=s"           => \$metadata,
    "mutagen=s"              => \@mutagens_array,
    "lib_pct=f"              => \$library_percent,
    "CIS_tot_p=f"            => \$CIS_total_pvalue,
    "CIS_lib_p=f"            => \$CIS_library_pvalue,
    "CIS_reg_p=f"            => \$CIS_region_pvalue,
    "coCIS_thresh=f"         => \$cocis_threshold,
    # Collected into an array so that both "-merge a,b" and the documented
    # "-merge a -merge b" forms work (a scalar kept only the last value).
    "merge=s"                => \@merge,
    "annotation=s"           => \$annotation_file,
    "config=s"               => \$preconfig_file,
    "db_config=s"            => \$db_config,
    "no_cis"                 => \$no_cis,
    #CMD Line Variables
    "help"                   => \$help_flag,
    "output_dir=s"           => \$output_dir,
    "debug|d"                => \$debug,
    #Galaxy Variables
    "index=s"                => \$index,
    "index_id=s"             => \$index_id,
    "index_path=s"           => \$index_path,
    "projectBed|pb=s"        => \$projBed,
    "projectBedId=s"         => \$projBedId,
    "cisWig|cw=s"            => \$cisWig,
    "cisWigId|cwid=s"        => \$cisWigId,
    "cisWigPath|cwpath=s"    => \$cisWigPath,
    "tmp_dir|t=s"            => \$tmpDir,
    "no_unlink"              => \$noUnlink
);

GetOptions(%options) or pod2usage(2);
pod2usage(1) if $help_flag;

# sanitize_project() comes from util.pl.
$projName = sanitize_project($projName);
my $meta_gen = 0;

# Galaxy output naming helpers: "primary_<index_id>_<designation>" and the
# same name located under -index_path. Only called when -index is in use.
my $primary_name = sub { return "primary_" . $index_id . "_" . $_[0]; };
my $primary_path = sub { return $index_path . "/" . $primary_name->($_[0]); };

###
# Execution environment directory
###
my $envDirN;
if (defined($output_dir)) {
    $envDirN = $output_dir;
    unless (-d $output_dir) {
        mkdir($output_dir) or die "Unable to create output directory $output_dir: $!\n";
    }
}
else {
    # File::Temp::tempdir() removes the directory at exit only when CLEANUP
    # is set; the UNLINK option belongs to tempfile() and is ignored here.
    my %tempdir_opts;
    $tempdir_opts{DIR}     = $tmpDir if defined($tmpDir);
    $tempdir_opts{CLEANUP} = 1 unless $noUnlink;
    $envDirN = tempdir(%tempdir_opts);
}
# The phases below chdir() into the environment, so a relative path would
# stop resolving; make it absolute once, up front.
$envDirN = Cwd::abs_path($envDirN);
defined($envDirN) or die "Unable to resolve the execution environment directory\n";
if ($debug) { print "EnvDir = $envDirN\n"; }

###
# Generate config.pl for this run
###
if (!defined($baseConfig)) { $baseConfig = "$path/lib"; }
open(my $baseConfigH, "<", $baseConfig . "/tapdance_base_config.pl")
    or die "Unable to open $baseConfig: $!\n";
open(my $envConfigH, ">", $envDirN . "/config.pl")
    or die "Unable to open environment $envDirN/config.pl: $!\n";
if (defined($db_config)) {
    print $envConfigH "require '" . $envDirN . "/" . $db_config . "';\n";
}

# Copy system defaults first, overwrite as needed
while (my $line = <$baseConfigH>) { print $envConfigH $line; }
close($baseConfigH);

if (defined($preconfig_file)) {
    # Append the user supplied configuration after the defaults so that its
    # values win. (Previously the file was opened but never read.)
    # NOTE(review): in this mode no command line values are written, so the
    # file presumably has to provide $proj, $envDir, etc. itself - confirm.
    open(my $preConfigH, "<", $preconfig_file)
        or die "Unable to open input configuration file $preconfig_file. $!\n";
    print $envConfigH "\n";
    while (my $line = <$preConfigH>) { print $envConfigH $line; }
    print $envConfigH "\n";
    close($preConfigH);
}
else {
    print $envConfigH "#Project specific custom values, will override values set above\n";
    print $envConfigH "\$proj = '$projName';\n";
    print $envConfigH "\$envDir = '$envDirN';\n";

    # Optional numeric thresholds: written only when given on the command line.
    my @optional_settings = (
        [ library_percent    => $library_percent ],
        [ CIS_total_pvalue   => $CIS_total_pvalue ],
        [ CIS_library_pvalue => $CIS_library_pvalue ],
        [ CIS_region_pvalue  => $CIS_region_pvalue ],
        [ cocis_threshold    => $cocis_threshold ],
    );
    foreach my $setting (@optional_settings) {
        my ($name, $value) = @{$setting};
        print $envConfigH "\$$name = '$value';\n" if defined($value);
    }

    # Aligner selection. Each index is tested on its own; the old
    # defined($a && $b) form tested the truth of $a, not its definedness.
    if (defined($bowtieIdx) && defined($bwaIdx)) {
        print $envConfigH "\$bwa_exe = 'bwa';\n";
        print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
        print $envConfigH "\$bwa_idx = '$bwaIdx';\n";
        print $envConfigH "\$bowtie_idx = '$bowtieIdx';\n";
        print $envConfigH "\$aligner = 'bow_bwa';\n";
    }
    elsif (defined($bowtieIdx)) {
        print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
        print $envConfigH "\$bowtie_idx = '$bowtieIdx';\n";
        print $envConfigH "\$aligner = 'bowtie';\n";
    }
    elsif (defined($bwaIdx)) {
        print $envConfigH "\$bwa_exe = 'bwa';\n";
        print $envConfigH "\$bwa_idx = '$bwaIdx';\n";
        print $envConfigH "\$aligner = 'bwa';\n";
    }

    if (@mutagens_array && length($mutagens_array[0]) > 0) {
        print $envConfigH "\$mutagens = '" . join(",", @mutagens_array) . "';\n";
    }
    if (defined($annotation_file)) {
        print $envConfigH "\$annotation_file ='" . $annotation_file . "';\n";
    }
    if (!defined($seqType) && defined($seqFile)) {
        # Also records $quality in config.pl for fastq/fasta input.
        $seqType = determine_seq_input_type(\$seqFile, \$envConfigH);
    }
}
print $envConfigH "return 1;\n";
close($envConfigH) or die "Unable to write $envDirN/config.pl: $!\n";

###
# Populate data/ and lib/ in the environment
###
foreach my $sub_dir ("$envDirN/data", "$envDirN/lib") {
    next if -d $sub_dir;
    mkdir($sub_dir) or die "Unable to create $sub_dir. $!\n";
}
my @lib_source = ($path . "/lib/");
# no_chdir keeps the working directory fixed while copying, so the paths
# handed to lib_copy() stay valid.
find({ wanted => \&lib_copy, no_chdir => 1 }, @lib_source);

my $copy_ins_files = 0;
my $indexH;
if (defined($index)) {
    open($indexH, ">", $index) or die "Unable to open $index for writing: $!\n";
    print $indexH "<HTML>\n<HEAD>\n<TITLE>$projName Results</TITLE>\n</HEAD>\n<BODY>\n<H1>$projName</H1>\n";
    if (defined($index_path) && !-d $index_path) {
        mkdir($index_path) or die "Unable to create $index_path. $!\n";
    }
}

###
# Phase 1, sequences through mapping to insert list
###
if (defined($seqFile)) {
    my $seqOutFn = "$envDirN/data/seqs.tab";
    pre_process_seqs(\$seqType, \$seqFile, \$seqOutFn, \$debug);
    copy("$bar2libFile", "$envDirN/data/barcode2lib.txt")
        or die "Unable to link barcode to library file in execution environment. $!\n";

    run_tap_script($envDirN, "$envDirN/lib/TAPDANCE.pl", "TAPDANCE.pl", $debug);

    if (defined($index)) {
        print $indexH "<H3>Insertion Analysis</H3>\n<P>To visualize the insertions in this project use the \"Non Redundant Inserts BED\" file in the history.\n<UL>\n";
        print $indexH "<LI>" . link_file($primary_name->("InsertsVis_hidden.pdf"), "pdf", "QC graphs of inserts", 0) . "</LI>\n";
        print $indexH "<LI>" . link_file($primary_name->("InsertsSummary_hidden.txt"), "txt", "Summary of all inserts", 0) . "</LI>\n";
        copy("$envDirN/results/summary_$projName.txt", $primary_path->("InsertsSummary_hidden.txt"))
            or die "Unable to retrieve project summary, $envDirN/results/summary_$projName.txt. $!\n";

        # List form: no shell, so paths containing spaces or metacharacters
        # are passed through intact. A failing plot stays non-fatal.
        my @rscript_cmd = (
            "Rscript", "--vanilla", "$envDirN/lib/insert_vis.R", "--args",
            "$envDirN/results/lib_stats_$projName.txt",
            "$envDirN/results/region_stats_$projName.txt",
            $primary_path->("InsertsVis_hidden.pdf"),
        );
        my $rscript_status = system(@rscript_cmd);
        if ($rscript_status != 0 && $debug) {
            print "insert_vis.R did not complete (wait status $rscript_status).\n";
        }
        print $indexH "</UL>\n</P>\n";
    }
    $copy_ins_files = 1;
    if ($debug) { print "TAPDANCE.pl done.\n"; }
}

###
# Phase 4, merge projects
###
if (@merge) {
    open(my $meta_tab, ">", "$envDirN/data/meta.tab")
        or die "Unable to write project merge list to $envDirN/data/meta.tab: $!\n";
    # Every -merge value may itself be a comma separated list.
    foreach my $merge_proj (map { split(',', $_) } @merge) {
        print $meta_tab "$merge_proj\n";
    }
    close($meta_tab) or die "Unable to write project merge list to $envDirN/data/meta.tab: $!\n";

    run_tap_script($envDirN, "$envDirN/lib/TAP4.pl", "TAP4.pl", $debug);

    $copy_ins_files = 1;
    if ($debug) { print "TAP4.pl done.\n"; }
}

###
# Copy insert files
###
if ($copy_ins_files) {
    my $raw_bed = "$envDirN/results/raw_$projName.BED";
    if (defined($projBed)) {
        copy($raw_bed, "$projBed") or die "Unable to retrieve project BED, $raw_bed. $!\n";
    }
    if (defined($index)) {
        copy($raw_bed, $primary_path->("InsertsBED_visible_bed"))
            or die "Unable to retrieve project BED, $raw_bed. $!\n";
    }
    if ($debug) { print "Files copied.\n"; }
}

###
# Phase 2, calculate CISes
###
if (!$no_cis) {
    #Fill chromo tab for phase two
    open(my $chromoTabH, ">", $envDirN . "/data/chromo.tab")
        or die "Unable to open chromo tab $envDirN/data/chromo.tab: $!\n";
    if (defined($metadata)) {
        copy("$metadata", "$envDirN/data/metadata.tab")
            or die "Unable to copy provided metadata, $metadata. $!\n";
    }
    # One chromosome per line; each option value may be a comma separated list.
    foreach my $chromosome (map { split(',', $_) } @omitChrom) {
        print $chromoTabH "$chromosome\n";
    }
    close($chromoTabH) or die "Unable to write chromo tab $envDirN/data/chromo.tab: $!\n";
    if ($debug) { print "Omitted chromosomes written.\n"; }

    # Metadata tags (other than "all") that get their own CIS calls.
    my %metadata_attrs = ();
    if (!defined($metadata) && defined($bar2libFile)) {
        # No metadata file supplied: derive metadata.tab from the barcode to
        # library mapping that Phase 1 copied into the environment.
        open(my $metadataTabH, ">", $envDirN . "/data/metadata.tab")
            or die "Unable to open metadata tab $envDirN/data/metadata.tab: $!\n";
        open(my $barcodeInH, "<", "$envDirN/data/barcode2lib.txt")
            or die "Unable to open barcode to library mapping, $bar2libFile: $!\n";
        while (my $line = <$barcodeInH>) {
            chomp($line);
            my @map = split("\t", $line);
            next unless defined($map[1]);    # blank line or no library column
            $map[1] =~ s/^\s+//;
            $map[1] =~ s/\s+$//;
            # Strip the -L/-R suffix from the library name. When there is no
            # such suffix use the name as it stands; reading $1 after a failed
            # match would return the capture from an earlier row.
            my $lib_name = ($map[1] =~ m/(.*)-[LR]/) ? $1 : $map[1];
            print $metadataTabH join("\t", $lib_name, "all", "cis") . "\n";
            # NOTE(review): tags are read from the fourth column onwards
            # (index 3); the option documentation says "after the second
            # column" - confirm which is intended.
            foreach my $idx (3 .. $#map) {
                $map[$idx] =~ s/^\s+//;
                $map[$idx] =~ s/\s+$//;
                print $metadataTabH join("\t", $lib_name, $map[$idx], "cis") . "\n";
                $metadata_attrs{$map[$idx]} = 1;
            }
        }
        close($barcodeInH);
        close($metadataTabH) or die "Unable to write $envDirN/data/metadata.tab: $!\n";
        $meta_gen = 1;
        if ($debug) { print "Metadata written.\n"; }
    }
    else {
        open(my $metadata_file, "<", $envDirN . "/data/metadata.tab")
            or die "Unable to open meta, $!\n";
        while (my $line = <$metadata_file>) {
            chomp($line);
            my @fields = split("\t", $line);
            next unless defined($fields[2]);    # need library, tag and type
            if (uc($fields[2]) eq "CIS" && uc($fields[1]) ne "ALL") {
                $metadata_attrs{$fields[1]} = 1;
            }
        }
        close($metadata_file);
    }

    run_tap_script($envDirN, "./lib/TAP2.pl", "TAP2.pl", $debug);
    if ($debug) { print "TAP2.pl run.\n"; }
    if ($debug) { print "TAP2.pl done.\n"; }

    if (defined($index)) {
        # NOTE(review): the WIG file names embed the -lib_pct value; when the
        # option is not given the name is built with an empty value, which
        # presumably does not match what TAP2.pl wrote - confirm.
        my $lib_pct = defined($library_percent) ? $library_percent : "";

        print $indexH "<H3>CIS calls</H3>\n<P>To Visualize the CIS Calls, use the \"CIS WIG\" history entry. Each metadata tag that generated it's own CIS calls has it's own WIG file in the history as \"CIS WIG (tag)\".\n<UL>\n";
        print $indexH "<LI>" . link_file($primary_name->("CISSummary_hidden.txt"), "txt", "Summary of all CIS calls", 0) . "</LI>\n";
        copy("$envDirN/results/summary_CIS_$projName.txt", $primary_path->("CISSummary_hidden.txt"))
            or die "Unable to retrieve CIS summary, $envDirN/results/summary_CIS_$projName.txt. $!\n";

        my $all_wig = $envDirN . "/results/all/plot_all-nr-" . $projName . "-" . $lib_pct . ".wig";
        copy($all_wig, $cisWig) or die "Unable to retrieve CIS WIG, $all_wig. $!\n";

        # Association tables: [ designation, link text, file in results/Assoc ]
        my @assoc_outputs = (
            [ "Ann",          "Ann.txt",            "Ann.txt" ],
            [ "Cis",          "Cis.txt",            "Cis.txt" ],
            [ "AnnAnnTable",  "Ann_Ann_table.txt",  "Ann_Ann_table.xls" ],
            [ "AnnAnnMatrix", "Ann_ann_matrix.txt", "Ann_ann_matrix.txt" ],
            [ "AnnCisTable",  "Ann_cis_table.xls",  "Ann_cis_table.xls" ],
            [ "AnnCisMatrix", "Ann_cis_matrix.txt", "Ann_cis_matrix.txt" ],
            [ "CisCisTable",  "Cis_cis_table.xls",  "Cis_cis_table.xls" ],
            [ "CisCisMatrix", "Cis_cis_matrix.txt", "Cis_cis_matrix.txt" ],
        );
        print $indexH "<UL>\n";
        foreach my $assoc (@assoc_outputs) {
            my ($designation, $link_text, $assoc_file) = @{$assoc};
            my $hidden_name = $designation . "_hidden.txt";
            print $indexH "<LI>" . link_file($primary_name->($hidden_name), "txt", $link_text, 0) . "</LI>\n";
            copy("$envDirN/results/Assoc/$assoc_file", $primary_path->($hidden_name))
                or die "Unable to retrieve $assoc_file. $!\n";
        }
        print $indexH "</UL>\n</UL>\n</P>\n";

        # One WIG per metadata tag, skipping tags whose plot is missing or empty.
        foreach my $tag (keys %metadata_attrs) {
            my $tag_wig = "$envDirN/results/$tag/plot_" . $tag . "-nr-" . $projName . "-" . $lib_pct . ".wig";
            next unless -s $tag_wig;
            copy($tag_wig, "$cisWigPath/primary_" . $cisWigId . "_" . $tag . "_visible_wig")
                or die "Unable to retrieve CIS WIG, $tag_wig. $!\n";
        }
    }
    if ($debug) { print "Files copied.\n"; }
}

###
# Finish the HTML index
###
if (defined($index)) {
    print $indexH "<P>To add files to your history for further processing in Galaxy, right-click the link and select \"Copy Link URL\". Open the \"Get Data\" menu in the \"Tools\" sidebar and open the \"Upload File\" link. Paste the copied URL in the \"URL/Text\" box.</P>\n";
    print $indexH "<H3>Generated configuration files</H3>\n<UL>\n";
    print $indexH "<LI>" . link_file($primary_name->("ConfigPl_hidden.txt"), "txt", "Project configuration", 0) . "</LI>\n";
    copy("$envDirN/config.pl", $primary_path->("ConfigPl_hidden.txt"))
        or die "Unable to retrieve config.pl. $!\n";
    if ($meta_gen) {
        # Publish the metadata generated from the barcode file. (This branch
        # used to repeat the config.pl entry with a link to a file that is
        # never created.)
        print $indexH "<LI>" . link_file($primary_name->("MetadataTab_hidden.txt"), "txt", "Generated metadata", 0) . "</LI>\n";
        copy("$envDirN/data/metadata.tab", $primary_path->("MetadataTab_hidden.txt"))
            or die "Unable to retrieve metadata.tab. $!\n";
    }
    print $indexH "</UL>\n</BODY>\n</HTML>\n";
    close($indexH) or die "Unable to write $index: $!\n";
}

exit(0);

# Run one TAPDANCE phase script with the environment directory as the
# working directory, then return to the starting directory.
#   $env_dir - directory to run in
#   $script  - script path handed to "perl"
#   $label   - script name used in messages
#   $verbose - when true, echo the script's standard output
# The output is always read to the end: closing the pipe while the child is
# still printing would make the child receive SIGPIPE. A non-zero exit is
# reported in verbose mode only and never aborts the run.
sub run_tap_script {
    my ($env_dir, $script, $label, $verbose) = @_;

    my $start_dir = cwd();
    if ($verbose) { print "Starting dir: $start_dir.\n"; }
    chdir($env_dir) or die "Unable to change directory to $env_dir. $!\n";
    if ($verbose) { print "Current dir: " . cwd() . "\n"; }

    # List form of the pipe open: the command is not passed through a shell.
    open(my $child_out, "-|", "perl", $script) or die "Unable to run $label. $!\n";
    while (my $line = <$child_out>) {
        print $line if $verbose;
    }
    if (!close($child_out) && $verbose) {
        print "$label did not exit cleanly (wait status $?).\n";
    }

    chdir($start_dir) or die "Unable to return to $start_dir. $!\n";
    return;
}

# Guess the sequence file format from its first line.
#   $input_fn_ref  - reference to the sequence file name
#   $config_fh_ref - reference to the open config.pl handle; "$quality" is
#                    written to it for fastq (1) and fasta (0) input
# Returns "fastq", "fasta" or "tab"; dies when the file cannot be opened or
# the format cannot be recognised (including an empty file).
sub determine_seq_input_type {
    my ($input_fn_ref, $config_fh_ref) = @_;

    open(my $input_fh, "<", ${$input_fn_ref})
        or die "Unable to open input file ${$input_fn_ref}, $!\n";
    my $first_line = <$input_fh>;
    close($input_fh);

    my $unknown = "Unable to determine sequence input value type (fastq|fasta|tabular)\n";
    die $unknown unless defined($first_line);

    if ($first_line =~ /^@/) {
        print { ${$config_fh_ref} } "\$quality = 1;\n";
        return "fastq";
    }
    if ($first_line =~ /^>/) {
        print { ${$config_fh_ref} } "\$quality = 0;\n";
        return "fasta";
    }

    # Anything else must have at least two tab separated columns.
    my @columns = split("\t", $first_line);
    return "tab" if @columns > 1;
    die $unknown;
}

# Convert the input sequences into the tabular file used by Phase 1.
#   $seq_type_ref - reference to the format name ("fastq", "fasta", other)
#   $in_file_ref  - reference to the input file name
#   $out_fn_ref   - reference to the output file name
#   $debug_ref    - reference to the debug flag
# fastq and fasta input go through process_fastq()/process_fasta() from
# util.pl; any other (or undefined) type is copied unchanged as tabular.
sub pre_process_seqs {
    my ($seq_type_ref, $in_file_ref, $out_fn_ref, $debug_ref) = @_;
    my $seq_type = defined(${$seq_type_ref}) ? ${$seq_type_ref} : "";

    if ($seq_type eq "fastq") {
        if (${$debug_ref}) { print "FASTQ\n"; }
        open(my $out_fh, ">", ${$out_fn_ref}) or die "Unable to open ${$out_fn_ref}, $!\n";
        process_fastq($in_file_ref, \&fastq_entry, \$out_fh);
        close($out_fh) or die "Unable to write ${$out_fn_ref}, $!\n";
    }
    elsif ($seq_type eq "fasta") {
        if (${$debug_ref}) { print "FASTA\n"; }
        open(my $out_fh, ">", ${$out_fn_ref}) or die "Unable to open ${$out_fn_ref}, $!\n";
        process_fasta($in_file_ref, \&fasta_entry, \$out_fh);
        close($out_fh) or die "Unable to write ${$out_fn_ref}, $!\n";
    }
    else {
        #Tab, no quality info
        if (${$debug_ref}) { print "TABULAR\n"; }
        copy(${$in_file_ref}, ${$out_fn_ref})
            or die "Unable to copy seq file to execution environment. $!\n";
    }
    return;
}

# Callback for process_fasta(): write one tabular record
#   id <TAB> <TAB> sequence <TAB> placeholder quality
# to the handle referenced by the first element of $array_ref. The quality
# string is the sequence with A/C/T/G replaced by 'I' and N by '!'; it is
# built on a copy so the caller's sequence is left untouched.
sub fasta_entry {
    my ($seq_id_ref, $seq_ref, $array_ref) = @_;
    my $out_fh = ${ $array_ref->[0] };

    # Per-record trace, shown only with -debug (the matching trace in
    # fastq_entry is disabled).
    if ($debug) {
        print "fasta_entry(" . join(",", ${$seq_id_ref}, ${$seq_ref}) . ")\n";
    }

    (my $fake_quality = ${$seq_ref}) =~ tr/ACTGN/IIII!/;
    print {$out_fh} join("\t", ${$seq_id_ref}, "", ${$seq_ref}, $fake_quality) . "\n";
    return;
}

# Callback for process_fastq(): write one tabular record
#   id <TAB> <TAB> sequence <TAB> quality
# to the handle referenced by the first element of $array_ref.
sub fastq_entry {
    my ($seq_id_ref, $seq_ref, $seq_qual, $array_ref) = @_;
    my $out_fh = ${ $array_ref->[0] };
    print {$out_fh} join("\t", ${$seq_id_ref}, "", ${$seq_ref}, ${$seq_qual}) . "\n";
    return;
}

# File::Find callback: copy every non-directory entry into the environment's
# lib directory (files from sub-directories end up in the same flat lib/).
sub lib_copy {
    my $source = $File::Find::name;
    return if -d $source;
    copy($source, "$envDirN/lib") or die "Unable to copy $source to $envDirN/lib. $!\n";
    return;
}

# Build the HTML for one index entry.
#   $file_name - link target
#   $file_type - extension used for the optional download link
#   $link_text - visible link text
#   $download  - when true, append a "[Download]" link
# Returns the HTML fragment.
sub link_file {
    my ($file_name, $file_type, $link_text, $download) = @_;

    my $html = "<A HREF=\"" . $file_name . "\">" . $link_text . "</A>";
    if ($download) {
        $html .= " [<A HREF=\"" . $file_name . "/display?to_ext=" . $file_type . "\">Download</A>]";
    }
    return $html;
}