view tapdance_runner.pl @ 0:1437a2df99c0

Uploaded
author jesse-erdmann
date Fri, 09 Dec 2011 11:56:56 -0500
parents
children 17ce4f3bffa2
line wrap: on
line source

#!/project/bioperl/perl-5.10.1-sles11/bin/perl -w
#
#------------------------------------------------------------------------------
#                         University of Minnesota
#      Copyright 2010 - 2011, Regents of the University of Minnesota
#------------------------------------------------------------------------------
# Author:
#
#  Jesse Erdmann
#
# POD documentation
#------------------------------------------------------------------------------
=pod BEGIN

=head1 NAME

  tapdance_runner.pl - TAPDANCE wrapper that provides a single interface to all
 TAPDANCE functionality.

=head1 SYNOPSIS
    
   tapdance_runner.pl [-help] 

    See http://sf.net/p/tapdancebio for full documentation

=head1 OPTIONS

=over 6

=item  B<-help>

  Print this usage summary.

=item B<-seqFile sequence_file>

  The sequences to be processed for insertions.

=item B<-bar2libFile barcode_to_library_mapping_file>

  A tab delimited file where each line contains the barcode and name of a 
   library.  Additionally, columns after the second column will be treated as 
   metadata tags to be associated with the library.

=item B<-baseConfig custom_config>

  OPTIONAL.  The directory containing a custom tapdance_base_config.pl to be
   used in special cases.  When omitted, the lib directory next to this script
   is used.  An example where this might be useful is the case where distinct
   groups of users are using the same TAPDANCE installation, but separate
   mutagens.

=item B<-config predefined_config_file>
 
  A configuration file may be used rather than specifying options on
   the command line.  Any options specified in the base config file
   will be overridden by values specified in this config file.

=item B<-db_config database_configuration_file>

  Use this option if the database configuration needs to be kept separate 
   from other configuration information. This is most useful in Galaxy
   where the end user should not have the database user credentials 
   exposed to them.

=item B<-bowtieIdx reference_genome>

  The name of the bowtie index to use for aligning individual sequences.  This
   is only used during the first phase of TAPDANCE.  It is important to note that
   the index name is not a single file. For instance, the mm9 index has several 
   files named mm9.[0-9].ebwt and mm9.rev.[0-9].ebwt.  However, the correct 
   value for this parameter would be /my/path/to/indexes/mm9

=item B<-mutagen mutagen_sequence>

  The sequence to match when determining whether the mutagen of interest is
   present.  Any sequences not matching this sequence will not be used in the
   analysis, while those that do will have the mutagen trimmed prior to
   alignment.  If, for instance, the mutagen for a particular project has a
   sequence of ACTG, but the user also wants to remove up to two bases
   following the mutagen sequence, the wildcard character '_' can be used to
   specify a mutagen sequence of ACTG__.

  Any number of mutagen sequences may be specified by entering multiple -mutagen
   entries on the command line.  E.G. perl tapdance_runner.pl -mutagen ACGT 
   -mutagen TGCA.  This is useful when a mutagen has more than one common captured
   sequence in the data.

=item B<-projectName project_name>

  A name for the project, up to 255 chars.

=item B<-omittedChromosomes chromosomes_to_omit>

  It can be useful to remove the chromosome of the donor concatamer from the 
   calculations to remove the effects of local hopping for some projects.  The 
   chromosomes can be specified as a comma delimited list and must match the 
   names used in the reference genome.  E.G. -omittedChromosomes chr1 
   -omittedChromosomes chr2

=item B<-output_dir location_to_write_to>

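  The location where execution will be performed.  The directory is created
   if it does not exist.

  DEFAULT: a new temporary directory (created under -tmp_dir when that option
   is given)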

=item B<-metadata library_metadata>

  OPTIONAL.  To specify metadata on libraries outside of the barcode to library
   mapping file, this parameter may be used.  The file should contain the name of
   the library in one column and the metadata tag to affiliate with it in the 
   second.  Each library may have as many entries as needed.

=item B<-lib_pct library_percent>

=item B<-CIS_tot_p CIS_total_pvalue>

=item B<-CIS_lib_p CIS_library_pvalue>

=item B<-CIS_reg_p CIS_region_pvalue>

=item B<-coCIS_thresh cocis_threshold>

=item B<-merge merge>

  Specify projects to be merged as the new project specified with -projectName.
   E.G. -merge my_first_project -merge my_second_project -projectName 
   my_merged_project.

=item B<-annotation annotation_file>

  Specify the BED file to annotate CISes with.  The default feature set is UCSC's 
   mm9 refSeq genes.

=item B<-no_cis>

  To generate a list of inserts only, specify no_cis.  This is useful in cases
   where a new set of data needs to be merged with a previous set of data.  Use
   this option as a first step to prepare the new data.  Use -merge to combine
   the resulting projects and call CISes on the new project.

=item B<-seqType seqfile_format>

  OPTIONAL.  If not specified, TAPDANCE will attempt to identify the input file
   type on its own.  Valid options are 'tab', 'fasta' and 'fastq'.

=item B<-debug>

  OPTIONAL.  Print progress messages and the output of the TAPDANCE phase
   scripts to standard output.

=back

=cut

#### END of POD documentation.
#-----------------------------------------------------------------------------

use strict;
use Cwd;
use Getopt::Long;
use File::Copy;
use File::Find;
use File::Temp qw/ tempfile tempdir /;
use Pod::Usage;

#tapdance_runner.pl -s $seqs -b $bar2lib -g $genomeIdx -pn $projName -o $omitChrom -pb $projBed -ps $projSum -cb $cisBed -cs $cisSum -bc $baseConfig

my $dbh;

# Directory holding this script; lib/ and util.pl are located relative to it.
# The final path component of $0 is stripped whatever its name or extension.
# When $0 has no directory part (e.g. "perl tapdance_runner.pl" run from the
# script's own directory) fall back to "." so the require paths below stay
# valid instead of becoming "tapdance_runner.pl/lib/...".
my $path = $0;
unless ($path =~ s{/[^/]*$}{}) {
    $path = ".";
}
require "$path/lib/tapdance_base_config.pl";
require "$path/util.pl";

#Universal variables
my ($seqFile, $seqType, $bar2libFile, $bowtieIdx, $bwaIdx, $projName, @omitChrom, $baseConfig, @mutagens_array, $metadata, $merge, $preconfig_file, $library_percent, $CIS_total_pvalue, $CIS_library_pvalue, $CIS_region_pvalue, $cocis_threshold, $annotation_file, $db_config);
my $no_cis = 0;

#CMD line variables
my ($debug, $output_dir, $noUnlink, $help_flag);

#Galaxy variables
# Several of these ($projSum, $projSumId, $projVis, $projVisId, $cisSum,
# $cisSumId) are declared but not bound to any option in the table below.
my ($index, $index_id, $index_path, $projBed, $projBedId, $projSum, $projSumId, $projVis, $projVisId, $cisWig, $cisWigId, $cisWigPath, $cisSum, $cisSumId, $tmpDir);


# Getopt::Long specification: option name (with aliases and value type)
# mapped to the variable that receives the value.
my %options = (
#Universal Variables
    "seqFile|s=s"            => \$seqFile,
    "seqType|st=s"           => \$seqType,
    "bar2libFile|b=s"        => \$bar2libFile,
    "bowtieIdx=s"            => \$bowtieIdx,
    #"bwaIdx=s"               => \$bwaIdx,
    "projectName|pn=s"       => \$projName,
    "omittedChromosomes|o=s" => \@omitChrom,
    "baseConfig|bc=s"        => \$baseConfig,
    "metadata|m=s"           => \$metadata,
    "mutagen=s"              => \@mutagens_array,
    "lib_pct=f"              => \$library_percent,
    "CIS_tot_p=f"            => \$CIS_total_pvalue,
    "CIS_lib_p=f"            => \$CIS_library_pvalue,
    "CIS_reg_p=f"            => \$CIS_region_pvalue,
    "coCIS_thresh=f"         => \$cocis_threshold,
    # -merge may be repeated (as documented) or given once as a comma
    # delimited list.  Binding it straight to a scalar kept only the last
    # occurrence, so every value is accumulated into one comma delimited
    # string, which is the form the merge phase splits on.
    "merge=s"                => sub { $merge = defined($merge) ? "$merge,$_[1]" : "$_[1]"; },
    "annotation=s"           => \$annotation_file,
    "config=s"               => \$preconfig_file,
    "db_config=s"            => \$db_config,
    "no_cis"                 => \$no_cis,

#CMD Line Variables
    "help"                   => \$help_flag,
    "output_dir=s"           => \$output_dir,
    "debug|d"                => \$debug,
    
#Galaxy Variables
    "index=s"                => \$index,
    "index_id=s"             => \$index_id,
    "index_path=s"           => \$index_path,
    "projectBed|pb=s"        => \$projBed,
    "projectBedId=s"         => \$projBedId,
    "cisWig|cw=s"            => \$cisWig,
    "cisWigId|cwid=s"        => \$cisWigId,
    "cisWigPath|cwpath=s"    => \$cisWigPath,
    "tmp_dir|t=s"            => \$tmpDir,
    "no_unlink"              => \$noUnlink
    );

# Unknown or malformed options print the usage and exit with status 2;
# -help prints the usage and exits with status 1.
GetOptions(%options) or pod2usage(2);
pod2usage(1) if $help_flag;

$projName = &sanitize_project($projName);
# Set to 1 later if metadata.tab is generated from the barcode mapping file.
my $meta_gen = 0;

# Choose the directory in which all phases are executed:
#   -output_dir given : that directory, created if missing;
#   otherwise         : a new temporary directory, under -tmp_dir if given.
my $envDirN;
if (defined($output_dir)) {
    $envDirN = $output_dir;
    unless (-d $output_dir) {
        mkdir($output_dir) || die "Unable to create output directory $output_dir: $!\n";
    }
}
else {
    # File::Temp::tempdir only understands CLEANUP (UNLINK belongs to
    # tempfile and is silently ignored by tempdir), so the previous
    # UNLINK => 1 never removed anything.  With CLEANUP the temporary
    # directory is removed when the program exits unless -no_unlink is given.
    my @tempdir_opts;
    push(@tempdir_opts, DIR => $tmpDir) if defined($tmpDir);
    push(@tempdir_opts, CLEANUP => 1) unless $noUnlink;
    $envDirN = tempdir(@tempdir_opts);
}
if ($debug) { print "EnvDir = $envDirN\n"; }

# Build $envDirN/config.pl: an optional require of the database config, then
# the base config verbatim, then project specific overrides, then "return 1;".
if (!defined($baseConfig)) { $baseConfig = "$path/lib"; }
open(my $baseConfigH, "<", $baseConfig . "/tapdance_base_config.pl") || die "Unable to open $baseConfig: $!\n";
open(my $envConfigH, ">", $envDirN . "/config.pl") || die "Unable to open environment $envDirN/config.pl: $!\n";

if (defined($db_config)) {
    print $envConfigH "require '" . $envDirN . "/" . $db_config . "';\n";
}

# Copy system defaults first, overwrite as needed
while (<$baseConfigH>) {
  print $envConfigH $_;
}
close($baseConfigH);

if (defined($preconfig_file)) {
    # The predefined configuration used to be opened and then ignored, so
    # -config had no effect.  Append it after the base config so that its
    # values override the defaults, as the option documentation states.
    open(my $preConfigH, "<", $preconfig_file) || die "Unable to open input configuration file $preconfig_file. $!\n";
    print $envConfigH "#Values from the predefined configuration file, will override values set above\n";
    while (<$preConfigH>) {
        print $envConfigH $_;
    }
    close($preConfigH);
    # Guard against a missing final newline so the trailing "return 1;"
    # always starts on a line of its own.
    print $envConfigH "\n";
} 
else {
    print $envConfigH "#Project specific custom values, will override values set above\n";
    print $envConfigH "\$proj = '$projName';\n";
    print $envConfigH "\$envDir = '$envDirN';\n";
    if (defined($library_percent)) {
        print $envConfigH "\$library_percent = '$library_percent';\n";
    }
    if (defined($CIS_total_pvalue)) {
        print $envConfigH "\$CIS_total_pvalue = '$CIS_total_pvalue';\n";
    }
    if (defined($CIS_library_pvalue)) {
        print $envConfigH "\$CIS_library_pvalue = '$CIS_library_pvalue';\n";
    }
    if (defined($CIS_region_pvalue)) {
        print $envConfigH "\$CIS_region_pvalue = '$CIS_region_pvalue';\n";
    }
    if (defined($cocis_threshold)) {
        print $envConfigH "\$cocis_threshold = '$cocis_threshold';\n";
    }
    # Aligner selection.  Each index is tested separately:
    # defined($a && $b) tests the definedness of the && result, which is
    # not the same as "both are defined" (e.g. when $a is "" or "0").
    if (defined($bowtieIdx) && defined($bwaIdx)) {
        print $envConfigH "\$bwa_exe = 'bwa';\n";
        print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
        print $envConfigH "\$bwa_idx = '$bwaIdx';\n";
        print $envConfigH "\$bowtie_idx = '$bowtieIdx';\n";
        print $envConfigH "\$aligner = 'bow_bwa';\n";
    }
    elsif (defined($bowtieIdx)) {
        print $envConfigH "\$bowtie_exe = 'bowtie --quiet';\n";
        print $envConfigH "\$bowtie_idx = '$bowtieIdx';\n";
        print $envConfigH "\$aligner = 'bowtie';\n";
    }
    elsif (defined($bwaIdx)) {
        print $envConfigH "\$bwa_exe = 'bwa';\n";
        print $envConfigH "\$bwa_idx = '$bwaIdx';\n";
        print $envConfigH "\$aligner = 'bwa';\n";
    }
    
    # All -mutagen values are stored as one comma delimited string.
    if ($#mutagens_array >= 0 && length($mutagens_array[0]) > 0) {
        print $envConfigH "\$mutagens = '" . join(",", @mutagens_array) . "';\n";
    }
    if (defined($annotation_file)) {
        print $envConfigH "\$annotation_file ='" . $annotation_file . "';\n";
    }
    # Sniff the sequence format when it was not given; this may also write
    # a \$quality setting into the config being generated.
    if (!defined($seqType) && defined($seqFile)) {
        $seqType = &determine_seq_input_type(\$seqFile, \$envConfigH);
    }
}
print $envConfigH "return 1;\n";
# Buffered write errors only surface at close, so check it.
close($envConfigH) || die "Unable to finish writing $envDirN/config.pl: $!\n";

# $output: read handle on the currently running phase script.
# $orig_dir: working directory to return to after a phase has run.
my ($output, $orig_dir);

# Create the data and lib directories of the execution environment.  An
# already existing directory (reused -output_dir) is fine; any other failure
# is fatal because every later step writes into these directories.
foreach my $env_subdir ("$envDirN/data", "$envDirN/lib") {
    unless (-d $env_subdir) {
        mkdir($env_subdir) || die "Unable to create $env_subdir. $!\n";
    }
}

# Copy every file below the installation's lib/ into the environment.
# no_chdir keeps the working directory fixed during the traversal: by
# default find() changes into each directory it visits, which invalidates
# both $File::Find::name and $envDirN inside lib_copy whenever either of them
# is a relative path.
my @lib_source = ($path . "/lib/");
find({ wanted => \&lib_copy, no_chdir => 1 }, @lib_source);

# Set to 1 once a phase has produced an insert list that should be copied out.
my $copy_ins_files = 0;
# Galaxy only: start the HTML index page and make sure its file directory exists.
my $indexH;
if (defined($index)) {
    open ($indexH, ">", $index) || die "Unable to open $index for writing: $!\n";
    print $indexH "<HTML>\n<HEAD>\n<TITLE>$projName Results</TITLE>\n</HEAD>\n<BODY>\n<H1>$projName</H1>\n";
    if (defined($index_path)) {
        unless (-d $index_path) {
            mkdir($index_path) || die "Unable to create $index_path. $!\n";
        }
    }
}

###
#  Phase 1, sequences through mapping to insert list
###
if (defined($seqFile)) {
    # Convert the input sequences to the tab format and stage the barcode
    # mapping inside the execution environment.
    my $seqOutFn = "$envDirN/data/seqs.tab";
    &pre_process_seqs(\$seqType, \$seqFile, \$seqOutFn, \$debug); 
    copy("$bar2libFile", "$envDirN/data/barcode2lib.txt") || die "Unable to link barcode to library file in execution environment. $!\n";
    $orig_dir = &cwd;
    if ($debug) { print "Starting dir: $orig_dir.\n"; }
    chdir($envDirN) || die "Unable to change to execution environment $envDirN. $!\n";
    # Resolve the environment directory after the chdir: a relative
    # $envDirN no longer points at the environment from inside it.
    my $env_abs = &cwd;
    if ($debug) { print "Current dir: " . $env_abs . "\n"; }
    # List-form pipe open: no shell is involved, so paths with spaces or
    # metacharacters are passed through unchanged.
    open($output, "-|", "perl", "$env_abs/lib/TAPDANCE.pl") || die "Unable to run TAPDANCE.pl. $!\n";
    # Always drain the pipe, even when the output is not shown.  Closing it
    # unread makes the child's next write fail with SIGPIPE, which can kill
    # TAPDANCE.pl in the middle of its run.
    while (my $tap_line = <$output>) {
        print $tap_line if $debug;
    }
    close($output) || warn "TAPDANCE.pl did not exit cleanly (wait status $?).\n";
    chdir($orig_dir);
    if (defined($index)) {
        print $indexH "<H3>Insertion Analysis</H3>\n<P>To visualize the insertions in this project use the \"Non Redundant Inserts BED\" file in the history.\n<UL>\n";
        print $indexH "<LI>" . &link_file("primary_" . $index_id . "_InsertsVis_hidden.pdf", "pdf", "QC graphs of inserts", 0) . "</LI>\n";
        print $indexH "<LI>" . &link_file("primary_" . $index_id . "_InsertsSummary_hidden.txt", "txt", "Summary of all inserts", 0) . "</LI>\n";
        copy("$envDirN/results/summary_$projName.txt", $index_path . "/primary_" . $index_id . "_InsertsSummary_hidden.txt") || die "Unable to retrieve project summary, $envDirN/results/summary_$projName.txt. $!\n";
        # Render the QC graphs.  A failure is reported but is not fatal,
        # matching the previous behaviour of ignoring the exit status.
        system("Rscript", "--vanilla", "$envDirN/lib/insert_vis.R", "--args",
               "$envDirN/results/lib_stats_$projName.txt",
               "$envDirN/results/region_stats_$projName.txt",
               $index_path . "/primary_" . $index_id . "_InsertsVis_hidden.pdf") == 0
            || warn "insert_vis.R did not exit cleanly (wait status $?).\n";
        print $indexH "</UL>\n</P>\n";
    }
    $copy_ins_files=1;
    if ($debug) { print "TAPDANCE.pl done.\n"; }
}

###
#  Phase 4, merge projects
###
if (defined($merge)) {
    # meta.tab lists the projects to merge, one name per line.
    open(my $meta_tab, ">", "$envDirN/data/meta.tab") || die "Unable to write project merge list to $envDirN/data/meta.tab: $!\n";
    my @merge_projs = split(',', $merge);
    foreach my $merge_proj (@merge_projs) {
        print $meta_tab "$merge_proj\n";
    }
    close($meta_tab);
    $orig_dir = &cwd;
    if ($debug) { print "Starting dir: $orig_dir.\n" }
    chdir($envDirN) || die "Unable to change to execution environment $envDirN. $!\n";
    # Resolve the environment directory after the chdir: a relative
    # $envDirN no longer points at the environment from inside it.
    my $env_abs = &cwd;
    # List-form pipe open, so no shell interprets the path.
    open($output, "-|", "perl", "$env_abs/lib/TAP4.pl") || die "Unable to run TAP4.pl. $!\n";
    # Always drain the pipe: closing it unread makes the child's next write
    # fail with SIGPIPE, which can kill TAP4.pl in the middle of its run.
    while (my $tap_line = <$output>) {
        print $tap_line if $debug;
    }
    close($output) || warn "TAP4.pl did not exit cleanly (wait status $?).\n";
    chdir($orig_dir);
    $copy_ins_files=1;
    if ($debug) { print "TAP4.pl done.\n"; }
}

###
#  Copy insert files
###
# Runs only when phase 1 or phase 4 produced an insert list.  For Galaxy runs
# (an index page was requested) the raw project BED is published next to the
# index page as the visible "InsertsBED" dataset.
if ($copy_ins_files) {
    if (defined($index)) {
        my $raw_bed = "$envDirN/results/raw_$projName.BED";
        my $bed_dest = "$index_path/primary_${index_id}_InsertsBED_visible_bed";
        copy($raw_bed, $bed_dest) || die "Unable to retrieve project BED, $raw_bed. $!\n";
    }
    print "Files copied.\n" if $debug;
}

###
#  Phase 2, calculate CISes
###
if (!$no_cis) {
    # Fill chromo tab for phase two: one omitted chromosome name per line.
    open(my $chromoTabH, ">", $envDirN . "/data/chromo.tab") || die "Unable to open chromo tab $envDirN/data/chromo.tab: $!\n";
    if (defined($metadata)) {
        copy("$metadata", "$envDirN/data/metadata.tab") || die "Unable to copy provided metadata, $metadata. $!\n";
    }
    # -omittedChromosomes may be repeated and/or hold a comma delimited
    # list; both forms end up as one name per line.
    foreach my $omit_arg (@omitChrom) {
        foreach my $chrom (split(/,/, $omit_arg)) {
            $chrom =~ s/^\s+//;
            $chrom =~ s/\s+$//;
            next unless length($chrom);
            print $chromoTabH "$chrom\n";
        }
    }
    close ($chromoTabH);
    if ($debug) { print "Omitted chromosomes written.\n"; }

    my ($metadataTabH, $barcodeInH);
    # Metadata tags that get their own CIS calls (and WIG file).
    my %metadata_attrs = ();
    my @map;
    if (!defined($metadata) && defined($bar2libFile)) {
        # No metadata file supplied: derive metadata.tab from the barcode to
        # library mapping staged in the execution environment.
        open($metadataTabH, ">", $envDirN . "/data/metadata.tab") || die "Unable to open metadata tab $envDirN/data/metadata.tab: $!\n";
        open($barcodeInH, "<", "$envDirN/data/barcode2lib.txt") || die "Unable to open barcode to library mapping, $envDirN/data/barcode2lib.txt: $!\n";
        my ($idx, $lib_name);
        while (my $map_line = <$barcodeInH>) {
            chomp($map_line);
            @map = split("\t", $map_line);
            # Skip blank lines and lines without a library column.
            next unless defined($map[1]);
            $map[1] =~ s/^\s+//;
            $map[1] =~ s/\s+$//;
            # The library name is the part before the -L / -R suffix.  The
            # match result is tested: an unchecked $1 keeps the value of
            # the previous successful match, which silently attributed a
            # suffix-less line to the preceding library.
            if ($map[1] =~ m/(.*)-[LR]/) {
                $lib_name = $1;
            }
            else {
                $lib_name = $map[1];
            }
            print $metadataTabH join("\t", $lib_name, "all", "cis") . "\n";
            # NOTE(review): tags are read from the fourth column onwards,
            # while the option documentation says every column after the
            # second is a tag - confirm what the third column holds.
            for ($idx = 3; $idx <= $#map; $idx++) {
                $map[$idx] =~ s/^\s+//;
                $map[$idx] =~ s/\s+$//;
                print $metadataTabH join("\t", $lib_name, $map[$idx], "cis") . "\n";
                $metadata_attrs{$map[$idx]} = 1; 
            }
        }
        close($barcodeInH);
        close($metadataTabH);
        $meta_gen = 1;
        if ($debug) { print "Metadata written.\n"; }
    }
    else {
        # Collect the CIS tags (other than "all") from the metadata file.
        open(my $metadata_file, "<", $envDirN . "/data/metadata.tab") || die "Unable to open meta, $!\n";
        while (my $meta_line = <$metadata_file>) {
            chomp($meta_line);
            @map = split("\t", $meta_line);
            next if @map < 3;
            if (uc $map[2] eq "CIS" && uc $map[1] ne "ALL") { 
                $metadata_attrs{$map[1]} = 1;
            }
        }
        close($metadata_file);
    }

    $orig_dir = &cwd;
    if ($debug) { print "Starting dir: $orig_dir.\n" }  
    chdir($envDirN) || die "Unable to change to execution environment $envDirN. $!\n";
    # List-form pipe open, so no shell is involved.
    open($output, "-|", "perl", "./lib/TAP2.pl") || die "Unable to run TAP2.pl. $!\n";
    # Always drain the pipe: closing it unread makes the child's next write
    # fail with SIGPIPE, which can kill TAP2.pl in the middle of its run.
    while (my $tap_line = <$output>) {
        print $tap_line if $debug;
    }
    close($output) || warn "TAP2.pl did not exit cleanly (wait status $?).\n";
    chdir($orig_dir);
    if ($debug) { print "TAP2.pl run.\n"; }
    
    if ($debug) { print "TAP2.pl done.\n"; }
    if (defined($index)) {
        print $indexH "<H3>CIS calls</H3>\n<P>To Visualize the CIS Calls, use the \"CIS WIG\" history entry.  Each metadata tag that generated it's own CIS calls has it's own WIG file in the history as \"CIS WIG (tag)\".\n<UL>\n";
        print $indexH "<LI>" . &link_file("primary_" . $index_id . "_CISSummary_hidden.txt", "txt", "Summary of all CIS calls", 0) . "</LI>\n";
        copy("$envDirN/results/summary_CIS_$projName.txt", $index_path . "/primary_" . $index_id . "_CISSummary_hidden.txt") || die "Unable to retrieve CIS summary, $envDirN/results/summary_CIS_$projName.txt. $!\n";

        # NOTE(review): the WIG file names embed $library_percent, which is
        # only set here when -lib_pct was given on the command line.
        my $wig_suffix = "-nr-" . $projName . "-" . $library_percent . ".wig";
        my $all_wig = $envDirN . "/results/all/plot_all" . $wig_suffix;
        copy($all_wig, $cisWig) || die "Unable to retrieve CIS WIG, $all_wig. $!\n";

        # Association results: [ index file tag, link text, file in results/Assoc ].
        my @assoc_files = (
            [ "Ann",          "Ann.txt",            "Ann.txt" ],
            [ "Cis",          "Cis.txt",            "Cis.txt" ],
            [ "AnnAnnTable",  "Ann_Ann_table.txt",  "Ann_Ann_table.xls" ],
            [ "AnnAnnMatrix", "Ann_ann_matrix.txt", "Ann_ann_matrix.txt" ],
            [ "AnnCisTable",  "Ann_cis_table.xls",  "Ann_cis_table.xls" ],
            [ "AnnCisMatrix", "Ann_cis_matrix.txt", "Ann_cis_matrix.txt" ],
            [ "CisCisTable",  "Cis_cis_table.xls",  "Cis_cis_table.xls" ],
            [ "CisCisMatrix", "Cis_cis_matrix.txt", "Cis_cis_matrix.txt" ],
        );
        print $indexH "<UL>\n";
        foreach my $assoc (@assoc_files) {
            my ($assoc_tag, $link_text, $assoc_fn) = @{$assoc};
            my $hidden_fn = "primary_" . $index_id . "_" . $assoc_tag . "_hidden.txt";
            print $indexH "<LI>" . &link_file($hidden_fn, "txt", $link_text, 0) . "</LI>\n";
            copy("$envDirN/results/Assoc/$assoc_fn", $index_path . "/" . $hidden_fn) || die "Unable to retrieve $assoc_fn. $!\n";
        }
        print $indexH "</UL>\n</UL>\n</P>\n";

        # Publish one WIG per metadata tag, skipping tags whose WIG file is
        # missing or empty.
        foreach my $tag (keys %metadata_attrs) {
            my $tag_wig = "$envDirN/results/$tag/plot_" . $tag . $wig_suffix;
            next unless -s $tag_wig;
            copy($tag_wig, "$cisWigPath/primary_" . $cisWigId . "_" . $tag . "_visible_wig") || die "Unable to retrieve CIS WIG, $tag_wig. $!\n";
        }
    }
    if ($debug) { print "Files copied.\n"; }
}

# Galaxy only: finish the HTML index page with usage instructions and links
# to the configuration files generated for this run.
if (defined($index)) {
    print $indexH "<P>To add files to your history for further processing in Galaxy, right-click the link and select \"Copy Link URL\". Open the \"Get Data\" menu in the \"Tools\" sidebar and open the \"Upload File\" link.  Paste the copied URL in the \"URL/Text\" box.</P>\n";
    print $indexH "<H3>Generated configuration files</H3>\n<UL>\n";
    print $indexH "<LI>" . &link_file("primary_" . $index_id . "_ConfigPl_hidden.txt", "txt", "Project configuration", 0) . "</LI>\n"; 
    copy("$envDirN/config.pl", $index_path . "/primary_" . $index_id . "_ConfigPl_hidden.txt") || die "Unable to retrieve config.pl. $!\n";
    if ($meta_gen) {
        # metadata.tab was generated from the barcode mapping, so publish it
        # as well.  This branch used to repeat the config.pl entry above.
        print $indexH "<LI>" . &link_file("primary_" . $index_id . "_MetadataTab_hidden.txt", "txt", "Generated library metadata", 0) . "</LI>\n"; 
        copy("$envDirN/data/metadata.tab", $index_path . "/primary_" . $index_id . "_MetadataTab_hidden.txt") || die "Unable to retrieve metadata.tab. $!\n";
    }
    print $indexH "</UL>\n</BODY>\n</HTML>\n";
    # Buffered write errors only surface at close, so check it.
    close($indexH) || die "Unable to finish writing $index: $!\n";
}

exit(0);

# Guess the format of a sequence file from its first line.
#
# Parameters:
#   $input_fn_ref  - reference to the name of the sequence file
#   $config_fh_ref - reference to an open, writable filehandle; for fastq and
#                    fasta input a "$quality = 1;" / "$quality = 0;" line is
#                    written to it (nothing is written for tabular input)
#
# Returns "fastq" (first line starts with "@"), "fasta" (starts with ">") or
# "tab" (first line holds at least two tab separated fields).
#
# Dies when the file cannot be opened, is empty, or matches none of the above.
sub determine_seq_input_type {
    my ($input_fn_ref, $config_fh_ref) = @_;
    # Lexical handle instead of the global bareword INPUT.
    open(my $input_fh, "<", ${$input_fn_ref}) || die "Unable to open input file ${$input_fn_ref}, $!\n";
    my $first_line = <$input_fh>;
    close($input_fh);

    # An empty file has no first line; report that directly instead of
    # matching against an undefined value.
    if (!defined($first_line)) {
        die "Unable to determine sequence input value type (fastq|fasta|tabular): ${$input_fn_ref} is empty\n";
    }

    if ($first_line =~ /^@/) {
        print { ${$config_fh_ref} } "\$quality = 1;\n";
        return "fastq";
    }
    if ($first_line =~ /^>/) {
        print { ${$config_fh_ref} } "\$quality = 0;\n";
        return "fasta";
    }

    my @fields = split("\t", $first_line);
    if ($#fields > 0) {
        return "tab";
    }
    die "Unable to determine sequence input value type (fastq|fasta|tabular)\n";
}

# Convert the input sequence file into the tab delimited file the pipeline
# reads.
#
# All parameters are scalar references:
#   $seq_type_ref - input format; "fastq" and "fasta" are converted, any other
#                   value is treated as tabular input
#   $in_file_ref  - name of the input sequence file
#   $out_fn_ref   - name of the file to write
#   $debug_ref    - when true, the detected format is printed
#
# fastq/fasta input is handed to process_fastq/process_fasta together with the
# matching per-record callback and the output handle; tabular input is copied
# unchanged.  Dies when the output cannot be opened or the copy fails.
sub pre_process_seqs {
    my ($seq_type_ref, $in_file_ref, $out_fn_ref, $debug_ref) = @_; 
    my $format  = ${$seq_type_ref};
    my $target  = ${$out_fn_ref};

    # Pick the label, the parser and the record callback for the format.
    my ($label, $parser, $record_cb) =
          $format eq "fastq" ? ("FASTQ", \&process_fastq, \&fastq_entry)
        : $format eq "fasta" ? ("FASTA", \&process_fasta, \&fasta_entry)
        :                      ("TABULAR");

    print "$label\n" if ${$debug_ref};

    # Tab, no quality info: nothing to convert.
    unless (defined($parser)) {
        return (copy(${$in_file_ref}, $target) || die "Unable to copy seq file to execution environment. $!\n");
    }

    open(my $out_fh, ">", $target) || die "Unable to open $target, $!\n";
    $parser->($in_file_ref, $record_cb, \$out_fh);
    return close($out_fh);
}

# Per-record callback for FASTA input: writes one tab delimited line
#   <id> TAB <empty> TAB <sequence> TAB <synthetic quality> NEWLINE
# to the output handle.
#
# Parameters:
#   $seq_id_ref - reference to the sequence identifier
#   $seq_ref    - reference to the sequence (left unmodified)
#   $array_ref  - array reference whose first element is a reference to the
#                 output filehandle
#
# FASTA carries no quality values, so a quality string is synthesised from the
# sequence: A/C/G/T (either case) become 'I', N (either case) becomes '!', and
# any other character is carried over unchanged.
sub fasta_entry {
    my ($seq_id_ref, $seq_ref, $array_ref) = @_;
    my $out_fh = ${ $array_ref->[0] };

    # Work on a copy so the caller's sequence is not overwritten with the
    # quality string.  The character class is [ACGT]: the previous
    # [A,C,T,G] also matched literal commas.
    (my $synthetic_qual = ${$seq_ref}) =~ s/[ACGT]/I/gi;
    $synthetic_qual =~ s/N/!/gi;

    # No per-record trace is printed to STDOUT any more; it was leftover
    # debugging output emitted for every sequence.
    print {$out_fh} join("\t", ${$seq_id_ref}, "", ${$seq_ref}, $synthetic_qual) . "\n";
}

# Per-record callback for FASTQ input: writes one tab delimited line
#   <id> TAB <empty> TAB <sequence> TAB <quality> NEWLINE
# to the output handle.
#
# Parameters:
#   $seq_id_ref - reference to the sequence identifier
#   $seq_ref    - reference to the sequence
#   $seq_qual   - reference to the quality string
#   $array_ref  - array reference whose first element is a reference to the
#                 output filehandle
sub fastq_entry {
    my ($seq_id_ref, $seq_ref, $seq_qual, $array_ref) = @_;
    my $out_fh  = ${ $array_ref->[0] };
    my @columns = (${$seq_id_ref}, "", ${$seq_ref}, ${$seq_qual});
    my $record  = join("\t", @columns) . "\n";
    print {$out_fh} $record;
}

# File::Find callback: copies the entry currently being visited into
# $envDirN/lib.  Directories are skipped, so files from every level of the
# traversed tree end up directly in that one directory.  Dies when a copy fails.
sub lib_copy {
    my $visited = $File::Find::name;
    return if -d $visited;
    copy($visited, "$envDirN/lib") || die "Unable to copy $visited to $envDirN/lib. $!\n";
}

# Build the HTML anchor for one result file on the index page.
#
# Parameters:
#   $file_name - link target
#   $file_type - extension passed as to_ext in the download link
#   $link_text - visible text of the link
#   $download  - when true, a second "[Download]" link pointing at
#                <file_name>/display?to_ext=<file_type> is appended
#
# Returns the HTML fragment.  The values are inserted as given, without any
# HTML escaping.
sub link_file {
    my ($file_name, $file_type, $link_text, $download) = @_;
    my $anchor = qq{<A HREF="$file_name">$link_text</A>};
    return $anchor unless $download;
    return $anchor . qq{ [<A HREF="$file_name/display?to_ext=$file_type">Download</A>]};
}