Mercurial > repos > yusuf > transfer_convert_nextseq
view copyNextSeq.pl @ 0:d4ac6e05c96c default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:43:47 -0600 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl
# Galaxy tool wrapper for Illumina NextSeq runs.
#
# Steps performed:
#   1. Read the local data directory from <tool_dir>/transfer_convert_nextseq.loc
#      (seeded from the copy in tool-data/ next to this script if it is missing).
#   2. Check that the requesting user is listed in nextseq_access.conf.
#   3. Sanity-check the Illumina sample sheet ([Header], [Reads], [Data] sections).
#   4. Expand the given run name into the full run folder name by listing the
#      share directory on the sequencer host over ssh.
#   5. Write the sample sheet into the local run folder.
#   6. Run FastQC (via rgFastQC.py) on the first $fastq_sample_size lines of each
#      *.fastq.gz file found under the run folder and build an HTML index.
#   7. Zip RunInfo.xml, RunParameters.xml and InterOp into the archive file.
#
# NOTE: the scp transfer and the bcl2fastq conversion are commented out in this
# revision, so the run folder and its FASTQ files must already exist locally.
#
# Invocation: the tool directory is the FIRST positional argument, followed by
# the named options listed in the usage message below.

use strict;
use warnings;
use Getopt::Long;
use File::Find;
use File::Basename;
use File::Copy;
use vars qw(@fastq_files);

my $dirname      = dirname(__FILE__);
my $pythonScript = "$dirname/rgFastQC.py";

my $tool_dir = shift @ARGV;
defined $tool_dir
    or die "Missing tool directory: it must be given as the first command-line argument\n";
my $pythonJars = "$tool_dir/shared/jars/FastQC/fastqc";

# Site config
my $num_threads       = 32;    # only referenced by the disabled bcl2fastq step
my $fastq_sample_size = 400000;
my $seq_host          = "10.81.192.138";
my $seq_username      = "nextseq-user";
my $seq_dir           = "Desktop/Share";

# Get the local data directory from the .loc file, seeding it from the
# packaged default on first use.
my $loc_file = "$tool_dir/transfer_convert_nextseq.loc";
if (not -e $loc_file) {
    copy("$dirname/tool-data/transfer_convert_nextseq.loc", $loc_file)
        or die "Could not create configuration file $loc_file: $!\n";
}
open my $loc_fh, '<', $loc_file
    or die "Could not open configuration file: $!\n";
my $loc_line = <$loc_fh>;
close $loc_fh;

# The directory is whatever follows the last "=" on the first line, with all
# whitespace removed.
my @keys = defined $loc_line ? split(/=/, $loc_line) : ();
my $local_dir = @keys ? $keys[-1] : '';
$local_dir =~ s/\s+//g;
length $local_dir
    or die "Configuration file $loc_file does not define a local data directory\n";

# Store arguments into variables
my $runName;
my $sampleSheet;
my $user;
my $accessFile;
my $outDir;
my $htmlFile;
my $archiveFile;
GetOptions(
    "run=s"         => \$runName,
    "samplesheet=s" => \$sampleSheet,
    "user=s"        => \$user,
    "toolDir=s"     => \$accessFile,
    "out=s"         => \$outDir,
    "html=s"        => \$htmlFile,
    "archive=s"     => \$archiveFile,
);

if (   not defined $runName
    or not defined $sampleSheet
    or not defined $user
    or not defined $accessFile
    or not defined $outDir
    or not defined $htmlFile)
{
    die "Usage: $0 -run <unique_suffix> -samplesheet <illumina.csv> -user <user\@domain in nextseq_access.conf> -toolDir <galaxy tool conf dir> ",
        "-out <output dir for FASTQC report> -html <FASTQC report file name> -archive <SAV files.zip>\n";
}

# -toolDir names the directory holding the access list; create the list if it
# is not already there so that the open below can only fail for real reasons.
$accessFile = "$accessFile/nextseq_access.conf";
system('touch', $accessFile);
open my $access_fh, '<', $accessFile
    or die "Cannot open $accessFile for reading: $!\n";
chomp(my @allowed_users = <$access_fh>);
close $access_fh;

# Check to make sure $user is allowed to run the script (exact, one-per-line match)
if (not grep { $_ eq $user } @allowed_users) {
    die "Please ask the administrator to add $user to $accessFile in order to gain access to this tool\n";
}

# First, sanity check the sample file
open my $sheet_in, '<', $sampleSheet
    or die "Cannot open $sampleSheet for reading: $!\n";
my $sheet_text = do { local $/; <$sheet_in> };    # slurp; $/ is restored on scope exit
close $sheet_in;
$sheet_text = '' if not defined $sheet_text;
my @CSV = split /\r?\n/, $sheet_text;             # allow different line endings

my ($has_header, $has_reads, $has_data);
for my $sheet_line (@CSV) {
    if ($sheet_line =~ /^\[Header\]/) {
        $has_header = 1;
    }
    elsif ($sheet_line =~ /^\[Reads\]/) {
        $has_reads = 1;
    }
    elsif ($sheet_line =~ /^\[Data\]/) {
        $has_data = 1;
    }
}
if (not defined $has_header) {
    die "Header section is missing in sample sheet, please fix and resubmit this job\n";
}
if (not defined $has_reads) {
    die "Reads section is missing in sample sheet, please fix and resubmit this job\n";
}
if (not defined $has_data) {
    die "Data section is missing in sample sheet, please fix and resubmit this job\n";
}

# Expand the cartridge ID into the full run name on the remote host; input
# should look something like "H35VJBGXX". The run name is matched literally
# (\Q...\E) as a substring of each remote directory entry.
open my $ssh, '-|', 'ssh', "$seq_username\@$seq_host", 'ls', '-1', $seq_dir
    or die "Could not run ssh login to $seq_host: $!\n";
my @matchOptions;
my @mismatchOptions;
while (my $entry = <$ssh>) {
    chomp $entry;
    if ($entry =~ /\Q$runName\E/) {
        push @matchOptions, $entry;
    }
    else {
        push @mismatchOptions, $entry;
    }
}
close $ssh;

if (not @matchOptions) {
    if (not @mismatchOptions) {
        die "There was no data found on the rempote server at all, please ask the administrator to ",
            "check this tool's setup (currently checking $seq_username\@$seq_host:$seq_dir)\n";
    }
    # Keep only the ones not already uploaded as options
    @mismatchOptions = grep { not -e "$local_dir/$_" } @mismatchOptions;
    die "No run folder matching $runName was found at $seq_username\@$seq_host:$seq_dir, please try with another ",
        "run name. The following would work currently: ", join(", ", @mismatchOptions), "\n";
}
elsif (@matchOptions > 1) {
    die "Ambiguous run name specification, please revise \"$runName\" to distinguish between existing datasets: ",
        join(", ", @matchOptions), "\n";
}
my $expandedRunName = $matchOptions[0];    # unambiguous, so proceed

# Both branches are intentionally empty in this revision: the "already exists"
# guard and the scp transfer are disabled, so nothing is copied here.
# if sample already exists as a folder, die
if (-e "$local_dir/$expandedRunName") {
    # die "Run $expandedRunName already exists on galaxy ($local_dir/$expandedRunName), cannot copy over\n";
}
# if not, copy to folder
else {
    # system("scp -r $seq_username\@$seq_host\:$seq_dir/$expandedRunName $local_dir") >> 8 and die "Failed to copy from $seq_host to galaxy: scp exit status $?\n";
}

# Put the sample sheet where it needs to be with the transferred data
my $sheet_path = "$local_dir/$expandedRunName/SampleSheet.csv";
open my $sheet_out, '>', $sheet_path
    or die "Cannot open $sheet_path for writing: $!\nThe data files have been transfered, but no BCL to FASTQ conversion has taken place.\n";
print {$sheet_out} join("\n", @CSV);
close $sheet_out
    or die "Cannot finish writing $sheet_path: $!\n";

# convert bcl files to fastq (disabled in this revision)
#system("cd $local_dir/$expandedRunName; /export/common/programs/bcl2fastq/bin/bcl2fastq -r $num_threads -d $num_threads -p $num_threads -w $num_threads")>>8
#  and die "BCL to FASTQ conversion had non-zero exit status ($?). The BCL files were transfered, but FASTQ files were not generated.\n";

# Find the FASTQ files present under the run folder
find(
    sub { push @fastq_files, $File::Find::name if /\.fastq\.gz$/ },
    "$local_dir/$expandedRunName"
);

# Run FASTQC on a sample of data from each lane/barcode
# open output file and write html
open my $html_fh, '>', $htmlFile
    or die "Cannot open $htmlFile for writing: $!\n";
print {$html_fh} "<html><body><h1>Barcodes</h1>";
system('mkdir', '-p', $outDir) == 0
    or die "Cannot create output directory $outDir\n";

# generate html plot using python tool
# head closes the pipe early, so upstream writers would otherwise be killed by SIGPIPE.
$SIG{'PIPE'} = 'IGNORE';
foreach my $file (@fastq_files) {
    my ($barcode, undef, $ext) = fileparse($file, qr/\.fastq\.gz/);
    my $cmd = "gzip -cd $file | head -n $fastq_sample_size | python $pythonScript -i /dev/stdin "
            . "-d $outDir/$barcode/. "
            . "-o fastqc_report.html "
            . "-n \"FASTQC $barcode\" "
            . "-f \"FASTQ\" "
            . "-j \"$barcode$ext\" "
            . "-e $pythonJars";

    # Assumes the bash shell is being used.
    # NOTE(review): the original read "$cmd 2| grep", which hands a stray "2" to
    # the python script and redirects nothing. The surrounding comments indicate
    # the intent was to filter "Broken pipe" noise out of stderr, so stderr of
    # the whole pipeline is merged into the filtered stream here - confirm.
    open my $fastqc, '-|', "trap '' SIGPIPE; ( $cmd ) 2>&1 | grep -v \"Broken pipe\""
        or die "Cannot run FASTQC: $!\n";
    while (my $line = <$fastqc>) {
        # Can safely ignore blank lines and SIGPIPE warnings
        next if $line =~ /^\s*$/ or $line =~ /Broken pipe/;
        print STDERR $line;    # forward any other errors
    }
    # Not checked on purpose: grep -v exits non-zero when it has nothing to print.
    close $fastqc;

    # Add a back-link to the index and flatten the Images/Icons paths in the report.
    system(
        'perl', '-i.bak', '-pe',
        q{s/>FastQC Report</>FastQC Report<div><a href='..\/index.html'>Back to Table of Contents<\/a><\/div></;s/Images|Icons/./},
        "$outDir/$barcode/fastqc_report.html"
    );
    # Extract the icon PNGs next to the report (unzip expands the wildcard itself).
    system(
        'unzip', '-o', '-d', "$outDir/$barcode", '-qq', '-j',
        "$outDir/$barcode/${barcode}_fastqc.zip",
        "${barcode}_fastqc/Icons/*.png"
    );

    # append to html file
    print {$html_fh} "<div><a href='$barcode/fastqc_report.html'>$barcode</a></div>";
}
print {$html_fh} "</body></html>";
close $html_fh
    or die "Cannot finish writing $htmlFile: $!\n";

copy($htmlFile, "$outDir/index.html")
    or warn "Cannot copy $htmlFile to $outDir/index.html: $!\n";

# -archive is optional; without it there is nothing to remove or create.
# The existing file is removed first so that zip starts a fresh archive.
if (defined $archiveFile) {
    system("cd $local_dir/$expandedRunName; rm $archiveFile; zip -r $archiveFile RunInfo.xml RunParameters.xml InterOp -q");
}