Mercurial > repos > yusuf > transfer_convert_nextseq
comparison copyNextSeq.pl @ 0:d4ac6e05c96c default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:43:47 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d4ac6e05c96c |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use warnings; | |
4 use Getopt::Long; | |
5 use File::Find; | |
6 use File::Basename; | |
7 use vars qw(@fastq_files); | |
8 | |
# Directory containing this script; the FastQC wrapper script (rgFastQC.py)
# is looked up next to it.
my $dirname = dirname(__FILE__);
my $pythonScript = "$dirname/rgFastQC.py";

# The first positional argument is the tool directory. The .loc configuration
# file is read from (and, if missing, created in) this directory, so it must
# already exist. Validating it here also catches a forgotten argument, where
# the first option flag would otherwise be consumed as the directory name.
my $tool_dir = shift @ARGV;
if(not defined $tool_dir or not -d $tool_dir){
    die "Usage: $0 <tool dir> -run <unique_suffix> ... (the first argument must be an existing tool directory)\n";
}
my $pythonJars = "$tool_dir/shared/jars/FastQC/fastqc";

# Site config
my $num_threads = 32;              # thread count for the BCL to FASTQ conversion command
my $fastq_sample_size = 400000;    # number of leading lines (not reads) of each FASTQ file given to FastQC
my $seq_host = "10.81.192.138";    # host whose share is listed over ssh
my $seq_username = "nextseq-user"; # ssh login used on $seq_host
my $seq_dir = "Desktop/Share";     # directory on $seq_host that holds the run folders
20 | |
# Get the local directory that run folders live in. It is read from the first
# line of the .loc configuration file in the tool directory: the text after
# the last "=" on that line, with all whitespace removed.
my $loc_file = "$tool_dir/transfer_convert_nextseq.loc";
if(not -e $loc_file){
    # Seed the configuration from the copy shipped with the tool. The list
    # form of system() bypasses the shell, so unusual characters in either
    # path cannot be misinterpreted.
    system("cp", "$dirname/tool-data/transfer_convert_nextseq.loc", $loc_file) == 0
      or die "Could not create configuration file $loc_file (cp exit status ", $? >> 8, ")\n";
}
open(my $loc_fh, '<', $loc_file)
  or die "Could not open configuration file: $!\n";
my $loc_line = <$loc_fh>;
close($loc_fh);
if(not defined $loc_line){
    die "Configuration file $loc_file is empty, expected a line of the form <key>=<local directory>\n";
}
my @keys = split("=", $loc_line);
(my $local_dir = $keys[$#keys]) =~ s/\s+//g;
if($local_dir eq ''){
    die "No local directory is configured in $loc_file\n";
}
29 | |
# Store the named command line arguments into variables
my $runName;     # -run: text identifying the run folder on the sequencer host
my $sampleSheet; # -samplesheet: path of the Illumina sample sheet (CSV)
my $user;        # -user: identity that must be listed in nextseq_access.conf
my $accessFile;  # -toolDir: directory holding nextseq_access.conf (turned into the file path below)
my $outDir;      # -out: output directory for the FASTQC report
my $htmlFile;    # -html: FASTQC report (index) file name
my $archiveFile; # -archive: zip file to create from the run's SAV files (not mandatory)

my $usage = "Usage: $0 -run <unique_suffix> -samplesheet <illumina.csv> -user <user\@domain in nextseq_access.conf> -toolDir <galaxy tool conf dir> "
          . "-out <output dir for FASTQC report> -html <FASTQC report file name> -archive <SAV files.zip>\n";

# GetOptions returns false (after printing its own diagnostics) when the
# command line is malformed, e.g. an unknown option or a missing value.
GetOptions ("run=s" => \$runName,
            "samplesheet=s" => \$sampleSheet,
            "user=s" => \$user,
            "toolDir=s" => \$accessFile,
            "out=s" => \$outDir,
            "html=s" => \$htmlFile,
            "archive=s" => \$archiveFile)
  or die $usage;

# Everything except -archive is mandatory
if(grep {not defined $_} ($runName, $sampleSheet, $user, $accessFile, $outDir, $htmlFile)){
    die $usage;
}

$accessFile = "$accessFile/nextseq_access.conf";
53 | |
# Create the access file if it is not already there. Opening in append mode
# creates the file without ever truncating it; an existing file is left
# alone so that a read-only, administrator-owned file keeps working.
if(not -e $accessFile){
    open(my $create_fh, '>>', $accessFile)
      or die "Could not create access file $accessFile: $!\n";
    close($create_fh);
}

# One allowed user per line
open(my $handle, '<', $accessFile)
  or die "Could not open access file $accessFile: $!\n";
chomp(my @allowed_users = <$handle>);
close($handle);

# Escape regex metacharacters so that the run name is matched literally when
# it is later used as a pattern against the remote directory listing.
$runName = quotemeta($runName);

# Check to make sure $user is allowed to run the script (exact string match
# against a line of the access file)
if(not grep {$_ eq $user} @allowed_users){
    die "Please ask the administrator to add $user to $accessFile in order to gain access to this tool\n";
}
67 | |
# First, sanity check the sample file. The whole file is read at once and
# split into @CSV, one element per line, accepting both LF and CRLF endings.
# @CSV is kept so the sheet can be written out again next to the run data.
my @CSV = do {
    # Three-argument open: the user-supplied path is only ever treated as a
    # file name, never as a mode or a command to run.
    open(my $sheet_fh, '<', $sampleSheet)
      or die "Cannot open $sampleSheet for reading: $!\n";
    local $/; # slurp mode, automatically restored when this block ends
    my $contents = <$sheet_fh>;
    close($sheet_fh);
    defined $contents ? split(/\r?\n/, $contents) : ();
};

# The sheet must contain all three section markers at the start of a line
my %has_section;
for my $line (@CSV){
    if($line =~ /^\[(Header|Reads|Data)\]/){
        $has_section{$1} = 1;
    }
}
for my $section (qw(Header Reads Data)){
    if(not $has_section{$section}){
        die "$section section is missing in sample sheet, please fix and resubmit this job\n";
    }
}
96 | |
# Expand the cartridge ID into the full run name on the remote host, input should look something like "H35VJBGXX".
# The remote directory is listed over ssh (list form of open, so no local
# shell is involved) and each entry is sorted into "matches the requested
# run name" or "does not match".
open(my $ssh_fh, '-|', 'ssh', "$seq_username\@$seq_host", 'ls', '-1', $seq_dir)
  or die "Could not run ssh login to $seq_host: $!\n";
my @matchOptions;
my @mismatchOptions;
my $runNamePattern = qr/$runName/; # $runName is expected to be regex-escaped already
while(my $entry = <$ssh_fh>){
    chomp $entry;
    if($entry =~ $runNamePattern){
        push @matchOptions, $entry;
    }
    else{
        push @mismatchOptions, $entry;
    }
}
# Closing a pipe waits for the command and returns false if it exited with a
# non-zero status; this is the only place an ssh or ls failure is visible.
if(not close($ssh_fh)){
    die "Listing $seq_dir on $seq_username\@$seq_host failed",
        ($! ? ": $!" : " (ssh exit status " . ($? >> 8) . ")"), "\n";
}
if(not @matchOptions){
    if(not @mismatchOptions){
        die "There was no data found on the rempote server at all, please ask the administrator to ",
            "check this tool's setup (currently checking $seq_username\@$seq_host:$seq_dir)\n";
    }
    # Keep only the ones not already uploaded as options
    @mismatchOptions = grep {not -e "$local_dir/$_"} @mismatchOptions;
    die "No run folder matching $runName was found at $seq_username\@$seq_host:$seq_dir, please try with another ",
        "run name. The following would work currently: ", join(", ", @mismatchOptions), "\n";
}
elsif(@matchOptions > 1){
    die "Ambiguous run name specification, please revise \"$runName\" to distinguish between existing datasets: ",
        join(", ", @matchOptions), "\n";
}
my $expandedRunName = $matchOptions[0]; # unambiguous, so proceed
127 | |
# If the run already exists as a local folder, die; if not, copy it over.
# NOTE(review): both the "already exists" abort and the scp transfer are
# commented out, so this conditional currently does nothing and the run
# folder must already be present under $local_dir -- confirm whether this
# was left disabled on purpose.
if(-e "$local_dir/$expandedRunName"){
    # die "Run $expandedRunName already exists on galaxy ($local_dir/$expandedRunName), cannot copy over\n";
}
else{
    # system("scp -r $seq_username\@$seq_host\:$seq_dir/$expandedRunName $local_dir") >> 8 and die "Failed to copy from $seq_host to galaxy: scp exit status $?\n";
}

# Put the sample sheet where it needs to be with the transferred data.
# Lines are written back joined by "\n" regardless of the original endings.
my $sheet_copy = "$local_dir/$expandedRunName/SampleSheet.csv";
open(my $sheet_out, '>', $sheet_copy)
  or die "Cannot open $sheet_copy for writing: $!\nThe data files have been transfered, but no BCL to FASTQ conversion has taken place.\n";
print {$sheet_out} join("\n", @CSV)
  or die "Cannot write to $sheet_copy: $!\n";
# Buffered write errors (e.g. a full disk) only surface at close
close($sheet_out)
  or die "Cannot finish writing $sheet_copy: $!\n";

# Convert bcl files to fastq
# NOTE(review): the conversion itself is commented out as well.
#system("cd $local_dir/$expandedRunName; /export/common/programs/bcl2fastq/bin/bcl2fastq -r $num_threads -d $num_threads -p $num_threads -w $num_threads")>>8
# and die "BCL to FASTQ conversion had non-zero exit status ($?). The BCL files were transfered, but FASTQ files were not generated.\n";
146 | |
# Find the FASTQ files generated: every file under the run folder whose name
# ends in ".fastq.gz" (both dots are literal) is collected with its full path.
find(sub{push @fastq_files, $File::Find::name if /\.fastq\.gz$/}, "$local_dir/$expandedRunName");
149 | |
# Run FASTQC on a sample of data from each lane/barcode.
# $htmlFile becomes a table of contents with one link per FASTQ file; each
# individual report is written to $outDir/<barcode>/fastqc_report.html.
open(my $index_fh, '>', $htmlFile)
  or die "Cannot open $htmlFile for writing: $!\n";
print {$index_fh} "<html><body><h1>Barcodes</h1>";
system("mkdir", "-p", $outDir) == 0
  or die "Cannot create output directory $outDir (mkdir exit status ", $? >> 8, ")\n";

# Generate the html plots using the python tool. "head" stops reading early,
# so the upstream gzip sees a closed pipe; SIGPIPE is ignored so this is not
# treated as a failure.
$SIG{'PIPE'} = 'IGNORE';
foreach my $file (@fastq_files){
    # $barcode is the file name without directory and ".fastq.gz" suffix.
    # The suffix is given as a compiled regex so both dots stay literal.
    my ($barcode, undef, $ext) = fileparse($file, qr/\.fastq\.gz/);
    my $cmd = "gzip -cd $file | head -n $fastq_sample_size | python $pythonScript -i /dev/stdin "
            . "-d $outDir/$barcode/. "
            . "-o fastqc_report.html "
            . "-n \"FASTQC $barcode\" "
            . "-f \"FASTQ\" "
            . "-j \"$barcode$ext\" "
            . "-e $pythonJars";
    # The command is run by /bin/sh, so only POSIX syntax is used: the signal
    # is named PIPE (not SIGPIPE), and the whole pipeline is grouped so that
    # "2>&1" captures the stderr of gzip and head as well as python.
    open(my $cmd_fh, '-|', "trap '' PIPE; { $cmd; } 2>&1 | grep -v \"Broken pipe\"")
      or die "Cannot run FASTQC: $!\n";
    while(my $line = <$cmd_fh>){
        # Can safely ignore blank lines and SIGPIPE warnings
        next if $line =~ /^\s*$/ or $line =~ /Broken pipe/;
        print STDERR $line; # forward any other output
    }
    close($cmd_fh);
    # Add a "back to table of contents" link to the report and point image
    # references at the report's own directory
    system("perl -i.bak -pe \"s/>FastQC Report</>FastQC Report<div><a href='..\\/index.html'>Back to Table of Contents<\\\/a><\\\/div></;s/Images|Icons/./\" $outDir/$barcode/fastqc_report.html");
    # Extract the icons next to the report; the wildcard is quoted so that it
    # reaches unzip unexpanded instead of being globbed by the shell
    system("unzip -o -d $outDir/$barcode -qq -j $outDir/$barcode/${barcode}_fastqc.zip '${barcode}_fastqc/Icons/*.png'");
    # append to html file
    print {$index_fh} "<div><a href='$barcode/fastqc_report.html'>$barcode</a></div>";
}


print {$index_fh} "</body></html>";
close($index_fh)
  or die "Cannot finish writing $htmlFile: $!\n";
# Make the table of contents available as index.html inside the report
# directory (list form of system(): no shell, so the paths are passed as-is).
system("cp", $htmlFile, "$outDir/index.html") == 0
  or warn "Could not copy $htmlFile to $outDir/index.html (cp exit status ", $? >> 8, ")\n";

# Archive the run's SAV files. -archive is not mandatory, so skip this step
# when no archive name was given instead of building a broken command line.
if(defined $archiveFile){
    # "&&" keeps rm/zip from running in the wrong directory if cd fails;
    # "rm -f" tolerates the archive not existing yet.
    system("cd $local_dir/$expandedRunName && { rm -f $archiveFile; zip -r $archiveFile RunInfo.xml RunParameters.xml InterOp -q; }") == 0
      or warn "Could not create archive $archiveFile from $local_dir/$expandedRunName (exit status ", $? >> 8, ")\n";
}