# HG changeset patch
# User devteam
# Date 1390832954 18000
# Node ID e28c965eeed4adeb19cb086d1e0f5b3ca6dc8a5d
Imported from capsule None
diff -r 000000000000 -r e28c965eeed4 rgFastQC.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgFastQC.py Mon Jan 27 09:29:14 2014 -0500
@@ -0,0 +1,216 @@
+"""
+# May 2013 ross added check for bogus gz extension - fastqc gets confused
+# added sanitizer for user supplied name
+# removed shell and make cl a sequence for Popen call
+# ross lazarus August 10 2012 in response to anon insecurity report
+wrapper for fastqc
+
+called as
+
+ rgFastQC.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"
+
+
+
+
+Current release seems overly intolerant of sam/bam header strangeness
+Author notified...
+
+
+"""
+import re
+import os
+import sys
+import subprocess
+import optparse
+import shutil
+import tempfile
+import zipfile
+import gzip
+
+
+def getFileString(fpath, outpath):
+    """
+    build a display string of the form '<fpath> <size>' for a file in outpath
+
+    fpath is joined onto outpath to locate the file on disk, but only fpath
+    itself appears in the returned string.
+    Returns '? ?' when the joined path is not a regular file.
+    The size part is ' (x.x MB)', ' (x.x KB)' or ' (n B)' and is left empty
+    for a zero length file.
+    """
+    full_path = os.path.join(outpath, fpath)
+    if not os.path.isfile(full_path):
+        return '? ?'
+    nbytes = float(os.path.getsize(full_path))
+    # strict comparisons: exactly 2**20 bytes is reported in KB, exactly 2**10 in B
+    if nbytes > 2**20:
+        label = ' (%1.1f MB)' % (nbytes/2**20)
+    elif nbytes > 2**10:
+        label = ' (%1.1f KB)' % (nbytes/2**10)
+    elif nbytes > 0:
+        label = ' (%d B)' % (int(nbytes))
+    else:
+        label = ''
+    return '%s %s' % (fpath, label)
+
+
+class FastQC():
+ """wrapper
+ """
+
+
+ def __init__(self,opts=None):
+     """
+     keep the parsed command line options for the other methods to read
+
+     opts must be supplied - an AssertionError is raised if it is None.
+     """
+     # identity test against None; the old '<>' operator no longer exists in python 3
+     assert opts is not None, 'FastQC needs an options object - got None'
+     self.opts = opts
+
+
+ def run_fastqc(self):
+ """
+ In batch mode fastqc behaves not very nicely - will write to a new folder in
+ the same place as the infile called [infilebasename]_fastqc
+ rlazarus@omics:/data/galaxy/test$ ls FC041_1_sequence_fastqc
+ duplication_levels.png fastqc_icon.png per_base_n_content.png per_sequence_gc_content.png summary.txt
+ error.png fastqc_report.html per_base_quality.png per_sequence_quality.png tick.png
+ fastqc_data.txt per_base_gc_content.png per_base_sequence_content.png sequence_length_distribution.png warning.png
+
+ """
+ serr = ''
+ dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir)
+ sout = open(tlog, 'w')
+ fastq = os.path.basename(self.opts.input)
+ cl = [self.opts.executable,'--outdir=%s' % self.opts.outputdir]
+ if self.opts.informat in ['sam','bam']:
+ cl.append('--f=%s' % self.opts.informat)
+ if self.opts.contaminants <> None :
+ cl.append('--contaminants=%s' % self.opts.contaminants)
+ # patch suggested by bwlang https://bitbucket.org/galaxy/galaxy-central/pull-request/30
+ # use a symlink in a temporary directory so that the FastQC report reflects the history input file name
+ infname = self.opts.inputfilename
+ linf = infname.lower()
+ trimext = False
+ # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf
+ # patched may 29 2013 until this is fixed properly
+ if ( linf.endswith('.gz') or linf.endswith('.gzip') ):
+ f = gzip.open(self.opts.input)
+ try:
+ testrow = f.readline()
+ except:
+ trimext = True
+ f.close()
+ elif linf.endswith('bz2'):
+ f = bz2.open(self.opts.input,'rb')
+ try:
+ f.readline()
+ except:
+ trimext = True
+ f.close()
+ elif linf.endswith('.zip'):
+ if not zipfile.is_zipfile(self.opts.input):
+ trimext = True
+ if trimext:
+ infname = os.path.splitext(infname)[0]
+ fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
+ link_name = os.path.join(self.opts.outputdir, fastqinfilename)
+ os.symlink(self.opts.input, link_name)
+ cl.append(link_name)
+ sout.write('# FastQC cl = %s\n' % ' '.join(cl))
+ sout.flush()
+ p = subprocess.Popen(cl, shell=False, stderr=sout, stdout=sout, cwd=self.opts.outputdir)
+ retval = p.wait()
+ sout.close()
+ runlog = open(tlog,'r').readlines()
+ os.unlink(link_name)
+ flist = os.listdir(self.opts.outputdir) # fastqc plays games with its output directory name. eesh
+ odpath = None
+ for f in flist:
+ d = os.path.join(self.opts.outputdir,f)
+ if os.path.isdir(d):
+ if d.endswith('_fastqc'):
+ odpath = d
+ hpath = None
+ if odpath <> None:
+ try:
+ hpath = os.path.join(odpath,'fastqc_report.html')
+ rep = open(hpath,'r').readlines() # for our new html file but we need to insert our stuff after the <body> tag
+ except:
+ pass
+ if hpath == None:
+ serr = '\n'.join(runlog)
+ res = ['## odpath=%s: No output found in %s. Output for the run was:Files created by FastQC
\n']
+ flist.sort()
+ for i,f in enumerate(flist):
+ if not(os.path.isdir(f)):
+ fn = os.path.split(f)[-1]
+ res.append('%s |
\n' % (fn,getFileString(fn, self.opts.outputdir)))
+ res.append('
\n')
+ res.append('
FastQC documentation and full attribution is here
\n')
+ res.append('FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://bitbucket.org/rgenetics for details and licensing\n
')
+ res.append(footer)
+ fixed = rep[:bodyindex] + res + rep[bodyindex:]
+ return fixed # with our additions
+
+
+ def fix_fastqcimages(self,odpath):
+     """ Galaxy wants everything in the same files_dir
+
+     Moves every non-directory entry found in odpath/Icons, odpath/Images and
+     odpath itself into self.opts.outputdir, removing each of those
+     directories once its files have been moved.
+     """
+     subdirs = [os.path.join(odpath, sub) for sub in ('Icons', 'Images')]
+     # the subdirectories go first so odpath can be emptied before its own rmdir
+     for folder in subdirs + [odpath]:
+         if not os.path.exists(folder):
+             continue
+         for entry in os.listdir(folder): # get all files created
+             src = os.path.join(folder, entry)
+             if os.path.isdir(src):
+                 continue
+             shutil.move(src, os.path.join(self.opts.outputdir, entry))
+         # os.rmdir raises OSError if anything (e.g. another subdirectory) is left behind
+         os.rmdir(folder)
+
+
+
+if __name__ == '__main__':
+    # command line entry point: parse the options, run the FastQC wrapper and
+    # write the html it returns to the -o/--htmloutput path
+    op = optparse.OptionParser()
+    op.add_option('-i', '--input', default=None)
+    op.add_option('-j', '--inputfilename', default=None)
+    op.add_option('-o', '--htmloutput', default=None)
+    op.add_option('-d', '--outputdir', default="/tmp/shortread")
+    op.add_option('-f', '--informat', default='fastq')
+    op.add_option('-n', '--namejob', default='rgFastQC')
+    op.add_option('-c', '--contaminants', default=None)
+    op.add_option('-e', '--executable', default='fastqc')
+    opts, args = op.parse_args()
+    # op.error writes the message to stderr and exits with status 2; unlike an
+    # assert these checks are still made when python runs with -O
+    if opts.input is None:
+        op.error('##rgFastQC.py error - no input file supplied (-i/--input)')
+    # NOTE(review): the default 'fastqc' only passes this check if a file of that
+    # name exists relative to the current directory - confirm callers always pass -e
+    if not os.path.isfile(opts.executable):
+        op.error('##rgFastQC.py error - cannot find executable %s' % opts.executable)
+    if not os.path.exists(opts.outputdir):
+        os.makedirs(opts.outputdir)
+    wrapper = FastQC(opts)
+    html,retval,serr = wrapper.run_fastqc()
+    # the with block closes the report file even if the write fails
+    with open(opts.htmloutput, 'w') as outf:
+        outf.write(''.join(html))
+    if retval != 0:
+        sys.stderr.write('%s\n' % serr) # indicate failure
+
+
+
diff -r 000000000000 -r e28c965eeed4 rgFastQC.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgFastQC.xml Mon Jan 27 09:29:14 2014 -0500
@@ -0,0 +1,101 @@
+