repository:  louise/export_to_fastq
changeset:   0:97792524cc9c (default tip)
author:      louise
date:        Tue, 07 Jun 2011 17:21:49 -0400
summary:     Migrated tool version 0.1 from old tool shed archive to new tool shed repository
files:       export_to_fastq/README export_to_fastq/export_to_fastq_past.R export_to_fastq/export_to_fastq_past.sh export_to_fastq/export_to_fastq_past.xml
diffstat:    4 files changed, 201 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/README	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,112 @@
+Here is the class needed to handle the Solexa Export file type.
+
+The tool and class were written by Nicolas Delhomme (delhomme@embl.de).
+Released under the GNU GPL: http://www.opensource.org/licenses/gpl-3.0.html
+
+The threshold parameter is commented out, but it can still be used: just uncomment the commented code and comment out the current command tag in the XML file.
+
+If you want to apply this file as a patch, just run:
+patch <path_to_galaxy>/lib/galaxy/datatypes/tabular.py README
+---
+
+diff -r 50e249442c5a lib/galaxy/datatypes/tabular.py
+--- a/lib/galaxy/datatypes/tabular.py	Thu Apr 07 08:39:07 2011 -0400
++++ b/lib/galaxy/datatypes/tabular.py	Tue May 24 14:16:12 2011 +0200
+@@ -504,3 +504,95 @@
+ 
+     def get_track_type( self ):
+         return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
++
++class Export( Tabular ):
++    file_ext = 'export'
++    def __init__(self, **kwd):
++        """Initialize the export datatype"""
++        Tabular.__init__( self, **kwd )
++        self.column_names = ['MACHINE', 'RUN', 'LANE', 'TILE',
++                             'X', 'Y', 'MULTIPLEX', 'PAIRID',
++                             'READ', 'QUALITY', 'CHROMOSOME', 'CONTIG',
++                             'POSITION', 'STRAND', 'ALN_QUAL', 'CHASTITY'
++                            ]
++
++    def make_html_table( self, dataset, skipchars=[] ):
++        """Create an HTML table, used for displaying peek"""
++        out = ['<table cellspacing="0" cellpadding="3">']
++        try:
++            # Generate the column header
++            out.append( '<tr>' )
++            for i, name in enumerate( self.column_names ):
++                out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) )
++            # This data type requires at least 16 columns in the data
++            if dataset.metadata.columns - len( self.column_names ) > 0:
++                for i in range( len( self.column_names ), dataset.metadata.columns ):
++                    out.append( '<th>%s</th>' % str( i+1 ) )
++            out.append( '</tr>' )
++            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
++            out.append( '</table>' )
++            out = "".join( out )
++        except Exception, exc:
++            out = "Can't create peek %s" % exc
++        return out
++
++    def set_meta( self, dataset, overwrite = True, **kwd ):
++        # We'll arbitrarily only use the first 100 data lines in the export file
++        # to calculate tabular attributes (column types); optional metadata
++        # values set in the Tabular class will be 'None'.
++        Tabular.set_meta( self, dataset, overwrite = overwrite, max_data_lines = 100 )
++
++    def sniff( self, filename ):
++        """
++        Determines whether the file is in Export format.
++
++        A file in Export format consists of lines of tab-separated data.
++        It does not have any header line.
++
++        Rules for sniffing as True:
++            There must be 22 columns of data on each line.
++            Columns 2 to 8 must be numbers.
++            Column 22 must be either Y or N.
++        We only check that up to the first 5 alignments are correctly formatted.
++        """
++        try:
++            fh = open( filename )
++            count = 0
++            while True:
++                line = fh.readline()
++                line = line.strip()
++                if not line:
++                    break # EOF
++                if line:
++                    if line[0] != '@':
++                        linePieces = line.split('\t')
++                        if len(linePieces) != 22:
++                            return False
++                        try:
++                            check = int(linePieces[1])
++                            check = int(linePieces[2])
++                            check = int(linePieces[3])
++                            check = int(linePieces[4])
++                            check = int(linePieces[5])
++                            check = int(linePieces[6])
++                            check = int(linePieces[7])
++                            assert linePieces[21] in [ 'Y', 'N' ]
++                        except ValueError:
++                            return False
++                        count += 1
++                        if count == 5:
++                            return True
++            fh.close()
++            if count < 5 and count > 0:
++                return True
++        except:
++            pass
++        return False
++
++class BarcodeSet( Tabular ):
++    file_ext = 'bs'
++    column_names = ['SAMPLE', 'BARCODE']
++
++    def sniff( self, filename ):
++        return False
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/export_to_fastq_past.R	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,49 @@
+my.args <- commandArgs(trailingOnly = TRUE)
+# ----- Check the input directory -----
+if(!file.exists(my.args[1])){
+    stop("The provided project directory does not exist!")
+}
+inputDirectory = my.args[1]  # directory where the input data are (e.g. export or fastq ...)
+
+output_file = my.args[3]
+
+threshold = as.numeric(my.args[2])  # threshold for nFilter
+
+#print(my.args)
+
+library(ShortRead)
+#source('/home/galaxy/galaxy_dev/tools/EMBL_tools/HTS_helper_src_for_export_to_fastq.R')
+
+#### Temporary solution: chastityFilter will be in the RNASeq package
+chastityFilter <- function(.name="Illumina Chastity Filter")
+{
+    srFilter(function(x){
+        if(any(rownames(varMetadata(alignData(x))) == "filtering")){
+            keep <- alignData(x)$filtering == "Y"
+        } else {
+            warning(paste("The '", .name, "' filter is only valid for Illumina reads.", sep=""))
+            keep <- rep(TRUE, length(x))
+        }
+        return(keep)
+    }, name=.name)
+}
+
+"summarize.by.transcripts" <- function(sample, annotation){
+
+    transcripts <- do.call(rbind, lapply(names(sample), function(chr, sample, annotation){
+        counts <- stats:::aggregate(sample[[chr]], list(transcript=annotation[chr]$transcript), sum)
+    }, sample, annotation))
+
+    colnames(transcripts)[2] <- "counts"
+
+    return(transcripts)
+}
+###
+
+# ----- FILTER -----
+filter <- compose(chastityFilter(), nFilter(threshold=threshold))
+
+# ----- ALIGN -----
+# call the readAligned function with this filter
+aln <- readAligned(inputDirectory, type='SolexaExport', filter=filter, withAll=TRUE)
+writeFastq(aln, file=output_file, mode='a')
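Also not part of the changeset: conceptually, the R script above keeps the reads whose Illumina chastity/filtering flag is Y and applies ShortRead's nFilter with the given threshold (limiting the number of N bases per read), then writes the read and quality columns out as FASTQ with the qualities passed through unchanged, consistent with the fastqillumina output format declared in the tool XML. A rough standalone sketch of that idea follows, assuming the column layout used by the Export datatype above (read in column 9, quality in column 10, filtering flag in the last column); the real tool relies on ShortRead's readAligned()/writeFastq() rather than this code.

    # Illustrative only; not the tool's implementation.
    def export_to_fastq_records(lines, threshold=0):
        for i, line in enumerate(lines):
            pieces = line.rstrip("\n").split("\t")
            if len(pieces) < 22:
                continue                                 # not an export record
            read, quality = pieces[8], pieces[9]         # columns 9 and 10
            if pieces[-1] != "Y":                        # chastity filter
                continue
            if read.upper().count("N") > threshold:      # rough nFilter equivalent
                continue
            # read names are simply numbered here; qualities keep their Illumina encoding
            yield "@read_%d\n%s\n+\n%s\n" % (i + 1, read, quality)

For example, feeding the lines of a (hypothetical) s_1_export.txt file through this generator and writing the yielded records to a .fastq file gives output comparable in spirit to what the wrapper below produces.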
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/export_to_fastq_past.sh	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,2 @@
+#!/bin/sh
+R --vanilla --min-nsize=20M --min-vsize=12G --args $1 $2 $3 </home/galaxy/galaxy_dev/tools/EMBL_tools/export_to_fastq.R 2>/dev/null
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/export_to_fastq_past.xml	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,38 @@
+<tool id="export_to_fastq" name="export_to_fastq" version="0.1">
+  <description>Convert export file to fastq</description>
+  <!-- <command interpreter="sh">export_to_fastq.sh $inputDirectory $threshold $output_file</command> -->
+  <command interpreter="sh">export_to_fastq.sh $inputDirectory 0 $output_file</command>
+  <inputs nginx_upload="true">
+    <param type="data" name="inputDirectory" label="Directory that contains the export file"/>
+    <!-- <param type="select" name="threshold" label="Threshold for nFilter">
+      <option value="0">0</option>
+      <option value="1">1</option>
+      <option value="2" selected="true">2</option>
+      <option value="3">3</option>
+      <option value="4">4</option>
+      <option value="5">5</option>
+      <option value="6">6</option>
+      <option value="7">7</option>
+      <option value="8">8</option>
+      <option value="9">9</option>
+      <option value="10">10</option>
+    </param> -->
+  </inputs>
+  <outputs>
+    <data format="fastqillumina" name="output_file" />
+  </outputs>
+
+  <help>
+    Program: export_to_fastq (v0.1)
+
+    Author: Nicolas Delhomme (delhomme@embl.de)
+
+    Summary: Converts export files to FASTQ format.
+
+    Usage: export_to_fastq.sh *input_directory* *threshold* *fastq_file*
+
+    Note: Here the threshold is set to 0.
+
+  </help>
+
+</tool>