repository:  louise/export_to_fastq
changeset:   0:97792524cc9c (default tip)
author:      louise
date:        Tue, 07 Jun 2011 17:21:49 -0400
summary:     Migrated tool version 0.1 from old tool shed archive to new tool shed repository
files:       export_to_fastq/README export_to_fastq/export_to_fastq_past.R export_to_fastq/export_to_fastq_past.sh export_to_fastq/export_to_fastq_past.xml
diffstat:    4 files changed, 201 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/README	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,112 @@
+Here is the class needed to handle the Solexa Export file type.
+
+The tool and class were written by Nicolas Delhomme (delhomme@embl.de).
+Released under the GNU GPL: http://www.opensource.org/licenses/gpl-3.0.html
+
+The threshold parameter is commented out, but it can still be used: just uncomment the commented code and comment out the current command tag in the XML file.
+
+If you want to apply this file as a patch, just run:
+patch <path_to_galaxy>/lib/galaxy/datatypes/tabular.py README
+---
+
+diff -r 50e249442c5a lib/galaxy/datatypes/tabular.py
+--- a/lib/galaxy/datatypes/tabular.py	Thu Apr 07 08:39:07 2011 -0400
++++ b/lib/galaxy/datatypes/tabular.py	Tue May 24 14:16:12 2011 +0200
+@@ -504,3 +504,95 @@
+ 
+     def get_track_type( self ):
+         return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
++
++class Export( Tabular ):
++    file_ext = 'export'
++    def __init__(self, **kwd):
++        """Initialize the export datatype"""
++        Tabular.__init__( self, **kwd )
++        self.column_names = ['MACHINE', 'RUN', 'LANE', 'TILE',
++                             'X', 'Y', 'MULTIPLEX', 'PAIRID',
++                             'READ', 'QUALITY', 'CHROMOSOME', 'CONTIG',
++                             'POSITION', 'STRAND', 'ALN_QUAL', 'CHASTITY'
++                            ]
++
++    def make_html_table( self, dataset, skipchars=[] ):
++        """Create an HTML table, used for displaying peek"""
++        out = ['<table cellspacing="0" cellpadding="3">']
++        try:
++            # Generate the column header
++            out.append( '<tr>' )
++            for i, name in enumerate( self.column_names ):
++                out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) )
++            # This data type requires at least 16 columns in the data
++            if dataset.metadata.columns - len( self.column_names ) > 0:
++                for i in range( len( self.column_names ), dataset.metadata.columns ):
++                    out.append( '<th>%s</th>' % str( i+1 ) )
++            out.append( '</tr>' )
++            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
++            out.append( '</table>' )
++            out = "".join( out )
++        except Exception, exc:
++            out = "Can't create peek %s" % exc
++        return out
++
++    def set_meta( self, dataset, overwrite = True, **kwd ):
++        # We'll arbitrarily only use the first 100 data lines in the export file
++        # to calculate tabular attributes (column types); optional metadata
++        # values set in the Tabular class will be 'None'.
++        Tabular.set_meta( self, dataset, overwrite = overwrite, max_data_lines = 100 )
++
++    def sniff( self, filename ):
++        """
++        Determines whether the file is in Export format.
++
++        A file in Export format consists of lines of tab-separated data.
++        It does not have any header line.
++
++        Rules for sniffing as True:
++            There must be 22 columns of data on each line.
++            Columns 2 to 8 must be numbers.
++            Column 22 must be either Y or N.
++        We only check that up to the first 5 alignments are correctly formatted.
++        """
++        try:
++            fh = open( filename )
++            count = 0
++            while True:
++                line = fh.readline()
++                line = line.strip()
++                if not line:
++                    break # EOF
++                if line:
++                    if line[0] != '@':
++                        linePieces = line.split('\t')
++                        if len(linePieces) != 22:
++                            return False
++                        try:
++                            check = int(linePieces[1])
++                            check = int(linePieces[2])
++                            check = int(linePieces[3])
++                            check = int(linePieces[4])
++                            check = int(linePieces[5])
++                            check = int(linePieces[6])
++                            check = int(linePieces[7])
++                            assert linePieces[21] in [ 'Y', 'N' ]
++                        except ValueError:
++                            return False
++                        count += 1
++                        if count == 5:
++                            return True
++            fh.close()
++            if count < 5 and count > 0:
++                return True
++        except:
++            pass
++        return False
++
++class BarcodeSet( Tabular ):
++    file_ext = 'bs'
++    column_names = ['SAMPLE', 'BARCODE']
++
++    def sniff( self, filename ):
++        return False
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/export_to_fastq_past.R	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,49 @@
+my.args <- commandArgs(trailingOnly = TRUE)
+# ----- Check the input directory -----
+if(!file.exists(my.args[1])){
+    stop("The provided project directory does not exist!")
+}
+inputDirectory = my.args[1]  # directory where the input data are (e.g. export or fastq ...)
+
+output_file = my.args[3]
+
+threshold = as.numeric(my.args[2])  # threshold for nFilter
+
+#print(my.args)
+
+library(ShortRead)
+#source('/home/galaxy/galaxy_dev/tools/EMBL_tools/HTS_helper_src_for_export_to_fastq.R')
+
+#### Temporary solution: chastityFilter will be in the RNASeq package
+chastityFilter <- function(.name="Illumina Chastity Filter")
+{
+    srFilter(function(x){
+        if(any(rownames(varMetadata(alignData(x))) == "filtering")){
+            keep <- alignData(x)$filtering == "Y"
+        } else {
+            warning(paste("The '", .name, "' filter is only valid for Illumina reads.", sep=""))
+            keep <- rep(TRUE, length(x))
+        }
+        return(keep)
+    }, name=.name)
+}
+
+"summarize.by.transcripts" <- function(sample, annotation){
+
+    transcripts <- do.call(rbind, lapply(names(sample), function(chr, sample, annotation){
+        counts <- stats:::aggregate(sample[[chr]], list(transcript=annotation[chr]$transcript), sum)
+    }, sample, annotation))
+
+    colnames(transcripts)[2] <- "counts"
+
+    return(transcripts)
+}
+###
+
+# ----- FILTER -----
+filter <- compose(chastityFilter(), nFilter(threshold=threshold))
+
+# ----- ALIGN -----
+# call the readAligned function with this filter
+aln <- readAligned(inputDirectory, type='SolexaExport', filter=filter, withAll=TRUE)
+writeFastq(aln, file=output_file, mode='a')
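Also not part of the changeset: conceptually, the R script above keeps the reads whose Illumina chastity/filtering flag is Y and applies ShortRead's nFilter with the given threshold (limiting the number of N bases per read), then writes the read and quality columns out as FASTQ with the qualities passed through unchanged, consistent with the fastqillumina output format declared in the tool XML. A rough standalone sketch of that idea follows, assuming the column layout used by the Export datatype above (read in column 9, quality in column 10, filtering flag in the last column); the real tool relies on ShortRead's readAligned()/writeFastq() rather than this code.

    # Illustrative only; not the tool's implementation.
    def export_to_fastq_records(lines, threshold=0):
        for i, line in enumerate(lines):
            pieces = line.rstrip("\n").split("\t")
            if len(pieces) < 22:
                continue                                 # not an export record
            read, quality = pieces[8], pieces[9]         # columns 9 and 10
            if pieces[-1] != "Y":                        # chastity filter
                continue
            if read.upper().count("N") > threshold:      # rough nFilter equivalent
                continue
            # read names are simply numbered here; qualities keep their Illumina encoding
            yield "@read_%d\n%s\n+\n%s\n" % (i + 1, read, quality)

For example, feeding the lines of a (hypothetical) s_1_export.txt file through this generator and writing the yielded records to a .fastq file gives output comparable in spirit to what the wrapper below produces.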
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/export_to_fastq_past.sh	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,2 @@
+#!/bin/sh
+R --vanilla --min-nsize=20M --min-vsize=12G --args $1 $2 $3 </home/galaxy/galaxy_dev/tools/EMBL_tools/export_to_fastq.R 2>/dev/null
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/export_to_fastq_past.xml	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,38 @@
+<tool id="export_to_fastq" name="export_to_fastq" version="0.1">
+  <description>Convert export file to fastq</description>
+  <!-- <command interpreter="sh">export_to_fastq.sh $inputDirectory $threshold $output_file</command> -->
+  <command interpreter="sh">export_to_fastq.sh $inputDirectory 0 $output_file</command>
+  <inputs nginx_upload="true">
+    <param type="data" name="inputDirectory" label="Directory that contains the export file"/>
+    <!-- <param type="select" name="threshold" label="Threshold for nFilter">
+      <option value="0">0</option>
+      <option value="1">1</option>
+      <option value="2" selected="true">2</option>
+      <option value="3">3</option>
+      <option value="4">4</option>
+      <option value="5">5</option>
+      <option value="6">6</option>
+      <option value="7">7</option>
+      <option value="8">8</option>
+      <option value="9">9</option>
+      <option value="10">10</option>
+    </param> -->
+  </inputs>
+  <outputs>
+    <data format="fastqillumina" name="output_file" />
+  </outputs>
+
+  <help>
+    Program: export_to_fastq (v0.1)
+
+    Author: Nicolas Delhomme (delhomme@embl.de)
+
+    Summary: Converts export files to FASTQ format.
+
+    Usage: export_to_fastq.sh *input_directory* *threshold* *fastq_file*
+
+    Note: Here the threshold is set to 0.
+
+  </help>
+
+</tool>