Mercurial > repos > matt-shirley > sra_tools
changeset 1:75d914fa5711 draft default tip
Moving repository to testtoolshed for now.
author | matt-shirley |
---|---|
date | Tue, 27 Nov 2012 13:43:03 -0500 |
parents | cdcc400dcafc |
children | |
files | datatypes_conf.xml fastq_dump.xml sam_dump.xml sra.py sra_fetch.py sra_fetch.xml |
diffstat | 6 files changed, 0 insertions(+), 168 deletions(-) [+] |
line wrap: on
line diff
--- a/datatypes_conf.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -<?xml version="1.0"?> -<datatypes> - <datatype_files> - <datatype_file name="sra.py"/> - </datatype_files> - <registration> - <datatype extension="sra" type="galaxy.datatypes.binary:Sra" display_in_upload="true"/> - </registration> - <sniffers> - <sniffer type="galaxy.datatypes.binary:Sra"/> - </sniffers> -</datatypes> \ No newline at end of file
--- a/fastq_dump.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -<tool id="fastq_dump" name="Extract fastq" version="1.0.0"> - <description> format reads from NCBI SRA.</description> - <command>./fastq-dump --log-level fatal --report never --accession '${input.name}' --stdout $split $aligned '$input' > $output </command> - <version_string>fastq-dump --version</version_string> - <inputs> - <param format="sra" name="input" type="data" label="sra archive"/> - <param format="text" name="split" type="select" value=""> - <label>Split read pairs</label> - <option value="">No</option> - <option value="--split-spot">Yes</option> - </param> - <param format="text" name="aligned" type="select" value=""> - <label>Specify alignment</label> - <option value="">All</option> - <option value="--aligned">Only aligned</option> - <option value="--unaligned">Only unaligned</option> - </param> - </inputs> - <outputs> - <data format="fastqsanger" name="output"/> - </outputs> - <stdio> - <exit_code range="127" level="fatal" description="Cannot find fastq-dump binary"/> - </stdio> - <requirements> - <requirement type="binary">fastq-dump</requirement> - </requirements> - <help> - This tool extracts fastqsanger reads from SRA archives using fastq-dump. The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. - </help> -</tool>
--- a/sam_dump.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -<tool id="sam_dump" name="Extract SAM" version="1.0.0"> - <description> format reads from NCBI SRA.</description> - <command>sam-dump $header $aligned $primary '$input' > $output</command> - <version_string>sam-dump --version</version_string> - <inputs> - <param format="sra" name="input" type="data" label="sra archive"/> - <param format="text" name="header" type="select" value=""> - <label>Output SAM header</label> - <option value="--header">Yes</option> - <option value="--no-header">No</option> - </param> - <param format="text" name="aligned" type="select" value=""> - <label>Output unaligned reads</label> - <option value="">No</option> - <option value="--unaligned">Yes</option> - </param> - <param format="text" name="primary" type="select" value=""> - <label>Output only primary aligments</label> - <option value="">No</option> - <option value="--primary">Yes</option> - </param> - </inputs> - <outputs> - <data format="sam" name="output"/> - </outputs> - <requirements> - <requirement type="binary">sam-dump</requirement> - </requirements> - <help> - This tool extracts SAM format reads from SRA archives using sam-dump. The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. -Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. - </help> -</tool>
--- a/sra.py Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -""" -Sra class -""" - -import galaxy.datatypes.binary -from galaxy.datatypes.binary import Binary -import data, logging, binascii -from galaxy.datatypes.metadata import MetadataElement -from galaxy.datatypes import metadata -from galaxy.datatypes.sniff import * -from galaxy import eggs -import pkg_resources -pkg_resources.require( "bx-python" ) -import os, subprocess, tempfile -import struct - -class Sra( Binary ): - """ Sequence Read Archive (SRA) """ - file_ext = "sra" - - def __init__( self, **kwd ): - Binary.__init__( self, **kwd ) - def sniff( self, filename ): - # The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ. For details - # about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure - try: - header = open( filename ).read(8) - if binascii.b2a_hex( header ) == binascii.hexlify( 'NCBI.sra' ): - return True - return False - except: - return False - def set_peek( self, dataset, is_multi_byte=False ): - if not dataset.dataset.purged: - dataset.peek = "Binary sra file" - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - def display_peek( self, dataset ): - try: - return dataset.peek - except: - return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) ) - -Binary.register_sniffable_binary_format("sra", "sra", Sra)
--- a/sra_fetch.py Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -from ftplib import FTP -import sys - -# Get accession number from argument -accession = sys.argv[1] -outfile = sys.argv[2] -prefix = accession[0:3] -middle = accession[3:6] -suffix = accession[6:9] - -# NCBI SRA FTP site -ftp = FTP('ftp-trace.ncbi.nih.gov') - -# Open file and transfer requested SRA as a file -# Try to change the working directory until it works -sra = open(outfile, 'wb') -ftp.login('ftp') -connected = False -while not connected: - try: - ftp.cwd('/sra/sra-instant/reads/ByRun/sra/' + - prefix + '/' + - prefix + middle + '/' + - prefix + middle + suffix + '/') - connected = True - except: - pass - -ftp.retrbinary('RETR ' + prefix + middle + suffix + '.sra', sra.write) -ftp.quit()
--- a/sra_fetch.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -<tool id="sra_fetch" name="Fetch SRA" version="1.0.0"> - <description> by accession from NCBI SRA.</description> - <command interpreter="python">sra_fetch.py '$accession' '$output'</command> - <inputs> - <param name="accession" size="13" type="text" value="SRR000001" label="SRA run accession"/> - </inputs> - <outputs> - <data format="sra" name="output" label="Fetch ${accession.value}"/> - </outputs> - <requirements> - <requirement type="python">sra_fetch.py</requirement> - </requirements> - <help> - This tool fetches SRA archives from NCBI over FTP using the python ftplib. - </help> -</tool>