# HG changeset patch # User matt-shirley # Date 1354041783 18000 # Node ID 75d914fa5711fb08e3e34d38a59f6898aee2b012 # Parent cdcc400dcafc3f396fda1078424f22f25f3dd55a Moving repository to testtoolshed for now. diff -r cdcc400dcafc -r 75d914fa5711 datatypes_conf.xml --- a/datatypes_conf.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff -r cdcc400dcafc -r 75d914fa5711 fastq_dump.xml --- a/fastq_dump.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ - - format reads from NCBI SRA. - ./fastq-dump --log-level fatal --report never --accession '${input.name}' --stdout $split $aligned '$input' > $output - fastq-dump --version - - - - - - - - - - - - - - - - - - - - - - fastq-dump - - - This tool extracts fastqsanger reads from SRA archives using fastq-dump. The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. - - diff -r cdcc400dcafc -r 75d914fa5711 sam_dump.xml --- a/sam_dump.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ - - format reads from NCBI SRA. - sam-dump $header $aligned $primary '$input' > $output - sam-dump --version - - - - - - - - - - - - - - - - - - - - - - - sam-dump - - - This tool extracts SAM format reads from SRA archives using sam-dump. The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. -Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. - - diff -r cdcc400dcafc -r 75d914fa5711 sra.py --- a/sra.py Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -""" -Sra class -""" - -import galaxy.datatypes.binary -from galaxy.datatypes.binary import Binary -import data, logging, binascii -from galaxy.datatypes.metadata import MetadataElement -from galaxy.datatypes import metadata -from galaxy.datatypes.sniff import * -from galaxy import eggs -import pkg_resources -pkg_resources.require( "bx-python" ) -import os, subprocess, tempfile -import struct - -class Sra( Binary ): - """ Sequence Read Archive (SRA) """ - file_ext = "sra" - - def __init__( self, **kwd ): - Binary.__init__( self, **kwd ) - def sniff( self, filename ): - # The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ. For details - # about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure - try: - header = open( filename ).read(8) - if binascii.b2a_hex( header ) == binascii.hexlify( 'NCBI.sra' ): - return True - return False - except: - return False - def set_peek( self, dataset, is_multi_byte=False ): - if not dataset.dataset.purged: - dataset.peek = "Binary sra file" - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - def display_peek( self, dataset ): - try: - return dataset.peek - except: - return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) ) - -Binary.register_sniffable_binary_format("sra", "sra", Sra) diff -r cdcc400dcafc -r 75d914fa5711 sra_fetch.py --- a/sra_fetch.py Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -from ftplib import FTP -import sys - -# Get accession number from argument -accession = sys.argv[1] -outfile = sys.argv[2] -prefix = accession[0:3] -middle = accession[3:6] -suffix = accession[6:9] - -# NCBI SRA FTP site -ftp = FTP('ftp-trace.ncbi.nih.gov') - -# Open file and transfer requested SRA as a file -# Try to change the working directory until it works -sra = open(outfile, 'wb') -ftp.login('ftp') -connected = False -while not connected: - try: - ftp.cwd('/sra/sra-instant/reads/ByRun/sra/' + - prefix + '/' + - prefix + middle + '/' + - prefix + middle + suffix + '/') - connected = True - except: - pass - -ftp.retrbinary('RETR ' + prefix + middle + suffix + '.sra', sra.write) -ftp.quit() diff -r cdcc400dcafc -r 75d914fa5711 sra_fetch.xml --- a/sra_fetch.xml Tue Nov 27 13:31:09 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ - - by accession from NCBI SRA. - sra_fetch.py '$accession' '$output' - - - - - - - - sra_fetch.py - - - This tool fetches SRA archives from NCBI over FTP using the python ftplib. - -