| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 # NCBI SRA Tools | 
|  | 4 # https://galaxyproject.org/tutorials/upload/ | 
|  | 5 | 
|  | 6 import os | 
|  | 7 import optparse | 
|  | 8 from subprocess import Popen, PIPE | 
|  | 9 | 
# Trailing token appended to every generated output file name.
db_key = "?"
# Base FTP location of per-run .sra archives; the run's 6-character prefix,
# the accession and the file name are appended to it when downloading.
# NOTE(review): confirm this sra-instant endpoint is still served by NCBI.
sra_instant_url = "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/"
|  | 12 | 
def convertSRA(tmp_dir, accession_number, data_format):
    """Convert a downloaded ``.sra`` archive with ``fastq-dump``.

    Looks for ``<accession_number>.sra`` inside ``tmp_dir`` and runs
    ``fastq-dump`` with ``--outdir`` set to that same directory.

    Args:
        tmp_dir: directory holding the ``.sra`` file; also used as output dir.
        accession_number: run accession, used as the file stem.
        data_format: one of ".fasta", ".fastq", ".fasta.gz" or ".fastq.gz".

    Returns:
        The expected path of the converted file
        (``tmp_dir/<accession_number><data_format>``), or "" when the
        ``.sra`` file is missing, the format is not supported, or
        ``fastq-dump`` could not be started or exited with a non-zero status.
    """
    import sys  # local import: the module header does not import sys

    # Extra fastq-dump flags required for each supported output format.
    format_flags = {
        ".fasta.gz": ["--fasta", "--gzip"],
        ".fastq.gz": ["--gzip"],
        ".fasta": ["--fasta"],
        ".fastq": [],
    }
    flags = format_flags.get(data_format)
    if flags is None:
        return ""

    absolute_tmp_dir = os.path.abspath(tmp_dir)
    sra_file_path = os.path.join(absolute_tmp_dir, accession_number + ".sra")
    if not (os.path.isdir(absolute_tmp_dir) and os.path.exists(sra_file_path)):
        return ""

    converted_file_path = os.path.join(tmp_dir, accession_number + data_format)
    command = ["fastq-dump"] + flags + [sra_file_path, "--outdir", absolute_tmp_dir]
    try:
        process = Popen(command, stdout=PIPE)
        process.communicate()
        # stderr is not piped, so communicate() can never report an error
        # message; the exit status is the only reliable failure signal.
        failed = process.returncode != 0
    except OSError as error:
        # Raised when the fastq-dump executable cannot be started.
        sys.stderr.write("Unable to run fastq-dump: %s\n" % error)
        failed = True

    if failed:
        # Remove any partial output. Re-downloading is left to the caller,
        # which owns the retry budget.
        if os.path.exists(converted_file_path):
            os.unlink(converted_file_path)
        return ""
    return converted_file_path
|  | 42 | 
def downloadAccessionData(accession_number, accession_path, appdata_path, data_format, limit=10):
    """Download one SRA run with ``wget`` and convert it to ``data_format``.

    The archive is fetched from ``sra_instant_url`` into ``appdata_path``,
    converted through ``convertSRA`` and, when the converted file exists,
    renamed to ``accession_path``. The intermediate ``.sra`` file is removed
    afterwards.

    Args:
        accession_number: run accession; its first six characters select the
            sub-directory on the remote server.
        accession_path: final destination of the converted file.
        appdata_path: working directory for the download and the conversion.
        data_format: output format handed to ``convertSRA``.
        limit: number of retries after a failed first attempt.

    Returns:
        0 once the download succeeded (even if the conversion produced no
        file), -1 when ``wget`` could not be started or every attempt failed.
    """
    import sys  # local import: the module header does not import sys

    srr_path = (sra_instant_url + accession_number[:6] + "/"
                + accession_number + "/" + accession_number + ".sra")
    sra_file_path = os.path.join(appdata_path, accession_number + ".sra")
    command = ["wget", srr_path, "--output-document=" + sra_file_path]

    # One initial attempt plus `limit` retries, as the recursive version did.
    for _ in range(max(limit, 0) + 1):
        try:
            process = Popen(command, stdout=PIPE)
            process.communicate()
        except OSError as error:
            # wget itself is unavailable; retrying cannot help.
            sys.stderr.write("Unable to run wget: %s\n" % error)
            return -1
        # stderr is not piped, so the exit status is the only failure signal.
        if process.returncode == 0:
            break
        # --output-document may leave an empty or partial file behind.
        if os.path.exists(sra_file_path):
            os.unlink(sra_file_path)
    else:
        return -1

    if os.path.exists(sra_file_path):
        converted_file_path = convertSRA(appdata_path, accession_number, data_format)
        if os.path.exists(converted_file_path):
            os.rename(converted_file_path, accession_path)
        os.unlink(sra_file_path)
    return 0
|  | 63 | 
def process_accessions(options, args):
    """Download and convert every accession listed in the input files.

    Args:
        options: parsed command-line options. Reads ``appdata`` (working and
            output directory, created if absent), ``dataformat`` (output
            extension such as ".fastq"), ``files`` (comma separated paths of
            accession lists) and ``names`` (comma separated names, one per
            file, used as output file name prefix).
        args: positional command-line arguments; unused.

    Each input file is read line by line: blank lines and lines starting
    with ">" are skipped, every other line is treated as an accession number.
    When fewer names than files are given (or ``names`` is missing) the
    base name of the input file is used as prefix instead.

    Returns:
        Always 0.
    """
    # create appdata dir if it does not exist
    appdata_path = options.appdata
    if not os.path.exists(appdata_path):
        os.makedirs(appdata_path)
    data_format = options.dataformat

    comma_sep_file_paths = options.files
    if comma_sep_file_paths is None:
        # no input file was supplied: nothing to download
        return 0

    file_paths = comma_sep_file_paths.split(",")
    # str(None) would yield the literal prefix "None"; treat it as "no names"
    file_names = options.names.split(",") if options.names is not None else []

    for idx, file_path in enumerate(file_paths):
        if idx < len(file_names):
            file_name = file_names[idx]
        else:
            file_name = os.path.basename(file_path)
        with open(file_path) as accessions:
            for line in accessions:
                accession_number = line.strip()
                if not accession_number or accession_number.startswith(">"):
                    continue
                filename_with_collection_prefix = (
                    file_name + "_" + accession_number + "_"
                    + data_format[1:] + "_" + db_key
                )
                accession_path = os.path.join(appdata_path, filename_with_collection_prefix)
                # download and convert the data related to accession_number
                downloadAccessionData(accession_number, accession_path, appdata_path, data_format)
    return 0
|  | 102 | 
def __main__():
    """Parse the command line and run the requested action.

    With ``--version`` the module's ``__version__`` is printed ("unknown"
    when the module does not define one) and None is returned; otherwise the
    parsed options are handed to ``process_accessions`` and its result is
    returned.
    """
    # Parse the command line options
    usage = "Usage: retrieve.py --files comma_sep_file_paths --names comma_seq_file_names --format data_format --appdata folder_name"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-f", "--files", type="string",
                      action="store", dest="files", help="comma separated files path")
    parser.add_option("-n", "--names", type="string",
                      action="store", dest="names", help="comma separated names associated to the files specified in --files")
    parser.add_option("-e", "--format", type="string",
                      action="store", dest="dataformat", help="data format")
    parser.add_option("-a", "--appdata", type="string",
                      action="store", dest="appdata", help="appdata folder name")
    parser.add_option("-v", "--version", action="store_true", dest="version",
                      default=False, help="display version and exit")
    (options, args) = parser.parse_args()
    if options.version:
        # Function-call form works on Python 2 and 3; the lookup avoids a
        # NameError because this file does not define __version__.
        print(globals().get("__version__", "unknown"))
        return None
    return process_accessions(options, args)


if __name__ == "__main__":
    __main__()