Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
comparison bdss_client_sra.Rmd @ 0:512d008295db draft default tip
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client_main commit d9ab791a7ce12362dc6e28c0a518a3f23dd581fe-dirty
| author | mingchen0919 | 
|---|---|
| date | Tue, 17 Oct 2017 14:09:01 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:512d008295db | 
|---|---|
| 1 --- | |
| 2 title: 'Download and extract single end fastq/fasta data with BDSS client from SRA accessions' | |
| 3 output: | |
| 4 html_document: | |
| 5 number_sections: true | |
| 6 toc: true | |
| 7 theme: cosmo | |
| 8 highlight: tango | |
| 9 --- | |
| 10 | |
| 11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} | |
| 12 knitr::opts_chunk$set( # set default knitr options for all following chunks | |
| 13 echo = ECHO, # bare symbol, not a string: presumably substituted by the tool wrapper before knitting -- TODO confirm | |
| 14 error=TRUE # show errors in the report instead of aborting the render | |
| 15 ) | |
| 16 ``` | |
| 17 | |
| 18 # Command line arguments | |
| 19 | |
| 20 ```{r 'command line arguments'} | |
| 21 str(opt) # show the structure of `opt`; it is not defined in this document, presumably the parsed options from the rendering script | |
| 22 ``` | |
| 23 | |
| 24 # BDSS configuration file | |
| 25 | |
| 26 First, we create a BDSS configuration file, `bdss.cfg`, in the current working directory. | |
| 27 | |
| 28 ```{r} | |
| 29 bdss_cfg_lines = c('[metadata_repository]', 'url=http://bdss.bioinfo.wsu.edu/') # section header, then the metadata repository URL | |
| 30 writeLines(bdss_cfg_lines, 'bdss.cfg') # written from R (no shell needed); raises an error if the file cannot be written | |
| 31 ``` | |
| 32 | |
| 33 # Download and extract reads | |
| 34 | |
| 35 ```{r 'download and extract reads'} | |
| 36 # create the output directories: one for single end reads, one for paired end reads. | |
| 37 dir.create('se_read_files_directory') | |
| 38 dir.create('pe_read_files_directory') | |
| 39 # single end accessions: split the comma/space separated id list and drop empty entries. | |
| 40 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]] | |
| 41 sra_ids_se = sra_ids_se[sra_ids_se != ''] | |
| 42 # download and convert each accession; ids are shell-quoted because they are pasted into shell commands. | |
| 43 for(id in sra_ids_se) { | |
| 44 # build the archive URL from the first 3 and the first 6 characters of the id | |
| 45 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', | |
| 46 substr(id, 1, 3), '/', | |
| 47 substr(id, 1, 6), '/', id, '/', id, '.sra') | |
| 48 # download the .sra file with bdss | |
| 49 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url)) | |
| 50 system(bdss_command, intern = TRUE) | |
| 51 # convert .sra to fasta or fastq; the literal on the left is presumably substituted by the tool wrapper (TODO confirm) | |
| 52 if('FORMAT' == 'fasta') { | |
| 53 command = paste0('fastq-dump --fasta -O se_read_files_directory ', shQuote(paste0(id, '.sra'))) | |
| 54 } else { | |
| 55 command = paste0('fastq-dump -O se_read_files_directory ', shQuote(paste0(id, '.sra'))) | |
| 56 } | |
| 57 cat('----convert SRA to fastq/fasta------\n') | |
| 58 print(system(command, intern = TRUE)) | |
| 59 } | |
| 60 | |
| 61 # paired end accessions: split the comma/space separated id list and drop empty entries. | |
| 62 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]] | |
| 63 sra_ids_pe = sra_ids_pe[sra_ids_pe != ''] | |
| 64 # download and convert each accession; ids are shell-quoted because they are pasted into shell commands. | |
| 65 for(id in sra_ids_pe) { | |
| 66 # build the archive URL from the first 3 and the first 6 characters of the id | |
| 67 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', | |
| 68 substr(id, 1, 3), '/', | |
| 69 substr(id, 1, 6), '/', id, '/', id, '.sra') | |
| 70 # download the .sra file with bdss | |
| 71 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url)) | |
| 72 system(bdss_command, intern = TRUE) | |
| 73 # convert .sra to fasta or fastq, one output file per mate (--split-files) | |
| 74 if('FORMAT' == 'fasta') { | |
| 75 command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', shQuote(paste0(id, '.sra'))) | |
| 76 } else { | |
| 77 command = paste0('fastq-dump --split-files -O pe_read_files_directory ', shQuote(paste0(id, '.sra'))) | |
| 78 } | |
| 79 cat('----convert SRA to fastq/fasta------\n') | |
| 80 command_stdout = system(command, intern = TRUE) | |
| 81 print(command_stdout) | |
| 82 if(!(paste0(id, '_2.FORMAT') %in% list.files('pe_read_files_directory'))) { | |
| 83 # no second mate file was written, so this accession is not paired end; remove its first mate file. | |
| 84 cat(paste0(id, ' is not paired end SRA, the corresponding fastq/fasta file will be deleted.\n')) | |
| 85 unlink(Sys.glob(file.path('pe_read_files_directory', paste0(id, '_1.*')))) | |
| 86 } | |
| 87 | |
| 88 } | |
| 89 | |
| 90 cat('-----single end files----\n') | |
| 91 list.files('./se_read_files_directory') | |
| 92 cat('-----paired end files----\n') | |
| 93 list.files('./pe_read_files_directory') | |
| 94 | |
| 95 cat('-----Renaming files------\n') | |
| 96 # rename paired end files: a _1 / _2 suffix directly before the extension becomes _forward / _reverse | |
| 97 old_files = list.files('./pe_read_files_directory', full.names = TRUE) # stays empty when the directory is empty | |
| 98 print(old_files) | |
| 99 new_files = sub('_1(\\.[^./]*)$', '_forward\\1', old_files) | |
| 100 new_files = sub('_2(\\.[^./]*)$', '_reverse\\1', new_files) | |
| 101 print(new_files) | |
| 102 file.rename(old_files, new_files) | |
| 103 ``` | |
| 104 | |
| 105 | 
