Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
comparison bdss_client_sra.Rmd @ 0:512d008295db draft default tip
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client_main commit d9ab791a7ce12362dc6e28c0a518a3f23dd581fe-dirty
author | mingchen0919 |
---|---|
date | Tue, 17 Oct 2017 14:09:01 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:512d008295db |
---|---|
1 --- | |
2 title: 'Download and extract single end and paired end fastq/fasta data with BDSS client from SRA accessions' | |
3 output: | |
4 html_document: | |
5 number_sections: true | |
6 toc: true | |
7 theme: cosmo | |
8 highlight: tango | |
9 --- | |
10 | |
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} | |
# Document-wide chunk defaults.
# - error: set to TRUE so every chunk is rendered with knitr's `error` option on.
# - echo: taken from a bare symbol that is not defined in this document;
#   presumably it is filled in by the calling tool before rendering (confirm).
knitr::opts_chunk$set(error = TRUE, echo = ECHO)
16 ``` | |
17 | |
18 # Command line arguments | |
19 | |
20 ```{r 'command line arguments'} | |
# Show the structure of the options object in the report.
# NOTE(review): `opt` is not created in this document; presumably it is the
# parsed command line option list provided by the calling script (confirm).
utils::str(object = opt)
22 ``` | |
23 | |
24 # BDSS configuration file | |
25 | |
26 First, we create a bdss configuration file `bdss.cfg` in the current directory. | |
27 | |
28 ```{r} | |
# Write the BDSS client configuration file `bdss.cfg` into the current
# working directory. The file has a single [metadata_repository] section
# pointing at the metadata repository URL. Any existing file is overwritten.
# Written with writeLines() rather than shelling out to `echo`, so the result
# does not depend on the shell's quoting or redirection behaviour.
writeLines(
  c('[metadata_repository]',
    'url=http://bdss.bioinfo.wsu.edu/'),
  con = 'bdss.cfg'
)
31 ``` | |
32 | |
33 # Download and extract reads | |
34 | |
35 ```{r 'download and extract reads'} | |
# Download SRA runs with the BDSS client and convert them to fastq/fasta.
#
# Steps:
#   1. create one output directory for single end and one for paired end reads
#   2. for every requested accession: download the .sra file with bdss and
#      convert it with fastq-dump
#   3. for paired end requests, drop runs that did not yield a second mate file
#   4. list the resulting files and rename the paired end mate suffixes to
#      `_forward` / `_reverse`

se_dir <- 'se_read_files_directory'
pe_dir <- 'pe_read_files_directory'
dir.create(se_dir)
dir.create(pe_dir)

# Requested output type. It selects the fastq-dump flags and is also the file
# extension expected when checking for the second mate file.
read_format <- 'FORMAT'

# Split a string of accessions separated by commas and/or whitespace into a
# character vector, dropping empty entries.
parse_sra_ids <- function(ids) {
  ids <- strsplit(ids, '[,[:space:]]+')[[1]]
  ids[nzchar(ids)]
}

# Build the FTP URL of the .sra file for one run accession: the path is made
# of the first 3 characters, the first 6 characters and the full accession.
sra_url <- function(id) {
  paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
         substr(id, 1, 3), '/',
         substr(id, 1, 6), '/', id, '/', id, '.sra')
}

# Download one run with bdss and convert `<id>.sra` with fastq-dump.
#   id      run accession
#   out_dir directory passed to fastq-dump via -O
#   format  'fasta' adds --fasta; any other value keeps the fastq-dump default
#   paired  TRUE adds --split-files
# The captured fastq-dump output is printed into the report.
fetch_reads <- function(id, out_dir, format, paired = FALSE) {
  # Accessions come from tool input, so every shell argument is quoted.
  system(paste('/tool_deps/_conda/bin/bdss transfer -u', shQuote(sra_url(id))),
         intern = TRUE)
  dump_args <- c(
    if (format == 'fasta') '--fasta',
    if (paired) '--split-files',
    '-O', out_dir,
    shQuote(paste0(id, '.sra'))
  )
  cat('----convert SRA to fastq/fasta------\n')
  print(system(paste(c('fastq-dump', dump_args), collapse = ' '), intern = TRUE))
}

# download and extract reads (single end)
sra_ids_se <- parse_sra_ids('SRA_IDS_SE')
for (id in sra_ids_se) {
  fetch_reads(id, se_dir, read_format)
}

# download and extract reads (paired end)
sra_ids_pe <- parse_sra_ids('SRA_IDS_PE')
for (id in sra_ids_pe) {
  fetch_reads(id, pe_dir, read_format, paired = TRUE)
  if (!(paste0(id, '_2.', read_format) %in% list.files(pe_dir))) {
    # No second mate file was produced, so this run is not paired end:
    # remove its first mate file from the paired end directory.
    cat(paste0(id, ' is not paired end SRA, the corresponding fastq/fasta file will be deleted.\n'))
    pe_files <- list.files(pe_dir, full.names = TRUE)
    unlink(pe_files[startsWith(basename(pe_files), paste0(id, '_1.'))])
  }
}

cat('-----single end files----\n')
list.files(se_dir)
cat('-----paired end files----\n')
list.files(pe_dir)

cat('-----Renaming files------\n')
# Rename paired end mates. full.names = TRUE yields an empty vector when the
# directory is empty (pasting the directory onto an empty listing would yield
# the directory path itself and rename the directory).
old_files <- list.files('./pe_read_files_directory', full.names = TRUE)
print(old_files)
# Only touch the mate suffix directly in front of the file extension, and only
# in the file name, never in the directory part of the path.
new_names <- sub('_1(\\.[^.]+)$', '_forward\\1', basename(old_files))
new_names <- sub('_2(\\.[^.]+)$', '_reverse\\1', new_names)
new_files <- file.path(dirname(old_files), new_names)
print(new_files)
if (length(old_files) > 0) {
  file.rename(old_files, new_files)
}
103 ``` | |
104 | |
105 |