comparison mosaics/R/generateWig.R @ 10:d78c3c5e8ff8 draft

Uploaded
author dongjun
date Thu, 10 Jan 2013 16:01:28 -0500
parents b6d0c6ceda2c
children
comparison
equal deleted inserted replaced
9:e41be28411b2 10:d78c3c5e8ff8
1
2 # read alignment files and construct bin-level files
3
4 generateWig <- function( infile=NULL, fileFormat=NULL, outfileLoc="./",
5 byChr=FALSE, useChrfile=FALSE, chrfile=NULL, excludeChr=NULL,
6 PET=FALSE, fragLen=200, span=200, capping=0, normConst=1, perl = "perl" )
7 {
8 # preprocessing perl script embedded in "mosaics/inst/Perl/"
9
10 if ( PET == TRUE ) {
11 script <- "readfile2wig_PET.pl"
12 allFormat <- c( "eland_result", "sam" )
13 allFormatName <- c( "Eland result", "SAM" )
14 } else {
15 script <- "readfile2wig_SET.pl"
16 allFormat <- c( "eland_result", "eland_extended", "eland_export",
17 "bowtie", "sam", "bed", "csem" )
18 allFormatName <- c( "Eland result", "Eland extended", "Eland export",
19 "Bowtie default", "SAM", "BED", "CSEM" )
20 }
21
22 # Check whether perl exists
23
24 CMD <- paste( perl, "-v" )
25 res <- system( CMD, intern = TRUE, ignore.stderr = TRUE )
26
27 if ( length(res) == 0 ) {
28 # cannot proceed if perl does not exist
29
30 stop( "Perl is not found on your system! Either check $PATH if installed or please install Perl." )
31 } else {
32 # process read files into bin-level files if perl exists
33
34 # check whether minimal options are missing
35
36 if ( length(infile) != 1 || is.null(infile) )
37 {
38 stop( "Please specify the name of the aligned read file!" )
39 }
40
41 if ( length(fileFormat) != 1 || is.null(fileFormat) )
42 {
43 stop( "Please specify aligned read file format! Read '?generateWig' for supported file formats" )
44 }
45
46 # check file format specification
47
48 if ( length(which(!is.na(match( fileFormat, allFormat )))) == 0 )
49 {
50 stop( "Unsupported aligned read file format! Read '?generateWig' for supported file formats" )
51 }
52
53 # if useChrfile is TRUE & excludeChr is NOT null, then ignore excludeChr
54
55 if ( useChrfile & !is.null(excludeChr) ) {
56 message( "User set 'useChrfile' as TRUE and also provided 'excludeChr'." )
57 message( "'excludeChr' argument will be ignored." )
58 excludeChr <- NULL
59 }
60
61 # print out processing settings:
62 # by default, set fragment length = 200, bin size = fragment length, capping = 0.
63
64 fileFormatName <- allFormatName[ match( fileFormat, allFormat ) ]
65
66 cat( "------------------------------------------------------------\n" )
67 cat( "Info: setting summary\n" )
68 cat( "------------------------------------------------------------\n" )
69 cat( "Name of aligned read file:", infile, "\n" )
70 cat( "Aligned read file format:", fileFormatName, "\n" )
71 cat( "Directory of processed wig files:", outfileLoc, "\n" )
72 cat( "span of the wig files:", span, "\n" )
73 cat( "Normalizing constant:", normConst, "\n" )
74 cat( "Construct wig files by chromosome?", ifelse(byChr,"Y","N"), "\n" )
75 cat( "Is file for chromosome info provided?", ifelse(useChrfile,"Y","N"), "\n" )
76 if ( useChrfile == TRUE ) {
77 cat( "Name of file for chromosome info: ", chrfile, "\n" )
78 }
79 if ( !is.null(excludeChr) ) {
80 cat( "List of chromosomes to be excluded:", paste(excludeChr,collapse=", "), "\n" )
81 }
82 if ( PET == FALSE ) {
83 cat( "Data type: Single-end tag (SET)\n" )
84 cat( "Average fragment length:", fragLen, "\n" )
85 } else {
86 cat( "Data type: Paired-end tag (PET)\n" )
87 }
88 if ( capping > 0 ) {
89 cat( "Maximum number of reads allowed in each nucleotide:", capping, "\n" )
90 }
91 cat( "------------------------------------------------------------\n" )
92
93
94 # get path to the perl code (unified script for all file formats)
95
96 Fn.Path <- system.file( file.path("Perl",script), package="mosaics")
97
98
99 # process read file to bin-level files using perl codes
100
101 message( "Info: reading the aligned read file and processing it into bin-level files..." )
102
103 if ( capping <= 0 ) {
104 capping <- 0
105 }
106 if ( is.null(excludeChr) ) {
107 excludeChrVec <- ""
108 } else {
109 excludeChrVec <- paste( excludeChr, collapse=" " )
110 }
111
112 if ( PET == TRUE ) {
113 CMD <- paste( perl,
114 " ", Fn.Path,
115 " ", infile,
116 " ", outfileLoc,
117 " ", fileFormat,
118 " ", span,
119 " ", normConst,
120 " ", capping,
121 " ", ifelse(byChr,"Y","N"),
122 " ", ifelse(useChrfile,"Y","N"),
123 " ", ifelse(!is.null(chrfile),chrfile,"-"),
124 " ", paste(excludeChrVec,collpase=" "), sep="" )
125 } else {
126 CMD <- paste( perl,
127 " ", Fn.Path,
128 " ", infile,
129 " ", outfileLoc,
130 " ", fileFormat,
131 " ", span,
132 " ", normConst,
133 " ", fragLen,
134 " ", capping,
135 " ", ifelse(byChr,"Y","N"),
136 " ", ifelse(useChrfile,"Y","N"),
137 " ", ifelse(!is.null(chrfile),chrfile,"-"),
138 " ", paste(excludeChrVec,collpase=" "), sep="" )
139 }
140
141 res <- system( CMD, intern = TRUE )
142
143 message( "Info: done!" )
144
145
146 # print out processing results
147
148 infilename <- basename( infile )
149 # extract only filename from infile
150
151 if ( PET == TRUE ) {
152 if ( byChr ) {
153 outfileName <- list.files( path=outfileLoc,
154 pattern=paste(infilename,"_span",span,"_.*.wig",sep="") )
155 } else {
156 outfileName <- paste(infilename,"_span",span,".wig",sep="")
157 }
158 } else {
159 if ( byChr ) {
160 outfileName <- list.files( path=outfileLoc,
161 pattern=paste(infilename,"_fragL",fragLen,"_span",span,"_.*.wig",sep="") )
162 } else {
163 outfileName <- paste(infilename,"_fragL",fragLen,"_span",span,".wig",sep="")
164 }
165 }
166
167 cat( "------------------------------------------------------------\n" )
168 cat( "Info: processing summary\n" )
169 cat( "------------------------------------------------------------\n" )
170 cat( "Directory of processed wig files:", outfileLoc, "\n" )
171 if ( byChr ) {
172 cat( "List of processed wig files:\n" )
173 for ( i in 1:length(outfileName) ) {
174 cat( "- ",outfileName[i],"\n", sep="" )
175 }
176 } else {
177 cat( "Processed wig file: ",outfileName,"\n", sep="" )
178 }
179 cat( "------------------------------------------------------------\n" )
180 }
181 }