Mercurial > repos > xuebing > sharplab_interval_analysis
view align2multiple.xml @ 22:869c7664e584
Uploaded
author | xuebing |
---|---|
date | Sat, 31 Mar 2012 11:52:14 -0400 |
parents | 16ba480adf96 |
children |
line wrap: on
line source
<tool id="align2multiple" name="align-to-multiple">
  <!--
    Galaxy tool: aligns one or more interval data sets to the centers of a
    query interval set and draws, per data set, a summary curve above a heatmap.
    The configfile below is rendered by Cheetah into an R script; the command
    pipes that script to R and stores R's standard output as the log data set.
  -->
  <description>features</description>
  <command>cat $script_file | R --vanilla --slave > $logfile </command>
  <inputs>
    <param name="query" type="data" format="interval" label="Query intervals" help="keep it small (less than 1,000,000 lines)"/>
    <param name="label" type="text" value="" size="30" label="Data Label"/>
    <param name="windowsize" size="10" type="integer" value="5000" label="radius of the window" help="will create new intervals of w bp flanking the original center (set to 0 will not change input interval size)"/>
    <param name="nbins" size="10" type="integer" value="20" label="Number of bins dividing the window"/>
    <param name="sort" label="Sort intervals" help="Sort by the center of the first input, then the second input, then third..." type="boolean" truevalue="sort" falsevalue="none" checked="True"/>
    <repeat name="series" title="input file">
      <param name="label" type="text" value="" size="30" label="Data Label"/>
      <param name="input" type="data" format="interval" label="Dataset"/>
    </repeat>
  </inputs>
  <configfiles>
    <configfile name="script_file">
## Lines starting with two hashes are Cheetah comments; they are removed
## before R sees the script. Keep literal dollar signs out of the R code,
## because Cheetah treats them as placeholders.

## Warnings are silenced; errors are left alone so they still abort the job.
cat('\n[',date(),'] Start running job\n')
options(warn=-1)

## labels[1] is the query label; one label per extra data set is appended below.
labels = '$label'

## Name of the working file holding the profile of the k-th data set
## (k = 1 is the query aligned to itself). Indexed names are used instead of
## label-derived names so that empty or repeated labels (the default label is
## empty) cannot make two data sets overwrite each other's profile.
profile_file = function(k){
    paste('profile', k, sep='_')
}

## Run alignr.py on one data set (a_file) against the query and write the
## result to out_file. Paths are shell-quoted, and a non-zero exit status
## stops the job here rather than at a later read.table call.
## NOTE(review): the alignr.py path is hard-coded to a single machine; confirm
## where the script is installed before deploying this tool elsewhere.
run_alignr = function(a_file, out_file){
    cmd = paste('python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py',
                '-a', shQuote(a_file),
                '-b', shQuote('$query'),
                '-o', shQuote(out_file),
                '--profile-only -q -w $windowsize -n $nbins')
    cat('\n[',date(),'] ',cmd,'\n')
    status = system(cmd)
    if (status != 0){
        stop('alignr.py exited with status ', status)
    }
}

## align query to itself
run_alignr('$query', profile_file(1))

## align other sets to query
#for $i,$s in enumerate( $series )
labels = c(labels,'$s.label.value')
run_alignr('$s.input.file_name', profile_file(length(labels)))
#end for

cat('\n[',date(),'] Read output\n')

## read output of query2query: column 1 is used as the row id, columns 3..end
## as the per-bin values
print(paste(labels[1],labels[1],sep='-'))
x = read.table(profile_file(1))
ids = as.character(x[,1])
nfeat = nrow(x)
x = as.matrix(x[,3:ncol(x)])
nbin = ncol(x)

## a table mapping id to position (row index in the query profile)
ind = list()
for (i in seq_len(nfeat)){
    ind[[ids[i]]] = i
}

## read other output files; seq_along(labels)[-1] is empty when no extra data
## set was supplied, whereas 2:length(labels) would count down from 2 to 1
for (i in seq_along(labels)[-1]){
    print(paste(labels[1],labels[i],sep='-'))
    x0 = read.table(profile_file(i))
    ids0 = as.character(x0[,1])
    x0 = as.matrix(x0[,3:ncol(x0)])
    ## rows start at zero and are filled at the position of the matching query id
    x1 = matrix(0,nfeat,nbin)
    for (j in seq_len(nrow(x0))){
        #cat(j,'\t',ids0[j],'\t',ind[[ids0[j]]],'\n')
        x1[ind[[ids0[j]]],] = x0[j,]
    }
    x = cbind(x,x1)
}

## reorder: stable sorts applied from the last data set back to the first, so
## the first extra data set ends up as the primary key
if ("${sort}" == "sort"){
    cat('\n[',date(),'] Sort intervals\n')
    for (i in rev(seq_along(labels)[-1])){
        ## central bin of the i-th block; ceiling keeps the column inside the
        ## block when the number of bins is odd
        center = ceiling(i*nbin - nbin/2)
        x = x[order(x[,center]>0),,drop=FALSE]
    }
}

png("${out_file1}")
##par(mfrow=c(2,length(labels)),mar=c(1,1,4,1))
## two rows of panels: summary curves on top, heatmaps below (five times taller)
layout(matrix(seq_len(2*length(labels)),nrow=2,byrow=TRUE),heights=c(1,5))

cat('\n[',date(),'] Plot summary\n')
par(mar=c(0,0,4,0)+0.1)
for (i in seq_along(labels)){
    cols = ((i-1)*nbin+1):(i*nbin)
    plot(colSums(x[,cols,drop=FALSE]),type='l',axes=FALSE,main=labels[i])
}

cat('\n[',date(),'] Plot heatmap\n')
par(mar=c(0,0,0,0)+0.1)
for (i in seq_along(labels)){
    cols = ((i-1)*nbin+1):(i*nbin)
    image(-t(log2(1+x[,cols,drop=FALSE])),axes=FALSE)
}
dev.off()
cat('\n[',date(),'] Finished\n')
    </configfile>
  </configfiles>
  <outputs>
    <data format="txt" name="logfile" label="${tool.name} on ${on_string}: (log)" />
    <data format="png" name="out_file1" label="${tool.name} on ${on_string}: (plot)" />
  </outputs>
  <help>

.. class:: infomark

This tool allows you to check the co-localization pattern of multiple interval sets. All interval sets are aligned to the center of the intervals in the query interval set.

Each row represents a window of certain size around the center of one interval in the query set, such as ChIP peaks. Each heatmap shows the position of other features in the SAME window (the same rows in each heatmap represent the same interval/genomic position).

The example below shows that of all Fox2 peaks, half of them are within 1kb of TSS. Of the half outside TSS, about one half has H3K4me1, two thirds of which are further depleted of H3K4me3.

-----

**Example**

.. image:: ./static/images/align2multiple.png

  </help>
</tool>