diff align2multiple.xml @ 11:b7f1d9f8f3bc

Uploaded
author xuebing
date Sat, 10 Mar 2012 07:59:27 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/align2multiple.xml	Sat Mar 10 07:59:27 2012 -0500
@@ -0,0 +1,109 @@
+<tool id="align2multiple" name="align-to-multiple">
+  <description>features</description>
+  <command>cat $script_file | R --vanilla --slave > $logfile </command>
+  <inputs>   
+      <param name="query" type="data" format="interval" label="Query intervals" help= "keep it small (less than 1,000,000 lines)"/>
+      <param name="label" type="text" value="" size="30" label="Data Label"/>
+    <param name="windowsize" size="10" type="integer" value="5000" label="radius of the window"  help="Creates new intervals of w bp flanking the original center. Setting this to 0 leaves the input interval size unchanged."/>
+    <param name="nbins" size="10" type="integer" value="20" label="Number of bins dividing the window"/>
+    <param name="sort" label="Sort intervals" help="Sort by the center of the first input, then the second input, then third..." type="boolean" truevalue="sort" falsevalue="none" checked="True"/>
+    <repeat name="series" title="input file">
+      <param name="label" type="text" value="" size="30" label="Data Label"/>
+      <param name="input" type="data" format="interval" label="Dataset"/>
+    </repeat>       
+  </inputs>
+
+  <configfiles>
+    <configfile name="script_file">
+      ## Align the query to itself and every series input to the query, then plot the binned profiles.
+      ## Output files of alignr.py are named label1-labelK and are read back via that relative path.
+      cat('\n[',date(),'] Start running job\n')
+      ## ignore R warnings for the rest of the run
+      options(warn=-1)
+      ## labels[1] is the query label; one label per series input is appended below
+      labels = '$label'
+      ## align query to itself
+      cmd = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -a $query -b $query -o $label-$label --profile-only -q -w $windowsize -n $nbins'
+      cat('\n[',date(),'] ',cmd,'\n')
+      system(cmd)
+      ## align other sets to query
+      #for $i,$s in enumerate( $series )
+        labels = c(labels,'$s.label.value')
+        cmd = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -a $s.input.file_name -b $query -o $label-$s.label.value --profile-only -q -w $windowsize -n $nbins'
+        cat('\n[',date(),'] ',cmd,'\n')
+        system(cmd)
+      #end for
+      cat('\n[',date(),'] Read output\n')
+      ## read output of query2query: column 1 holds the row ids, columns 3 onward are used as the profile matrix
+      print(paste(labels[1],labels[1],sep='-'))
+      x = read.table(paste(labels[1],labels[1],sep='-'))
+      ids = as.character(x[,1])
+      nfeat = nrow(x)
+      x = as.matrix(x[,3:ncol(x)])
+      nbin = ncol(x)
+      ## read other output files; seq_len(n)[-1] is empty when there are no series inputs (2:n would give 2,1)
+      others = seq_len(length(labels))[-1]
+      for (i in others){
+          print(paste(labels[1],labels[i],sep='-'))
+          x0 = read.table(paste(labels[1],labels[i],sep='-'))
+          ids0 = as.character(x0[,1])
+          x0 = as.matrix(x0[,3:ncol(x0)])
+          x1 = matrix(0,nfeat,nbin)
+          ## place each row at the last query row carrying the same id; ids absent from the query are skipped
+          pos = nfeat + 1 - match(ids0,rev(ids))
+          keep = !is.na(pos)
+          x1[pos[keep],] = x0[keep,,drop=FALSE]
+          x = cbind(x,x1)
+      }
+      ## reorder rows: one pass per series input, last input first, keyed on whether its centre bin is positive
+      if ("${sort}" == "sort"){
+          cat('\n[',date(),'] Sort intervals\n')
+          for (i in rev(others)){
+              ## drop=FALSE keeps x a matrix even when it has a single row
+              x = x[order(x[,i*nbin-nbin/2]>0),,drop=FALSE]
+          }
+      }
+      ## one PNG: top row holds a summary curve per label, bottom row the matching heatmap (heights 1:5)
+      png("${out_file1}")
+      layout(matrix(seq(2*length(labels)),nrow=2,byrow=T),heights=c(1,5))
+      cat('\n[',date(),'] Plot summary\n')
+      par(mar=c(0,0,4,0)+0.1)
+      ## summary: column sums over each label's block of nbin columns
+      for (i in 1:length(labels)){
+          plot(colSums(x[,((i-1)*nbin+1):(i*nbin),drop=FALSE]),type='l',axes=F,main=labels[i])
+      }
+      cat('\n[',date(),'] Plot heatmap\n')
+      par(mar=c(0,0,0,0)+0.1)
+      ## heatmap: negated, transposed log2(1+value) of each label's block of nbin columns
+      for (i in 1:length(labels)){
+          image(-t(log2(1+x[,((i-1)*nbin+1):(i*nbin),drop=FALSE])),axes=F)
+      }
+      dev.off()
+      cat('\n[',date(),'] Finished\n')
+
+    </configfile>
+  </configfiles>
+
+  <outputs>
+    <data format="txt" name="logfile" label="${tool.name} on ${on_string}: (log)" />
+    <data format="png" name="out_file1" label="${tool.name} on ${on_string}: (plot)" />
+  </outputs>
+
+<help>
+.. class:: infomark
+
+This tool allows you to check the co-localization pattern of multiple interval sets. All interval sets are aligned to the center of the intervals in the query interval set.
+
+Each row represents a window of a given size around the center of one interval in the query set (for example, a ChIP peak). Each heatmap shows the position of another feature set in the SAME window, so the same row in every heatmap corresponds to the same interval/genomic position.
+
+
+The example below shows that of all Fox2 peaks, half of them are within 1kb of TSS. Of the half outside TSS, about one half has H3K4me1, two thirds of which are further depleted of H3K4me3.  
+
+-----
+
+**Example**
+
+.. image:: ./static/images/align2multiple.png
+
+</help>
+</tool>