<!--
  Source: interval/binaverage.xml @ 18:9bbb37e8683f
  Commit message: Uploaded
  Author: xuebing
  Date: Sat, 31 Mar 2012 08:24:32 -0400
-->

<tool id="binaverage" name="bin and average">
  <description>of numeric columns</description>
  <!-- Feed the generated R script to R on standard input and capture R's standard
       output as the log dataset. Paths are quoted so the shell treats each one as a
       single word; CDATA keeps the redirection operators readable. -->
  <command><![CDATA[R --vanilla --slave < '$script_file' > '$out_log']]></command>
  <inputs>
    <!-- Tab-separated table read by the R script. -->
    <param name="input" type="data" format="tabular" label="Data file"/>

    <!-- Column whose values are summarised and plotted for each bin. -->
    <param name="data_avg" type="integer" value="1" min="1"
           label="Column number of the data to average"
           help="1-based column index in the data file."/>
    <param name="label_avg" type="text" value="label-avg" size="30"
           label="data label"
           help="Label for the column being averaged."/>
    <param name="log_avg" type="boolean" truevalue="logavg" falsevalue="none" checked="false"
           label="log2 transform the data"
           help="Replaces the column being averaged with log2(1 + value)."/>

    <!-- Column whose values decide which bin a row falls into. -->
    <param name="data_bin" type="integer" value="2" min="1"
           label="Column number of the data used to make bins"
           help="1-based column index in the data file."/>
    <param name="label_bin" type="text" value="label-bin" size="30"
           label="data label"
           help="Label for the column used to make bins."/>
    <param name="log_bin" type="boolean" truevalue="logbin" falsevalue="none" checked="false"
           label="log2 transform the data"
           help="Replaces the binning column with log2(1 + value)."/>

    <!-- Binning settings; at least one bin is required. -->
    <param name="nbin" type="integer" value="3" min="1" label="number of bins"/>
    <param name="bintype" type="select" label="Bin by rank or by value">
      <option value="rank" selected="true">by rank: bins have the same number of data points</option>
      <option value="value">by value: bins may have different number of data points</option>
    </param>

    <!-- Plot settings. -->
    <param name="legendloc" type="select" label="legend location on CDF plot">
      <option value="bottomright" selected="true">bottomright</option>
      <option value="bottomleft">bottomleft</option>
      <option value="bottom">bottom</option>
      <option value="left">left</option>
      <option value="topleft">topleft</option>
      <option value="top">top</option>
      <option value="topright">topright</option>
      <option value="right">right</option>
      <option value="center">center</option>
    </param>
    <param name="title" type="text" value="bin-average" size="50"
           label="title of this analysis"
           help="Passed to the plotting function and used in the names of both outputs."/>
  </inputs>

  <configfiles>
    <!-- R script generated from the tool parameters; the command line feeds it to R.
         Inside the configfile only Cheetah comments (double hash) are used, so the
         notes never reach the generated script. -->
    <configfile name="script_file">
      ## Suppress all R warnings. This does not change how R errors are reported.
      options(warn=-1)
      ## binaverage() and mycdf() are not defined in this file; presumably they come from cdf.r.
      ## NOTE(review): this is an absolute path on one specific machine, so the tool only runs
      ## where that exact path exists. Consider resolving cdf.r relative to the tool directory.
      source("/Users/xuebing/galaxy-dist/tools/mytools/cdf.r")
      ## Read the table and keep two columns: column 1 = binning data, column 2 = data to average.
      x = read.table("${input}",sep='\t')
      x = x[,c(${data_bin},${data_avg})]
      label_avg = "${label_avg}"
      label_bin = "${label_bin}"
      ## Optional log2(1 + value) transforms; the matching label is prefixed with 'log2'.
      if ("${log_bin}" == "logbin"){
          x[,1] = log2(1+x[,1])
          label_bin = paste('log2',label_bin)
      }
      if ("${log_avg}" == "logavg"){
          x[,2] = log2(1+x[,2])
          label_avg = paste('log2',label_avg)
      }
      res = binaverage(x,${nbin},"${bintype}")
      ## attach() exposes the elements of res by name; 'binned' and 'labels' used below
      ## are presumably elements of that result (confirm against cdf.r).
      attach(res)
      ## Print a summary per bin to stdout, which the command line redirects to the log output.
      ## seq_len() yields an empty sequence for 0 bins, whereas 1:0 would iterate over 1 and 0.
      for (i in seq_len(${nbin})){
          print(paste(label_bin,labels[i],sep=':'))
          print(summary(binned[[i]]))
      }
      ## Draw the plot into the PDF output dataset.
      pdf("${out_file}")
      mycdf(binned,"${title}",labels,"${legendloc}",label_avg,label_bin)
      dev.off()
    </configfile>
  </configfiles>

  <outputs>
    <!-- Text captured from the standard output of the R process. -->
    <data name="out_log" format="txt" label="${title}: (log)"/>
    <!-- PDF written by the pdf() device opened in the R script. -->
    <data name="out_file" format="pdf" label="${title}: (plot)"/>
  </outputs>

<help>

.. class:: infomark

This tool generates a barplot and a CDF plot comparing the values in one numeric column after the rows have been binned by a second numeric column. The input should have at least two numeric columns. One of the columns is used to group rows into bins, and the values in the other column are then compared across bins using a barplot, a CDF plot, and a KS test.

</help>
</tool>