diff galaxy/wrappers/VolcanoPlotTool.xml @ 0:c9a38c1eadf1 draft

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author vandelj
date Fri, 26 Jun 2020 09:45:41 -0400
parents
children 75505421bcf3
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/wrappers/VolcanoPlotTool.xml	Fri Jun 26 09:45:41 2020 -0400
@@ -0,0 +1,417 @@
+<tool name="GIANT-Plot volcanos" id="giant_volcano_plot" version="0.3.3">
+  <description>Plot volcano from tabular file</description>
+  <requirements>
+    <requirement type="package" version="1.7.1">r-r.methodss3</requirement>
+    <requirement type="package" version="2.36.1">bioconductor-biomart</requirement>
+    <requirement type="package" version="3.0.0">r-ggplot2</requirement>
+    <requirement type="package" version="4.8.0">r-plotly</requirement>
+    <requirement type="package" version="1.3.1">r-stringr</requirement>
+  </requirements>
+  <code file="../../src/General_functions.py"/>
+  <stdio>
+    <regex match="Execution halted"
+           source="both"
+           level="fatal"
+           description="Execution halted, please contact tool developer or administrators." />
+    <regex match="Error in"
+           source="both"
+           level="fatal"
+           description="An error occured during R execution, please contact tool developer." />
+    <exit_code range="15" level="fatal" description="Error during input file formatting step, see log file for more information." />
+    <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." />
+    <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." />
+  </stdio>
+  <command>	<![CDATA[
+    #import imp
+    #set $runIdentifier=str($outputData).rsplit("/")[-1].rsplit(".")[0]
+    #set $general_functions=$imp.load_source('General_functions', $__tool_directory__+'/../../src/General_functions.py')
+    #set $ret_code=$general_functions.selectSubSetTable($inputSection['inputData'].file_name,$inputSection['headerNumber'].value,[0],$volcanoSection['volcanoList'],$__new_file_path__+'/'+$__user_id__+'_'+$runIdentifier+'_statisticsFormated.csv',$log.file_name)
+
+    if [ $ret_code != 0 ]; then
+      printf "[ERROR]Error during formated statistics file generation\n" >> $log;
+      exit $ret_code;
+    fi; 
+
+    mv ${__new_file_path__}/${__user_id__}_${runIdentifier}_statisticsFormated.csv ./statisticsFormated.csv;
+
+    #for $i, $s in enumerate( $volcanoSection.volcanoList )
+    #if $s.volcanoName!='':
+      printf "\$(($i+1))\t${s.volcanoName}\n" >> ./correspondanceVolcanoNames.csv;
+    #else:
+      printf "\$(($i+1))\t\$(($i+1))_${s.pvalColumn}\n" >> ./correspondanceVolcanoNames.csv;
+    #end if
+    #end for
+
+
+    Rscript '$__tool_directory__/../../src/VolcanoPlotsScript.R' -i 'statisticsFormated.csv' -l '$log' -o '$outputData' -f 'pdf'
+      #for $i, $s in enumerate( $volcanoSection.volcanoList )
+        -n '${s.volcanoName}'
+        -p '${s.pvalColumn}'
+        -c '${s.fcColumn}'
+        -m '${s.fdrColumn}'
+      #end for
+        -d '$plotSection.fcKind'
+        -s $plotSection.FDRthreshold
+        -e $plotSection.FCthreshold
+      #if $plotSection.geneInformation.addGeneInfo:
+        -x '$plotSection.geneInformation.organismID'
+        -y '$plotSection.geneInformation.infoInRowType'
+      #end if
+    ;
+     ret_code=\$?;
+     if [ \$ret_code != 0 ]; then
+      exit \$ret_code;
+     else
+      bash $scriptTransfer;
+      ret_code=\$?;
+      if [ \$ret_code != 0 ]; then
+        exit \$ret_code;
+      fi 
+     fi;
+
+  printf "[INFO]End of tool script" >> $log; 
+	]]>
+  </command>
+
+
+  <configfiles>
+    <configfile name="scriptTableToHtml">
+<![CDATA[
+printf  "<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\">
+<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css\">
+<script type=\"text/javascript\" language=\"javascript\" src=\"https://code.jquery.com/jquery-1.12.4.js\">
+</script>
+<script type=\"text/javascript\" language=\"javascript\" src=\"https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js\">
+</script>
+<script type=\"text/javascript\" class=\"init\">
+\\$(document).ready(function() {
+  \\$(\'\#example\').DataTable( {
+        \"columnDefs\": [ {
+            \"visible\": false,
+            \"targets\": -1
+        } ]
+    } );
+} );
+</script>
+</head>
+<body style=\"background-color:white;\">
+<table id=\"example\" class=\"display\" cellspacing=\"0\">
+" > ${html_file.extra_files_path}/output.html
+
+printf "<colgroup>\n" >> ${html_file.extra_files_path}/output.html
+printf "<col span=\"2\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/output.html
+awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=3;i<=NF;i=i+4){if(odd==1){odd=0;printf "<col span=\"4\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"4\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/output.html
+printf "</colgroup>\n" >> ${html_file.extra_files_path}/output.html
+
+
+printf "<thead>\n<tr>\n" >> ${html_file.extra_files_path}/output.html
+printf "<th rowspan=\"2\">Gene</th>\n" >> ${html_file.extra_files_path}/output.html
+printf "<th rowspan=\"2\">Info</th>\n" >> ${html_file.extra_files_path}/output.html
+awk 'BEGIN{FS="\t"} NR==1{for(i=3;i<=NF;i=i+4)printf "<th colspan=\"4\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/output.html
+printf "<th></th>\n" >> ${html_file.extra_files_path}/output.html
+printf "</tr>\n<tr>\n" >> ${html_file.extra_files_path}/output.html
+awk 'BEGIN{FS="\t"} NR==2{for(i=3;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/output.html
+printf "<th></th>\n" >> ${html_file.extra_files_path}/output.html
+printf "</tr>\n</thead>\n" >> ${html_file.extra_files_path}/output.html
+
+printf "<tfoot>\n<tr>\n" >> ${html_file.extra_files_path}/output.html
+awk 'BEGIN{FS="\t"} NR==2{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/output.html
+printf "<th></th>\n" >> ${html_file.extra_files_path}/output.html
+printf "</tr>\n</tfoot>\n">> ${html_file.extra_files_path}/output.html
+
+printf "<tbody>\n" >> ${html_file.extra_files_path}/output.html
+awk 'BEGIN{FS="\t"} NR>2{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/output.html
+printf "</tbody>\n" >> ${html_file.extra_files_path}/output.html
+
+printf "</table>
+</body>
+</html>" >> ${html_file.extra_files_path}/output.html
+
+]]>
+    </configfile>
+
+    <configfile name="scriptTransfer">
+<![CDATA[
+
+
+##for output table
+
+mkdir -p $html_file.extra_files_path
+
+##create HTML file for limma output table
+source $scriptTableToHtml
+
+##check output.html is here
+if ! [ -e ${html_file.extra_files_path}/output.html ]; then
+  printf "[ERROR]output.html is missing" >> $log; 
+  exit 10
+fi
+
+##create header of main HTML file
+printf  "<!DOCTYPE html>\n<html>\n<body>"  > $html_file
+
+##first add reference of the output table
+printf "<h3>Statistics (p.val, adjusted p.val, FC, log2FC)</h3>\n" >> $html_file
+printf "<a href=\"output.html\">Access to statistics</a>\n"  >> $html_file
+
+
+
+printf "<h3>Volcanos</h3>\n" >> $html_file
+
+##create folders in media
+counter=1
+for volcano in \$(ls ./plotLyDir/Volcanos_*html)
+do
+volcanoShort=\${volcano%\.*}
+volcanoShort=\${volcanoShort\#\#*/}
+
+conditionName=\${volcano%\.*}
+conditionName=\${conditionName\#\#*Volcanos_}
+
+##modify HTML to point to plotLy folder
+sed -i "s/\${volcanoShort}_files/PlotLy_Volcano_scripts/g" \$volcano
+
+##copy HTML files in both folders
+cp \$volcano ${html_file.extra_files_path}/Volcano_\$conditionName.html
+
+##get user name of Volcano
+echo \$conditionName > ./temporaryConditionName
+conditionFormatedName=\$(awk 'BEGIN{FS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2{print dico[\$1]}' ./correspondanceVolcanoNames.csv ./temporaryConditionName)
+
+##add HTML link
+printf "<p>\n<a href=\"Volcano_\$conditionName.html\">Volcano \$conditionFormatedName</a>\n</p>\n"  >> $html_file
+
+if [ \$counter = 1 ]; then
+
+#if $plotSection.imagePlotlyFormat=="svg":
+##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work)
+cd ./plotLyDir/\${volcanoShort}_files/plotly-main-*/
+awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
+awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
+rm ./plotly-latest.minTemp.js
+cd ../../../
+#end if
+
+##now copy only scripts folder for the first volcano and rename
+cp -r ./plotLyDir/\${volcanoShort}_files $html_file.extra_files_path
+mv ${html_file.extra_files_path}/\${volcanoShort}_files ${html_file.extra_files_path}/PlotLy_Volcano_scripts
+fi
+
+((counter++))
+done
+
+if [ \$counter = 1 ]; then
+  printf "[ERROR]Volcano plots are missing" >> $log; 
+  exit 10
+fi
+
+
+
+##create footer of HTML file
+printf  "</body>\n</html>" >> $html_file
+]]>
+    </configfile>
+  </configfiles>
+
+
+
+  <inputs>
+  <section name="inputSection" title="Input files" expanded="True">
+    <param type="text" name="title" value="Volcano_toPersonalize" label="Title for output"/>
+
+    <param type="data" name="inputData" format="tabular" label="Differential results file" optional="false" multiple="false" refresh_on_change="true"/>
+
+    <param name="headerNumber" type="select" label="Select number of header lines in file" refresh_on_change="true">
+      <option value="0">0</option>
+      <option value="1">1</option>
+      <option value="2" selected="true">2</option>
+      <option value="3">3</option>
+      <option value="4">4</option>
+      <option value="5">5</option>
+    </param>
+  </section>
+
+  <section name="volcanoSection" title="Volcano definition" expanded="True">
+
+
+    <repeat name="volcanoList" title="Volcano">
+
+      <param type="text" name="volcanoName" value="" label="Volcano name"/>
+
+      <param name="pvalColumn" type="select" label="Select column containing p-val statistics" refresh_on_change="true" optional="false" multiple="false" dynamic_options="get_column_names_mergeNumber(inputSection['inputData'].file_name,inputSection['headerNumber'],[0])" help="Do not select ajusted p-val here"/>
+
+      <param name="fdrColumn" type="select" label="Select column containing adjusted p-val statistics (if available)" refresh_on_change="true" optional="true" multiple="false" dynamic_options="get_column_names_mergeNumber(inputSection['inputData'].file_name,inputSection['headerNumber'],[0])" help="If adjusted p-val are available, otherwise it will be computed from p-values."/>
+
+      <param name="fcColumn" type="select" label="Select column containing log2(FoldChange) values" refresh_on_change="true" optional="false" multiple="false" dynamic_options="get_column_names_mergeNumber(inputSection['inputData'].file_name,inputSection['headerNumber'],[0])" help="If only FC are available as input, please select FC column and check options below." />
+
+    </repeat>
+
+  </section>
+
+
+	<section name="plotSection" title="Output section" expanded="True">
+
+    <param name="fcKind" type="select" display="radio" label="Select FC values kind selected as input column" help="Info:log2(FC) will be displayed in volcano plots">
+      <option value="log2FC" selected="true">log2(FC)</option>
+      <option value="FC">FC</option>
+    </param>
+
+    <param name="FDRthreshold" type="float" value="0.05" label="Output adjusted p-val threshold" >
+      <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
+    </param>
+
+    <param name="FCthreshold" type="float" value="2" label="Output Fold Change threshold (both 'log2(threshold)' and 'log2(1/threshold)' values will be used)" >
+      <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater or equal to 1"/>
+    </param>
+
+    <conditional name="geneInformation">
+    <param name="addGeneInfo" type="boolean" label="Add gene/probe information" checked="false"/>
+      <when value="true">
+        <param name="organismID" label="Organism" type="select">
+          <options from_data_table="LimmaTool" >
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+            <filter type="unique_value" column="0"/>
+          </options>
+        </param>
+        <param name="infoInRowType" label="Nature of row names" type="select">
+            <options from_data_table="LimmaTool" >
+              <column name="name" index="3"/>
+              <column name="value" index="2"/>
+              <filter type="param_value" ref="organismID" column="0"/>
+            </options>
+        </param>
+      </when>
+      <when value="false">
+      </when>
+    </conditional>
+
+    <param type="select" name="imagePlotlyFormat" display="radio" label="Html snapshot format">
+      <option value="png">PNG format</option>
+      <option value="svg">SVG format</option>
+    </param>
+	</section>
+
+  </inputs>
+
+
+
+  <outputs>
+    <data format="tabular" name="outputData" label="${inputSection.title}_statistics"/>
+	
+	<data format="html" name="html_file" label="${inputSection.title}_HTML.html"/>
+	<!--
+	<collection name="outputHistogramsList" label="${inputSection.title}_HistogramsList" type="list">
+	  <discover_datasets pattern="(?P&lt;designation&gt;Histograms[0-9]+)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
+	  <filter>plotSection['histogramToPlot']</filter>
+	</collection>
+	
+	<collection name="outputVolcanosList" label="${inputSection.title}_VolcanosList" type="list">
+	  <discover_datasets pattern="(?P&lt;designation&gt;Volcanos\_.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
+	  <filter>plotSection['volcanoToPlot']</filter>
+	</collection>
+
+  <data name="fRatioOutput" format="png" label="${inputSection.title}_F-ratio">
+        <change_format>
+            <when input="plotSection['imageFormat']" value="pdf" format="pdf" />
+        </change_format>
+  </data>
+	-->
+    <data format="txt" name="log" label="${inputSection.title}_Log" />
+  </outputs>
+
+
+  
+ <tests>
+  <test maxseconds="3600">
+    <param name="wfile" value="wiggle.wig" />
+    <param name="bfile" value="bedfile.bed" />
+    <param name="span" value="3000" />
+    <param name="pfres" value="50" />
+    <param name="lowersize" value="1000" />
+    <param name="middlesize" value="2000" />
+    <param name="uppersize" value="3000" />
+    <param name="lowerbisize" value="2500" />
+    <param name="upperbisize" value="5000" />
+    <param name="reldist" value="3000" />
+    <param name="genome" value="hg18" />
+    <param name="imagetype" value="PDF" />
+    <param name="enable" value="no" />
+    <output name="outputData" file="ceas_1/ceas_1.pdf" />
+  </test>
+</tests> 
+  <help>
+<![CDATA[
+**What it does ?**
+
+This tool plot volcanos from previous differential expression analysis results containing at least Fold Change and p-val statistics for all measured genes.
+
+-----
+
+**Parameters**
+
+\- **Input files**
+
+- **Title** to personalize output file names (avoiding special characters).
+
+- **Differential expression analysis results** with at least contrast statistics (p-val and FC or log2(FC)) as columns and genes as rows. GIANT-Differential Expression Analysis with LIMMA tool output can be used as a model.
+
+    ::
+
+        LIMMA    comparison  WT*Treat  WT*Treat  WT*Treat  WT*Treat  WT*Treat
+        Gene     Info        p-val     FDR.p-val FC        log2(FC)  t-stat
+        ARSD     na          0.0057    0.41      0.8389   -0.2534   -5.175
+        TTTY10   na          1.6e-07   0.0074    0.6403   -0.6432   -6.122
+        MIR548AL na          0.072     0.2914    1.711     0.775     10.43
+
+- **Header lines number** to skip in the differential results file, i.e lines containing column content descripion.
+
+
+\- **Volcano definition** (add as many volcanos as needed)
+
+    - **Volcano name** for better ouput identification (avoiding special characters).
+
+    - **Select p-val statistics column** containing p-value statistics for all genes in the coresponding contrast, FDR correction will be automatically applied on these p-values if adjusted p-values column is not selected in the following option.
+
+    - **Select adjusted p-val column** (optional) if adjusted p-values have been allready computed and are available in the input file. If not, FDR correction will be applied on available p-value statistics. 
+
+    - **Select log2(FoldChange) or FoldChange column** corresponding to the same contrast as previously selected (adjusted)p-value columns. 
+
+
+\- **Output section**
+
+- **FC information available as input** : select if FoldChange columns selected during volcanos definition are allready log2 transformed.
+
+- **Output adjusted/FDR p-val threshold** : only genes with adjusted p-val <= this threshold (in at least one of requested volcanos) will be in result tab.
+
+- **Output Fold Change threshold** : only genes with absolute FC >= this threshold (in at least one of requested volcanos) will be in result tab (both 'log2(threshold)' and 'log2(1/threshold)' values will be used).
+
+- **Add gene/probe information** : if yes, add description of genes to the result tab.
+
+- **Html snapshot format** : format of plot images taken from interactive view
+
+-----
+
+**Outputs**
+
+- **tabular file** containing statistics used for volcano plots, represented as a tab delimited matrix where each colum contains statistics for each gene (in rows).
+
+- **HTML file** to access interactive version of volcanos through PlotLy html pages and tabulated volcano statistics.
+
+- **LOG file** for job log. If you see errors, please attached this in the bug report
+
+]]>  </help>
+
+
+ <citations>
+  <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalaysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870}
+  }</citation>
+
+  <citation type="bibtex">@online{plotly, author = {Plotly Technologies Inc.}, title = {Collaborative data science}, publisher = {Plotly Technologies Inc.}, address = {Montreal, QC}, year = {2015}, url = {https://plot.ly}
+  }</citation>
+ </citations>
+
+</tool>