Mercurial > repos > vandelj > giant_plot_functions
view galaxy/wrappers/ExprQCplots.xml @ 1:0435f94d27a7 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"
author | vandelj |
---|---|
date | Wed, 09 Sep 2020 10:28:54 +0000 |
parents | 488e6e8bb8cb |
children |
line wrap: on
line source
<!--
  GIANT-QC Plots: Galaxy wrapper around src/ExprPlotsScript.R.
  Inputs : several .CEL files, or exactly one tabular expression matrix, plus an
           optional tabular factor file used for the PCA plot.
  Outputs: an HTML report (html_file) linking the plots copied into its
           extra_files_path, and a text log (log).
  Flow   : <command> optionally runs the scriptPrepareTable configfile, then the
           R script, then the scriptTransfer configfile that assembles the report.
-->
<tool name="GIANT-QC Plots" id="giant_plot_functions" version="0.1.4">
  <description>Descriptive plots of .CEL collections or normalized expression data</description>

  <requirements>
    <requirement type="package" version="1.40.1">bioconductor-oligo</requirement>
    <requirement type="package" version="2.2_13">r-ff</requirement>
    <requirement type="package" version="2.2.1">r-ggplot2</requirement>
    <requirement type="package" version="4.5.6">r-plotly</requirement>
    <requirement type="package" version="1.8_17">r-mgcv</requirement>
  </requirements>

  <!-- Provides get_column_names(), referenced by the factorsToInclude dynamic options. -->
  <code file="../../src/General_functions.py"/>
  <!--<code file="./src/General_functions.py"/> change for Planemo test-->

  <!-- Failure detection: R error text on stdout/stderr, and the exit codes used by the scripts below. -->
  <stdio>
    <regex match="Execution halted" source="both" level="fatal" description="Execution halted, please contact tool developer or administrators." />
    <regex match="Error in" source="both" level="fatal" description="An error occured during R execution, please contact tool developer." />
    <exit_code range="15" level="fatal" description="Error during formating scripts, see log file for more information." />
    <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." />
    <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." />
  </stdio>

  <command>
    <![CDATA[
    ##set $dataType=$inputData.extension
    #set $dataType=$inputData[0].ext

    ##build factorTable.csv only when a factor file and at least one factor are selected
    #if $conditionInformation and $pcaSection.factorsToInclude!="None":
      bash $scriptPrepareTable;
      ret_code=\$?;
      if [ \$ret_code != 0 ]; then
        exit \$ret_code;
      fi;
    #end if

    ##accepted inputs: more than one .CEL file, or exactly one tabular file
    #if ($dataType == "cel" and len($inputData)>1) or ($dataType == "tabular" and len($inputData)==1):
      Rscript '$__tool_directory__/../../src/ExprPlotsScript.R' -i '$inputData' -l '$log' -f '$advSection.imageFormat' -j '$dataType'
      ##change for Planemo test
      ##Rscript '$__tool_directory__/src/ExprPlotsScript.R' -i '$inputData' -l '$log' -f '$advSection.imageFormat' -j '$dataType'
      #if $dataType == "cel":
        #for $inputDataset in $inputData
          -c '${inputDataset.name}'
        #end for
      #end if
      #if $conditionInformation and $pcaSection.factorsToInclude!="None":
        -t 'factorTable.csv'
      #end if
      #if $plotSection.histogramToPlot:
        -h 'Histograms'
      #end if
      #if $plotSection.maplotToPlot:
        -a 'MAplots'
      #end if
      #if $plotSection.boxplotToPlot:
        -b 'Boxplots'
      #end if
      #if $plotSection.microarrayToPlot and $dataType == "cel":
        -m 'Microarray'
      #end if
      #if $pcaSection.acpToPlot:
        -p 'PCA'
        -s 'screePlot'
      #end if
      ;
      ret_code=\$?;
      if [ \$ret_code != 0 ]; then
        exit \$ret_code;
      else
        bash $scriptTransfer;
        ret_code=\$?;
        if [ \$ret_code != 0 ]; then
          exit \$ret_code;
        fi;
      fi;
    #else:
      printf "[ERROR]Execution halted, not enough (.CEL files) or too many (tabular file) input files" >> $log;
      exit 15;
    #end if
    printf "[INFO]End of tool script" >> $log;
    ]]>
  </command>

  <configfiles>
    <!--
      scriptPrepareTable: writes ./factorTable.csv containing the first column of
      the factor file plus the columns whose header is one of the selected factors.
    -->
    <configfile name="scriptPrepareTable">
      <![CDATA[
      awk -v fact="$pcaSection.factorsToInclude" 'BEGIN{FS="\t";OFS="";ORS="";split(fact, factors, ",");for(i in factors)factorsName[factors[i]]=1}
      NR==1{for(i=2;i<=NF;i++){if(\$i in factorsName)positFactor[i]=1}}
      {print \$1;for(factorID in positFactor)print "\t"\$factorID; print "\n"}' $conditionInformation > ./factorTable.csv;
      ##the redirection above always creates the file, so test for a non-empty file rather than mere existence
      if [ ! -s ./factorTable.csv ]; then
        printf "[ERROR]factorTable.csv is missing or empty" >> $log;
        exit 15
      fi
      printf "[INFO]End of scriptPrepareTable\n" >> $log
      ]]>
    </configfile>

    <!--
      scriptTransfer: builds the HTML report. For every requested plot family it
      copies the files found in ./plotLyDir (or ./plotDir for microarray images)
      into the report's extra_files_path and appends a link to the report.
      Exits with code 10 when an expected plot file is absent.
    -->
    <configfile name="scriptTransfer">
      <![CDATA[
      #set $dataType=$inputData[0].ext

      ##create header of HTML file
      printf "<!DOCTYPE html>\n<html>\n" > $html_file

      ##add to HTML scripts to show and hide MAplots and MicroArray pictures
      printf "<head>
      <script src=\"https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js\"></script>
      <script>
      \\$(document).ready(function(){
      \\$(\"\#MAplotTitle\").click(function(){
      if(\\$(\"p.MAplot\").is(\':visible\'))\\$(\"p.MAplot\").hide(1000);
      else \\$(\"p.MAplot\").show(1000);
      });
      \\$(\"\#MicroarrayTitle\").click(function(){
      if(\\$(\"p.Microarray\").is(\':visible\'))\\$(\"p.Microarray\").hide(1000);
      else \\$(\"p.Microarray\").show(1000);
      });
      });
      </script>
      </head>\n<body>\n" >> $html_file

      mkdir -p $html_file.extra_files_path

      #if $plotSection.histogramToPlot:
        printf "<h3>Histograms</h3>\n" >> $html_file
        ##create folders in media
        ##counter still equal to 1 after the loop means that no file was found
        counter=1
        for histogram in \$(ls ./plotLyDir/Histograms*html)
        do
          histogramShort=\${histogram%\.*}
          histogramShort=\${histogramShort\#\#*/}
          ##modify HTML to point to plotLy folder
          sed -i "s/\${histogramShort}_files/PlotLy_scripts/g" \$histogram
          ##copy HTML files in both folders
          cp \$histogram ${html_file.extra_files_path}/Histograms\${counter}.html
          ##add HTML link
          printf "<a href=\"Histograms\${counter}.html\">Histograms\${counter}</a>\n" >> $html_file
          if [ \$counter = 1 ]; then
            #if $advSection.imagePlotlyFormat=="svg":
              ##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
              cd ./plotLyDir/\${histogramShort}_files/plotlyjs-*/
              awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
              awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
              rm ./plotly-latest.minTemp.js
              cd ../../../
            #end if
            ##copy only scripts folder for the first histogram
            cp -r ./plotLyDir/\${histogramShort}_files $html_file.extra_files_path
            mv ${html_file.extra_files_path}/\${histogramShort}_files ${html_file.extra_files_path}/PlotLy_scripts
          fi
          ((counter++))
        done
        if [ \$counter = 1 ]; then
          printf "[ERROR]Histograms are missing" >> $log;
          exit 10
        fi
      #end if

      #if $plotSection.boxplotToPlot:
        printf "<h3>Boxplots</h3>\n" >> $html_file
        ##create folders in media
        counter=1
        for boxplot in \$(ls ./plotLyDir/Boxplots*html)
        do
          boxplotShort=\${boxplot%\.*}
          boxplotShort=\${boxplotShort\#\#*/}
          ##modify HTML to point to plotLy folder
          sed -i "s/\${boxplotShort}_files/PlotLy_scripts/g" \$boxplot
          ##copy HTML files in both folders
          cp \$boxplot ${html_file.extra_files_path}/Boxplots\${counter}.html
          ##add HTML link
          printf "<a href=\"Boxplots\${counter}.html\">Boxplots\${counter}</a>\n" >> $html_file
          if [ \$counter = 1 ]; then
            #if $advSection.imagePlotlyFormat=="svg":
              ##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
              cd ./plotLyDir/\${boxplotShort}_files/plotlyjs-*/
              awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
              awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
              rm ./plotly-latest.minTemp.js
              cd ../../../
            #end if
            ##copy only scripts folder for the first boxplot
            cp -r ./plotLyDir/\${boxplotShort}_files $html_file.extra_files_path
            mv ${html_file.extra_files_path}/\${boxplotShort}_files ${html_file.extra_files_path}/PlotLy_scripts
          fi
          ((counter++))
        done
        if [ \$counter = 1 ]; then
          printf "[ERROR]Boxplots are missing" >> $log;
          exit 10
        fi
      #end if

      #if $plotSection.maplotToPlot:
        printf "<h3 id=\"MAplotTitle\">MA plots (show/hide)</h3>\n" >> $html_file
        ##create folders in media
        counter=1
        for MAplot in \$(ls ./plotLyDir/MAplots_*html)
        do
          MAplotShort=\${MAplot%\.*}
          MAplotShort=\${MAplotShort\#\#*/}
          conditionName=\${MAplot%\.*}
          conditionName=\${conditionName\#\#*MAplots_}
          echo \$conditionName > ./temporaryConditionName
          ##look up the display name: first file fills the dictionary (column 1 -> column 2), second file is the key to translate
          ##NR==FNR is only true while reading the first file (portable replacement for gawk-only ARGIND)
          conditionFormatedName=\$(awk 'BEGIN{FS="\t"} NR==FNR{dico[\$1]=\$2; next} {print dico[\$1]}' ./correspondanceFileNames.csv ./temporaryConditionName)
          ##modify HTML to point to plotLy folder
          sed -i "s/\${MAplotShort}_files/PlotLy_scripts/g" \$MAplot
          ##copy HTML files in both folders
          cp \$MAplot ${html_file.extra_files_path}/MAplot_\$conditionName.html
          ##add HTML link
          printf "<p class=\"MAplot\">\n<a href=\"MAplot_\$conditionName.html\">MAplot \$conditionFormatedName</a>\n</p>\n" >> $html_file
          if [ \$counter = 1 ]; then
            ##copy only scripts folder for the first MAplot
            #if $advSection.imagePlotlyFormat=="svg":
              ##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
              cd ./plotLyDir/\${MAplotShort}_files/plotlyjs-*/
              awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
              awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
              rm ./plotly-latest.minTemp.js
              cd ../../../
            #end if
            cp -r ./plotLyDir/\${MAplotShort}_files $html_file.extra_files_path
            mv ${html_file.extra_files_path}/\${MAplotShort}_files ${html_file.extra_files_path}/PlotLy_scripts
          fi
          ((counter++))
        done
        if [ \$counter = 1 ]; then
          printf "[ERROR]MAplots are missing" >> $log;
          exit 10
        fi
      #end if

      #if $plotSection.microarrayToPlot and $dataType == "cel":
        printf "<h3 id=\"MicroarrayTitle\">Microarray</h3>\n" >> $html_file
        for microarray in \$(ls ./plotDir/Microarray_*)
        do
          conditionName=\${microarray%\.*}
          conditionName=\${conditionName\#\#*Microarray_}
          echo \$conditionName > ./temporaryConditionName
          ##same two-file lookup as for MA plots
          conditionFormatedName=\$(awk 'BEGIN{FS="\t"} NR==FNR{dico[\$1]=\$2; next} {print dico[\$1]}' ./correspondanceFileNames.csv ./temporaryConditionName)
          cp \$microarray ${html_file.extra_files_path}/Microarray_\$conditionName.${advSection.imageFormat}
          printf "<p class=\"Microarray\"><a href=\"Microarray_\$conditionName.${advSection.imageFormat}\">Microarray \$conditionFormatedName</a>\n</p>" >> $html_file
        done
      #end if

      #if $pcaSection.acpToPlot:
        printf "<h3>PCA</h3>\n" >> $html_file
        ##create folders in media
        counter=1
        for pca in \$(ls ./plotLyDir/PCA*html)
        do
          pcaShort=\${pca%\.*}
          pcaShort=\${pcaShort\#\#*/}
          conditionName=\${pca%\.*}
          conditionName=\${conditionName\#\#*PCA_}
          echo \$conditionName > ./temporaryConditionName
          ##a PCA name is either a single key (translated when known, printed as is otherwise) or two keys joined by _AND_
          ##split() returns the number of pieces, which avoids the non-POSIX length(array)
          conditionFormatedName=\$(awk 'BEGIN{FS="\t"} NR==FNR{dico[\$1]=\$2; next} {n=split(\$0,tab,"_AND_");if(n==1){if(\$0 in dico){print dico[\$0]}else{print \$0}}else{print dico[tab[1]]" * "dico[tab[2]]}}' ./correspondanceFileNames.csv ./temporaryConditionName)
          ##modify HTML to point to plotLy folder
          sed -i "s/\${pcaShort}_files/PlotLy_scripts/g" \$pca
          ##copy HTML files in both folders
          cp \$pca ${html_file.extra_files_path}/PCA_\$conditionName.html
          ##add HTML link
          printf "<p><a href=\"PCA_\$conditionName.html\">PCA \$conditionFormatedName</a>\n</p>" >> $html_file
          if [ \$counter = 1 ]; then
            #if $advSection.imagePlotlyFormat=="svg":
              ##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
              cd ./plotLyDir/\${pcaShort}_files/plotlyjs-*/
              awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
              awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
              rm ./plotly-latest.minTemp.js
              cd ../../../
            #end if
            ##copy only scripts folder for the first pca
            cp -r ./plotLyDir/\${pcaShort}_files $html_file.extra_files_path
            mv ${html_file.extra_files_path}/\${pcaShort}_files ${html_file.extra_files_path}/PlotLy_scripts
          fi
          ((counter++))
        done
        if [ \$counter = 1 ]; then
          printf "[ERROR]PCA plots are missing" >> $log;
          exit 10
        fi

        ##now for screePlot
        if [ -e ./plotLyDir/screePlot.html ]; then
          ##copy HTML files in both folders
          cp ./plotLyDir/screePlot.html ${html_file.extra_files_path}/screePlot.html
          ##add HTML link
          printf "<p><a href=\"screePlot.html\">Scree plot</a>\n</p>" >> $html_file
        else
          printf "[ERROR]screePlot.html is missing" >> $log;
          exit 10
        fi
        ##copy scripts folder
        #if $advSection.imagePlotlyFormat=="svg":
          ##but before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
          cd ./plotLyDir/screePlot_files/plotlyjs-*/
          awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
          awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
          rm ./plotly-latest.minTemp.js
          cd ../../../
        #end if
        cp -r ./plotLyDir/screePlot_files $html_file.extra_files_path
      #end if

      ##create footer of HTML file
      printf "</body>\n</html>" >> $html_file
      ]]>
    </configfile>
  </configfiles>

  <inputs>
    <!-- Prefix used in the labels of both outputs. -->
    <param type="text" name="title" value="PlotFigure_toPersonalize" label="Title for output"/>
    <param type="data" name="inputData" format="cel,tabular" label="Select one .CEL collection or one tabular file" optional="false" multiple="true"/>

    <section name="plotSection" title="Plots selection" expanded="True">
      <param type="boolean" name="histogramToPlot" checked="true" label="Plot histograms" help="Plot intensity distribution for each condition (pm probes for .cel)"/>
      <param type="boolean" name="maplotToPlot" checked="true" label="Plot MA plots" help="Plot MA plot for each condition, median value is used as reference"/>
      <param type="boolean" name="boxplotToPlot" checked="true" label="Plot boxplots" help="Plot intensity through boxplot for each condition (pm probes for .cel)"/>
      <param type="boolean" name="microarrayToPlot" checked="true" label="Display microarray images (only for .CEL files)"/>
    </section>

    <section name="pcaSection" title="PCA analysis" expanded="True">
      <param type="boolean" name="acpToPlot" checked="true" label="Plot 3D PCA" help="3D plot of conditions in the space defined by the 3 principal components"/>
      <param type="data" name="conditionInformation" format="tabular" label="Factor information tabular file (optional)" optional="true" multiple="false"/>
      <!-- Options are computed by get_column_names() from the selected factor file. -->
      <param name="factorsToInclude" type="select" optional="true" multiple="true" label="Select factor informations to display (optional)" refresh_on_change="true" dynamic_options="get_column_names(pcaSection['conditionInformation'].file_name,0)">
        <validator type="empty_field" message="You should specify one factor"></validator>
      </param>
    </section>

    <section name="advSection" title="Advanced parameters" expanded="False">
      <param type="select" name="imageFormat" display="radio" label="Output microarray image format">
        <option value="png">PNG format</option>
        <option value="pdf">PDF format</option>
      </param>
      <param type="select" name="imagePlotlyFormat" display="radio" label="Html snapshot format">
        <option value="png">PNG format</option>
        <option value="svg">SVG format</option>
      </param>
    </section>
  </inputs>

  <outputs>
    <data format="html" name="html_file" label="${title}_HTML.html"/>
    <!-- Disabled collection outputs, kept for reference:
    <collection name="outputHistogramsList" label="${title}_HistogramsList" type="list">
      <discover_datasets pattern="(?P<designation>Histograms[0-9]+)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['histogramToPlot']</filter>
    </collection>
    <collection name="outputMAplotsList" label="${title}_MAplotsList" type="list">
      <discover_datasets pattern="(?P<designation>MAplots[0-9]+)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['maplotToPlot']</filter>
    </collection>
    <collection name="outputBoxplotsList" label="${title}_BoxplotsList" type="list">
      <discover_datasets pattern="(?P<designation>Boxplots[0-9]+)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['boxplotToPlot']</filter>
    </collection>
    <collection name="outputMicroarrayList" label="${title}_MicroarrayList" type="list">
      <discover_datasets pattern="(?P<designation>Microarray\_.*)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['microarrayToPlot'] and inputData[0].ext == "cel"</filter>
    </collection>
    <collection name="outputPCAList" label="${title}_PCA" type="list">
      <discover_datasets pattern="(?P<designation>PCA[0-9]+)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/>
      <discover_datasets pattern="(?P<designation>screePlot)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>pcaSection['acpToPlot']</filter>
    </collection>
    -->
    <data name="log" label="${title}_Log" format="txt"/>
  </outputs>

  <tests>
    <!-- Tabular input with two factors; every plot enabled except microarray images. -->
    <test maxseconds="3600">
      <param name="inputData" value="./NormalizedData.tabular" />
      <section name="plotSection" >
        <param name="histogramToPlot" value="true" />
        <param name="maplotToPlot" value="true" />
        <param name="boxplotToPlot" value="true" />
        <param name="microarrayToPlot" value="false" />
      </section>
      <section name="pcaSection" >
        <param name="acpToPlot" value="true" />
        <param name="conditionInformation" value="./FactorFileGenerator/output/conditionsFile.csv" />
        <param name="factorsToInclude" value="Strain,Treatment" />
      </section>
      <section name="advSection" >
        <param name="imageFormat" value="png" />
        <param name="imagePlotlyFormat" value="png" />
      </section>
      <output name="html_file" file="./ExprQCplots/output/outputHTML.zip" decompress="true" />
      <output name="log" file="./ExprQCplots/output/outputLog.txt" compare="sim_size" delta_frac="0.10" />
    </test>
  </tests>

  <help>
<![CDATA[
**What it does ?**

This tool generates descriptive plots for Affymetrix raw data (.CEL file) or any tabular file containing expression data (from arrays or RNA-seq).

-----

**Parameters**

\- **Title** to personalize output file names (please avoid special characters and spaces).

\- **Input Data**

    - **.CEL files** of your study (you can select multiple .CEL files or a single collection). For visualization purposes, expression data extracted from .CEL files will be automatically log2 transformed by the tool.

    OR

    - **Expression tabular file** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers). For better visualization, expression data should preferably already be log2 transformed; no such transformation will be performed by the tool.

    ::

        Conditions  157_(HuGene-2_0-st).CEL  156_(HuGene-2_0-st).CEL  155_(HuGene-2_0-st).CEL  154_(HuGene-2_0-st).CEL
        DDX11L2     4.500872                 4.429759                 4.780281                 4.996189
        MIR1302-2   3.415065                 3.520472                 3.471503                 3.567988
        OR4F5       3.737956                 3.011586                 3.424494                 3.497545
        VWA1        5.189621                 5.129595                 4.806793                 5.227014

\- **Plots selection** : to select required plots

    - **Plot histograms**

    - **Plot MA plots**

    - **Plot boxplots**

    - **Display microarray images** (only if "Input Data" are .CEL files)

\- **PCA analysis**

    - **Plot 3D PCA** plot the 3 first principal components

    - **Factor information tabular file** with factors as columns and samples as rows (header row contains factor names and first column sample names). Can help to discriminate samples in PCA plot.

    ::

        Conditions               Sex  Treatment  Reaction
        138_(HuGene-2_0-st).CEL  1    TreatA     Pos
        148_(HuGene-2_0-st).CEL  0    NoTreat    Pos
        139_(HuGene-2_0-st).CEL  0    TreatB     Neg
        149_(HuGene-2_0-st).CEL  0    NoTreat    Neg

    - **Select factor informations** you want to display in PCA plot (1 for samples colors and 1 for samples shapes)

\- **Advanced parameters**

    - **Output microarray image format** available only for .CEL input files

    - **Html snapshot format** : format of plot images taken from interactive view

-----

\- **Outputs**

    - **HTML file** to access interactive version of plots through PlotLy html pages

    - **LOG file** for job logs. In case of job failure, please attach this file to the bug report
]]>
  </help>

  <citations>
    <citation type="bibtex">@misc{vandel_jimmy_2018_1477870,
      author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.},
      title = {GIANT: Galaxy-based Interactive tools for ANalaysis of Transcriptomic data},
      month = nov,
      year = 2018,
      doi = {10.5281/zenodo.1477870},
      url = {https://doi.org/10.5281/zenodo.1477870}
      }</citation>
    <citation type="bibtex">@online{plotly,
      author = {Plotly Technologies Inc.},
      title = {Collaborative data science},
      publisher = {Plotly Technologies Inc.},
      address = {Montreal, QC},
      year = {2015},
      url = {https://plot.ly}
      }</citation>
  </citations>
</tool>