Mercurial > repos > rnateam > graphprot_predict_profile
view graphprot_predict_profile.xml @ 0:215925e588c4 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit 9bb5f3c8ed8e87ec5652b5bc8bf9c774d5534a1a
author | rnateam |
---|---|
date | Fri, 25 May 2018 12:17:44 -0400 |
parents | |
children |
line wrap: on
line source
<!--
    Galaxy tool wrapper: GraphProt predict profile.

    Runs graphprot_predict_profile_wrapper.pl (located in the tool directory) on a
    FASTA file using either a model taken from the user's history or one of the
    model archives found under <tool directory>/data/. Model parameters come from
    a .params file or, for history models, from manually entered values.
-->
<tool id="graphprot_predict_profile" name="GraphProt predict profile" version="1.1.7">
    <description>- Predict RBP binding profiles</description>
    <requirements>
        <requirement type="package" version="1.1.7">graphprot</requirement>
    </requirements>
    <command><![CDATA[
        ## Repository models are stored as <name>.tar.gz under the tool's data/
        ## directory; unpack the selected archive into ./model before predicting.
        #if $select_model.model_selector == 'select_model_from_repo':
            mkdir -p ./model &&
            tar -zxvf '$__tool_directory__/data/${select_model.repo_model}.tar.gz' -C ./model &&
        #end if

        perl '$__tool_directory__/graphprot_predict_profile_wrapper.pl'
            -fasta '$fasta_file'

            #if $select_model.model_selector == 'select_model_from_history':
                -model '$select_model.model_file'
                #if $select_model.set_params.set_params_selector == 'supply_params_file':
                    -params '$select_model.set_params.params_file'
                #elif $select_model.set_params.set_params_selector == 'manual_params_setting':
                    #if $select_model.set_params.model_type.model_type_selector == 'sequence':
                        -onlyseq
                    #elif $select_model.set_params.model_type.model_type_selector == 'structure':
                        -abstraction $select_model.set_params.model_type.gp_abstraction
                    #end if
                    -R $select_model.set_params.gp_r
                    -D $select_model.set_params.gp_d
                    -bitsize $select_model.set_params.gp_bitsize
                    -lambda $select_model.set_params.gp_lambda
                    -epochs $select_model.set_params.gp_epochs
                    ## Optional numeric values are compared against the empty string
                    ## rather than tested for truthiness, so that an explicitly
                    ## entered 0 is still forwarded to the wrapper.
                    #if str($select_model.set_params.gev_options.distr_my) != '':
                        -distr-my $select_model.set_params.gev_options.distr_my
                    #end if
                    #if str($select_model.set_params.gev_options.distr_sigma) != '':
                        -distr-sigma $select_model.set_params.gev_options.distr_sigma
                    #end if
                    #if str($select_model.set_params.gev_options.distr_xi) != '':
                        -distr-xi $select_model.set_params.gev_options.distr_xi
                    #end if
                #end if
            #elif $select_model.model_selector == 'select_model_from_repo':
                -model './model/${select_model.repo_model}.model'
                -params './model/${select_model.repo_model}.params'
            #end if

            ## Expands to "-p50-out" when checked, otherwise to nothing.
            $peak_region_options.p50_output

            ## Same empty-string test as above: 0 is a legitimate user value here.
            #if str($peak_region_options.merge_dist) != '':
                -merge-dist $peak_region_options.merge_dist
            #end if
            #if str($peak_region_options.p_val_thr) != '':
                -thr-p $peak_region_options.p_val_thr
            #end if
            #if str($peak_region_options.score_thr) != '':
                -thr-sc $peak_region_options.score_thr
            #end if
    ]]></command>
    <inputs>
        <param name="fasta_file" type="data" format="fasta" label="Input FASTA file" argument="-fasta" help="FASTA file containing sequences to predict binding profiles on"/>
        <!-- Model source: a dataset from the history or a bundled repository model. -->
        <conditional name="select_model">
            <param name="model_selector" type="select" label="Select GraphProt model" help="Select GraphProt model for binding profile prediction">
                <option value="select_model_from_history" selected="true">Select model from history</option>
                <option value="select_model_from_repo">Select model from repository</option>
            </param>
            <when value="select_model_from_history">
                <param name="model_file" type="data" format="data" label="GraphProt model file" argument="-model" help="Predict binding profile for the given GraphProt RBP model"/>
                <!-- History models need their parameters from a file or entered by hand. -->
                <conditional name="set_params">
                    <param name="set_params_selector" type="select" label="Set model parameters">
                        <option value="supply_params_file" selected="true">Select parameter file from history</option>
                        <option value="manual_params_setting">Manually set model parameters</option>
                    </param>
                    <when value="supply_params_file">
                        <param name="params_file" type="data" format="txt" label="Model parameter file" argument="-params" help="Parameter file containing model parameters"/>
                    </when>
                    <when value="manual_params_setting">
                        <conditional name="model_type">
                            <param name="model_type_selector" type="select" label="Select model type">
                                <option value="sequence" selected="true">Supplied model is a sequence model</option>
                                <option value="structure">Supplied model is a structure model</option>
                            </param>
                            <when value="sequence"/>
                            <when value="structure">
                                <param name="gp_abstraction" type="integer" value="3" argument="-abstraction" label="RNAshapes abstraction level for RNA structure graphs"/>
                            </when>
                        </conditional>
                        <param name="gp_r" type="integer" value="1" min="0" max="4" label="GraphProt radius" argument="-R"/>
                        <param name="gp_d" type="integer" value="4" min="0" max="6" label="GraphProt distance" argument="-D"/>
                        <param name="gp_bitsize" type="integer" value="14" label="GraphProt bitsize used for feature encoding" argument="-bitsize"/>
                        <param name="gp_lambda" type="float" value="10e-6" label="SGD parameter lambda for classification" argument="-lambda"/>
                        <param name="gp_epochs" type="integer" value="10" label="SGD parameter epochs for classification" argument="-epochs"/>
                        <section name="gev_options" title="GEV distribution parameters for signifying GraphProt scores">
                            <param name="distr_my" type="float" optional="true" label="GEV distribution parameter my" argument="-distr-my"/>
                            <param name="distr_sigma" type="float" optional="true" label="GEV distribution parameter sigma" argument="-distr-sigma"/>
                            <param name="distr_xi" type="float" optional="true" label="GEV distribution parameter xi" argument="-distr-xi"/>
                        </section>
                    </when>
                </conditional>
            </when>
            <when value="select_model_from_repo">
                <!-- Each value is the base name used for <value>.tar.gz, <value>.model and <value>.params in the command. -->
                <param name="repo_model" type="select" label="Select model" help="Select model from a list of pre-trained GraphProt models">
                    <option value="EWSR1_eCLIP_K562_ENCSR887LPK" selected="true">EWSR1_eCLIP_K562_ENCSR887LPK.model</option>
                    <option value="FMR1_eCLIP_K562_ENCSR331VNX">FMR1_eCLIP_K562_ENCSR331VNX.model</option>
                    <option value="HNRNPC_eCLIP_HepG2_ENCSR550DVK">HNRNPC_eCLIP_HepG2_ENCSR550DVK.model</option>
                    <option value="HUR_PAR-CLIP_HEK293_Mukherjee">HUR_PAR-CLIP_HEK293_Mukherjee.model</option>
                    <option value="IGF2BP1-3_PAR-CLIP_HEK293_Hafner">IGF2BP1-3_PAR-CLIP_HEK293_Hafner.model</option>
                    <option value="IGF2BP1-3_PAR-CLIP_HEK293_Hafner_structure">IGF2BP1-3_PAR-CLIP_HEK293_Hafner_structure.model</option>
                    <option value="KHDRBS1_eCLIP_K562_ENCSR628IDK">KHDRBS1_eCLIP_K562_ENCSR628IDK.model</option>
                    <option value="KHDRBS1_eCLIP_K562_ENCSR628IDK_structure">KHDRBS1_eCLIP_K562_ENCSR628IDK_structure.model</option>
                    <option value="PUM2_PAR-CLIP_HEK293_Hafner">PUM2_PAR-CLIP_HEK293_Hafner.model</option>
                    <option value="PUM2_eCLIP_K562_exonized_3utr">PUM2_eCLIP_K562_exonized_3utr.model</option>
                    <option value="QKI_PAR-CLIP_HEK293_Hafner">QKI_PAR-CLIP_HEK293_Hafner.model</option>
                    <option value="QKI_eCLIP_HepG2_ENCSR570WLM">QKI_eCLIP_HepG2_ENCSR570WLM.model</option>
                    <option value="QKI_eCLIP_HepG2_ENCSR570WLM_structure">QKI_eCLIP_HepG2_ENCSR570WLM_structure.model</option>
                </param>
            </when>
        </conditional>
        <section name="peak_region_options" title="Peak region options">
            <param name="merge_dist" type="integer" optional="true" label="Maximum distance between two peak regions for merging" argument="-merge-dist" help="By default all non-overlapping regions will be reported. E.g. a distance of 1 means that two regions above the set threshold (score or p-value) will be merged if there is 1 nucleotide that separates the two regions"/>
            <param name="p_val_thr" type="float" optional="true" label="Set p-value threshold for reporting peak regions" argument="-thr-p" help="Regions with p-value lower or equal the given value are reported. If no distribution parameters are given (either manually or in .params file), no p-values will be calculated and peak regions will be filtered by their peak scores instead (default: 0.05)"/>
            <param name="score_thr" type="float" optional="true" label="Set GraphProt peak score threshold for reporting peak regions" argument="-thr-sc" help="Regions with peak score higher or equal the given value are reported. If distribution parameters are given (either manually or in .params file), p-value threshold will be used instead of the peak score threshold for filtering (default: 0)"/>
            <param name="p50_output" label="Also output p50 score filtered peak regions file" type="boolean" truevalue="-p50-out" falsevalue="" checked="false" help="Output is a peak regions BED file using the best average score found in at least 50 % of the positive training sites (p50) as threshold for defining peak regions. NOTE that if the score is not given in .params file, an empty file will be output if selected."/>
        </section>
    </inputs>
    <outputs>
        <data format="txt" name="average_profile_outfile" label="${tool.name} on ${on_string} (average profile)" from_work_dir="GraphProt.average_profile"/>
        <data format="bed" name="peak_regions_outfile" label="${tool.name} on ${on_string} (peak regions)" from_work_dir="GraphProt.peak_regions.bed"/>
        <data format="bed" name="peak_regions_p50_outfile" label="${tool.name} on ${on_string} (peak regions p50)" from_work_dir="GraphProt.peak_regions_p50.bed">
            <!-- Only create this dataset when the p50 output was requested. -->
            <filter>peak_region_options['p50_output']</filter>
        </data>
    </outputs>
    <tests>
        <!-- History model + parameter file, with p50 output. -->
        <test>
            <param name="fasta_file" value="test.fa" ftype="fasta"/>
            <param name="model_selector" value="select_model_from_history"/>
            <param name="model_file" value="test.model"/>
            <param name="set_params_selector" value="supply_params_file"/>
            <param name="params_file" value="test.params"/>
            <param name="p50_output" value="True"/>
            <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out1.average_profile"/>
            <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out1.peak_regions.bed"/>
            <output name="peak_regions_p50_outfile" file="GraphProt_predict_profile_test_out1.peak_regions_p50.bed"/>
        </test>
        <!-- History sequence model with manually entered parameters. -->
        <test>
            <param name="fasta_file" value="test.fa" ftype="fasta"/>
            <param name="model_selector" value="select_model_from_history"/>
            <param name="model_file" value="test.model"/>
            <param name="set_params_selector" value="manual_params_setting"/>
            <param name="model_type_selector" value="sequence"/>
            <param name="gp_r" value="1"/>
            <param name="gp_d" value="4"/>
            <param name="gp_epochs" value="20"/>
            <param name="gp_lambda" value="0.001"/>
            <param name="gp_bitsize" value="14"/>
            <param name="merge_dist" value="1"/>
            <param name="score_thr" value="1"/>
            <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out2.average_profile"/>
            <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out2.peak_regions.bed"/>
        </test>
        <!--
            History structure model + parameter file. model_type_selector and
            gp_abstraction belong to the manual_params_setting branch and are
            therefore not set here.
        -->
        <test>
            <param name="fasta_file" value="test.fa" ftype="fasta"/>
            <param name="model_selector" value="select_model_from_history"/>
            <param name="model_file" value="structure_test.model"/>
            <param name="set_params_selector" value="supply_params_file"/>
            <param name="params_file" value="structure_test.params"/>
            <param name="score_thr" value="2"/>
            <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out3.average_profile"/>
            <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out3.peak_regions.bed"/>
        </test>
        <!-- Repository model, with p50 output. -->
        <test>
            <param name="fasta_file" value="test.fa" ftype="fasta"/>
            <param name="model_selector" value="select_model_from_repo"/>
            <param name="repo_model" value="FMR1_eCLIP_K562_ENCSR331VNX"/>
            <param name="p50_output" value="True"/>
            <output name="average_profile_outfile" file="GraphProt_predict_profile_test_out4.average_profile"/>
            <output name="peak_regions_outfile" file="GraphProt_predict_profile_test_out4.peak_regions.bed"/>
            <output name="peak_regions_p50_outfile" file="GraphProt_predict_profile_test_out4.peak_regions_p50.bed"/>
        </test>
    </tests>
    <help><![CDATA[
Use GraphProt (-action predict_profile) to predict binding profiles for a given RBP model (supplied as .model and .params file) on a given set of FASTA sequences. After predicting position-wise scores, the scores are averaged over small windows (11 nt with averaged score position in center) to smooth out the profiles and peak regions are extracted based on the set thresholds (p-value or score) and merge distance.

**Output files**

The procedure has three output files (third is optional):

1) An average_profile file containing averaged position-scores over all supplied sequences

2) A peak regions BED file which contains peak-scoring regions above the supplied threshold (p-value default: 0.05, score default: 0)

3) A peak regions BED file using the best average score found in at least 50 % of the positive training sites (p50). NOTE that this requires the p50 score to be given in the .params file, otherwise if set an empty file will be output.

**Model selection**

The GraphProt model used for profile prediction can either be uploaded to history or chosen from an example collection of models (Select model from repository). For the repository models, the corresponding parameter file is selected automatically, providing all model parameters necessary for prediction and p-value calculation. If you choose to upload a model to the history, it is recommended to use the corresponding .params file for automatically setting the model parameters. Otherwise the model parameters have to be entered manually.

**p-value calculation**

Signifying the GraphProt scores is done by fitting a generalized extreme value (GEV) distribution on a set of scores derived from 10000 transcript sequences for each GraphProt model. The GEV distribution has three parameters: my (location), sigma (scale), and xi (shape). The fitted parameter values usually are read in from the .params file, but can also be entered manually. Parameter fitting was done in R using the minpack.lm_ package, using the probability density function (PDF) described here_. If no GEV parameter values are specified (either in .params file or manually), p-value calculation for the scores will be skipped and the peak regions will be extracted based on the set threshold score.

.. _here: https://en.wikipedia.org/wiki/Generalized_extreme_value_distribution
.. _minpack.lm: https://cran.r-project.org/web/packages/minpack.lm
    ]]></help>
    <citations>
        <citation type="doi">10.1186/gb-2014-15-1-r17</citation>
    </citations>
</tool>