Mercurial > repos > bgruening > psortb
changeset 0:b97ecda36fc4 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/psortb commit 9275472791234aa6df4ed6e0e533bd9d74d83bdd
| author | bgruening |
|---|---|
| date | Thu, 18 Dec 2025 16:15:09 +0000 |
| parents | |
| children | |
| files | psortb.xml test-data/psortb_arch.fa test-data/psortb_neg.fa test-data/psortb_pos.fa test-data/psortb_pos_output.txt |
| diffstat | 5 files changed, 143 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/psortb.xml Thu Dec 18 16:15:09 2025 +0000 @@ -0,0 +1,115 @@
+<tool id="psortb" name="PSORTb" version="3.0.6" profile="24.0">
+    <description>Protein subcellular localization prediction for prokaryotes</description>
+    <xrefs>
+        <xref type="bio.tools">psortb</xref>
+    </xrefs>
+    <requirements>
+        <container type="docker">quay.io/galaxy/psortb-cli:3.0.6</container>
+    </requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+        ## Optional floats are compared as strings so that an explicit 0 is still forwarded to psort.
+        mkdir -p "\$TMPDIR/results" &&
+        /usr/local/psortb/bin/psort
+            $gram.gram_choice
+            #if str($cutoff) != ''
+                -c $cutoff
+            #end if
+            #if str($divergent) != ''
+                -d $divergent
+            #end if
+            -f fasta
+            $exact
+            -o $output_format
+            -i '$input_fasta'
+        && mv "\$TMPDIR"/results/*_psortb_*.txt '$output'
+        ]]>
+    </command>
+    <!-- Inputs: protein FASTA, organism class, output format and the optional PSORTb flags. -->
+    <inputs>
+        <param name="input_fasta" type="data" format="fasta" label="Protein sequences (FASTA)" help="Submit protein sequences in FASTA format."/>
+
+        <section name="gram" title="Organism classification" expanded="true">
+            <param name="gram_choice" type="select" label="Organism type">
+                <option value="--positive" selected="true">Gram-positive (Bacteria)</option>
+                <option value="--negative">Gram-negative (Bacteria)</option>
+                <option value="--archaea">Archaea</option>
+            </param>
+        </section>
+
+        <param name="output_format" type="select" label="Output format" help="Choose PSORTb output format.">
+            <option value="normal">Normal (human-readable)</option>
+            <option value="terse">3-column (terse)</option>
+            <option value="long" selected="true">30-column (long)</option>
+        </param>
+        <param argument="--exact" type="boolean" truevalue="--exact" falsevalue="" checked="false" label="Skip SCLBLASTe" help="Useful for batch runs of data against itself in SCLBLAST"/>
+        <param argument="--cutoff" type="float" optional="true" label="Prediction cutoff" help="Sets a cutoff value for reported results (default: 7.5 used internally)."/>
+        <param argument="--divergent" type="float" optional="true" label="Multiple localization cutoff" help="Sets a cutoff for flagging potential multiple localization sites."/>
+    </inputs>
+    <!-- The command moves the *_psortb_*.txt result file from $TMPDIR/results onto this dataset. -->
+    <outputs>
+        <data name="output" format="txt" label="PSORTb results on ${on_string}"/>
+    </outputs>
+    <!-- One test per organism class, each exercising a different output format. -->
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_fasta" value="psortb_pos.fa"/>
+            <param name="gram|gram_choice" value="--positive"/>
+            <param name="output_format" value="normal"/>
+            <output name="output" value="psortb_pos_output.txt"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_fasta" value="psortb_neg.fa"/>
+            <param name="gram|gram_choice" value="--negative"/>
+            <param name="output_format" value="terse"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="SeqID\tLocalization\tScore" />
+                    <has_text_matching expression="NP_949347\.1 \tUnknown\t7\.0" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_fasta" value="psortb_arch.fa"/>
+            <param name="gram|gram_choice" value="--archaea"/>
+            <param name="output_format" value="long"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="SeqID\s+CMSVM_a_Localization\s+CMSVM_a_Details\s+CWSVM_a_Localization\s+CWSVM_a_Details\s+CytoSVM_a_Localization\s+CytoSVM_a_Details\s+ECSVM_a_Localization\s+ECSVM_a_Details\s+ModHMM_a_Localization\s+ModHMM_a_Details\s+Motif_a_Localization\s+Motif_a_Details\s+Profile_a_Localization\s+Profile_a_Details\s+SCL-BLAST_a_Localization\s+SCL-BLAST_a_Details\s+SCL-BLASTe_a_Localization\s+SCL-BLASTe_a_Details\s+Signal_a_Localization\s+Signal_a_Details\s+Cytoplasmic_Score\s+CytoplasmicMembrane_Score\s+Cellwall_Score\s+Extracellular_Score\s+Final_Localization\s+Final_Localization_Details\s+Final_Score\s+Secondary_Localization\s+PSortb_Version" />
+                    <has_text_matching expression="YP_001689002\.1\s+Unknown\s+Unknown\s+Unknown\s+Extracellular\s+Unknown\s+1 internal helix found\s+Unknown\s+No motifs found\s+Unknown\s+No matches to profiles found\s+Extracellular\s+matched\s+47117675: Flagellin B1 precursor\s+Unknown\s+No matches against database\s+Unknown\s+No signal peptide detected\s+0\.01\s+0\.00\s+0\.02\s+9\.97\s+Extracellular\s+9\.97\s+Flagellar\s+PSORTb version" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>
+        <![CDATA[
+PSORTb predicts the subcellular localization of bacterial and archaeal proteins.
+
+Input requirements
+
+- Protein sequences in FASTA format. All sequences in one run should belong to the same organism class.
+
+Options
+
+- Organism type: select Gram-positive (`--positive`), Gram-negative (`--negative`), or Archaea (`--archaea`).
+- Output format: `normal` (human-readable), `terse` (tab-delimited), or `long` (tab-delimited with module details).
+- Cutoff (`-c`): threshold for final localization assignment (documentation suggests ~7.5).
+- Multiple localization cutoff (`-d`): threshold to flag possible multiple localization sites.
+- Exact (`--exact`): skip SCL-BLASTe step.
+
+Notes
+
+- PSORTb emphasizes precision; proteins with ambiguous signals may be reported as Unknown.
+- Long format includes module outputs and localization scores; terse/long are suitable for bulk processing.
+
+Reference
+
+https://psort.org/documentation/index.html
+        ]]>
+    </help>
+
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq249</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/psortb_arch.fa Thu Dec 18 16:15:09 2025 +0000 @@ -0,0 +1,2 @@ +>YP_001689002.1 +MFEFITDEDERGQVGIGTLIVFIAMVLVAAIAAGVLINTAGYLQSKGSATGEEASAQVSNRINIVSAYGNVNNEKVDYVNLTVRQAAGADNINLTKSTIQWIGPDRATTLTYSSNSPSSLGENFTTESIKGSSADVLVDQSDRIKVIMYASGVSSNLGAGDEVQLTVTTQYGSKTTYWAQVPESLKDKNA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/psortb_neg.fa Thu Dec 18 16:15:09 2025 +0000 @@ -0,0 +1,2 @@ +>NP_949347.1 +MQGHHFGGDMSNSEAIDNTTAKLRLAQSSSLLALALLIGSAPAQAADTDWGWLAIGAPAATAQGWTGKGVVIGVVDTGIDFSHPALSGRAFDYNYGSFVAGSNHPHATHVAGIIGATDINRGMEGVAPDVRFSSMKIFTGAGGSYLGDAAVADAYDGAIGSGVRIFNNSWGSSDSIANFTSREELLAHEPLLVGAFTRAVNADAVLVWSTGNDGRSQPSWQAAAPYYIQELKANWIAVTSVGENGTIASYANACGVAKAWCLAAPGGDFNPGIYSTIPGKDYGYMSGTSMAAPYVTGATAIARQMFPKASGAQLAQIVLQTSRDIGAPGIDDVYGWGLLAVDNIVDTINPRGAALFASAAWGRFTTLSAIGNTVLDRISDLRNGRGDVVTAPLAFAG \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/psortb_pos.fa Thu Dec 18 16:15:09 2025 +0000 @@ -0,0 +1,2 @@ +>SAK_BPP42 +MLKRSLLFLTVLLLLFSFSSITNEVSASSSFDKGKYKKGDDASYFEPTGPYLMVNVTGVDGKRNELLSPRYVEFPIKPGTTLTKEKIEYYVEWALDATAYKEFRVVELDPSAKIEVTYYDKNKKKEETKSFPITEKGFVVPDLSEHIKNPGFNLITKVVIEKK
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/psortb_pos_output.txt Thu Dec 18 16:15:09 2025 +0000 @@ -0,0 +1,22 @@ +SeqID: SAK_BPP42 + Analysis Report: + CMSVM+ Unknown [No details] + CWSVM+ Unknown [No details] + CytoSVM+ Unknown [No details] + ECSVM+ Extracellular [No details] + ModHMM+ Unknown [1 internal helix found] + Motif+ Unknown [No motifs found] + Profile+ Unknown [No matches to profiles found] + SCL-BLAST+ Extracellular [matched 134189: Staphylokinase precursor (Neutral proteinase) (Protease III)] + SCL-BLASTe+ Unknown [No matches against database] + Signal+ Non-Cytoplasmic [Signal peptide detected] + Localization Scores: + Extracellular 9.98 + Cellwall 0.02 + CytoplasmicMembrane 0.00 + Cytoplasmic 0.00 + Final Prediction: + Extracellular 9.98 + +------------------------------------------------------------------------------- +
