Mercurial > repos > galaxyp > map_peptides_to_bed
changeset 1:db90662d26f9 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/map_peptides_to_bed commit a2d6625c1d0c65af83ce12614022f30ccd610444-dirty
author | galaxyp |
---|---|
date | Mon, 22 Feb 2016 17:30:45 -0500 |
parents | 51f8f9041724 |
children | 78b8213e122d |
files | map_peptides_to_bed.py map_peptides_to_bed.xml |
diffstat | 2 files changed, 72 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/map_peptides_to_bed.py Mon Jan 25 15:32:49 2016 -0500 +++ b/map_peptides_to_bed.py Mon Feb 22 17:30:45 2016 -0500 @@ -232,7 +232,7 @@ parser.add_option( '-t', '--translated_bed', dest='translated_bed', default=None, help='A bed file with added 13th column having a translation' ) parser.add_option( '-i', '--input', dest='input', default=None, help='Tabular file with peptide_sequence column' ) parser.add_option( '-p', '--peptide_column', type='int', dest='peptide_column', default=1, help='column ordinal with peptide sequence' ) - parser.add_option( '-n', '--name_column', type='int', dest='name_column', default=None, help='column ordinal with protein name' ) + parser.add_option( '-n', '--name_column', type='int', dest='name_column', default=2, help='column ordinal with protein name' ) parser.add_option( '-s', '--start_column', type='int', dest='start_column', default=None, help='column with peptide start position in protein' ) parser.add_option( '-B', '--bed', dest='bed', default=None, help='Output a bed file with added 13th column having translation' ) ## parser.add_option( '-G', '--gff3', dest='gff', default=None, help='Output translations to a GFF3 file' )
--- a/map_peptides_to_bed.xml Mon Jan 25 15:32:49 2016 -0500 +++ b/map_peptides_to_bed.xml Mon Feb 22 17:30:45 2016 -0500 @@ -1,4 +1,5 @@ <tool id="map_peptides_to_bed" name="Map peptides to a bed file" version="0.1.0"> + <description>for viewing in a genome browser</description> <requirements> <requirement type="package" version="1.62">biopython</requirement> </requirements> @@ -22,15 +23,22 @@ --bed="$mapped_peptides" ]]></command> <inputs> - <param name="translated_bed" type="data" format="bed" label="Translated bed with IDs to match in the input" help=""/> - <param name="input" type="data" format="tabular" label="Identified Peptides" help=""/> - <param name="peptide_column" type="data_column" data_ref="input" label="peptide column" optional="true" - help="Defaults to first column"/> - <param name="name_column" type="data_column" data_ref="input" label="protein name column" optional="true" - help="The name in this column must match the name column in the Translate bed"/> - <param name="start_column" type="data_column" data_ref="input" label="peptide offset column" optional="true" - help="The offset in AnimoAcids of the peptide from the start of the protein sequence"/> - <param name="gffTags" type="boolean" truevalue="--gffTags" falsevalue="" checked="true" label="Use #gffTags in output" help=""/> + <param name="translated_bed" type="data" format="bed" label="Translated BED" + help="mapping Protein IDs from a Protein Search fasta to a reference genome"/> + <param name="input" type="data" format="tabular" label="Identified Peptides" + help="Such as a PSM (Peptide Spectral Match) report from a Proteomics Search Application"/> + <param name="peptide_column" type="data_column" data_ref="input" label="PSM peptide column" optional="true" + help="Contains the peptide amino acid sequence. Defaults to first column"/> + <param name="name_column" type="data_column" data_ref="input" label="PSM protein name column" optional="true" + help="The name in this column must match the name column in the Translate BED input. Defaults to second column." /> + <param name="start_column" type="data_column" data_ref="input" label="PSM peptide offset column (optional)" optional="true"> + <help>The offset in AnimoAcids of the peptide from the start of the protein sequence. + If this column is not available, the application will expect the Translated BED file + to have a 13th column with the protein sequence from which to determine the offset + </help> + </param> + <param name="gffTags" type="boolean" truevalue="--gffTags" falsevalue="" checked="true" label="Use #gffTags in output" + help="and use the peptide as the display name for the entry"/> </inputs> <outputs> <data name="mapped_peptides" format="bed" /> @@ -45,7 +53,60 @@ </test> </tests> <help><![CDATA[ - Usage: map_peptides_to_bed.py [options] +**Map peptides to a bed file** + +This tool is intended to map peptides identified by Mass Spectrometry relative to the protein sequence in the Proteomics Search database. + +It generates a BED file that maps the location of peptides within proteins mapped to a reference genome so that they can be displayed in a genome browser. + + +The input is a tabular file that has columns containing: peptide, protein ID, and optionally the offset of the peptide from the start of the protein sequence. + +The other input is a BED file decribing the location of protein sequences relative to a reference genome. +This file can be produced by the Translate BED Sequences tool when generating the fasta file of translations. + +The output is a BED file with lines from the input BED file that had peptide matches. +The ID, thichStart, and thickEnd fields are changed to reflect the peptide match to that protein entry. + + +Inputs: + + - A BED file for the Proteins contained in a Proteomics Search Database. + (Should have a 13th column with the protein sequence.) + + - A tabular file (Pepetide Spectral Matach report) that includes columns that contain the protein identifer and the peptide sequence. + + +Output: + + - A BED file with an entry from the input BED file for each mapped input peptide, + with the ID, thichStart, and thickEnd fields are changed to reflect the peptide. + + +Example: + + Input BED file :: + + track name="novel_junction_peptides" type=bedDetail description="test" + 5 90931315 90932985 JUNC00034987_3 1 + 90931315 90932985 255,0,0 2 118,74 0,1596 AWRRGPAASRCSGAVWAEGLCEPRASPSARGAASGAAAGGPAERRDSIFQRLPLSASPFSHLFK + X 157593859 157598461 JUNC00080388_3 11 - 157593859 157598461 255,0,0 2 14,151 0,4451 SPGLVRMVLCRPRPFLFPFGVSAPGREPLRAPAASACALPRGASVRPSKEIICVF + + + Input PSM (Peptide Spectral Match) file :: + + 25895 JUNC00034987_3 ASPSARGAASGAAAGGPAER + 15253 JUNC00080388_3 APAASACALPR + + + Output BED file:: + + track name="novel_junction_peptides" type=bedDetail description="test" + #gffTags + 5 90931315 90932985 ID=JUNC00034987_3;Name=ASPSARGAASGAAAGGPAER 1 + 90931387 90932925 255,0,0 2 118,74 0,1596 ASPSARGAASGAAAGGPAER AWRRGPAASRCSGAVWAEGLCEPRASPSARGAASGAAAGGPAERRDSIFQRLPLSASPFSHLFK + X 157593859 157598461 ID=JUNC00080388_3;Name=APAASACALPR 11 - 157598338 157598371 255,0,0 2 14,151 0,4451 APAASACALPR SPGLVRMVLCRPRPFLFPFGVSAPGREPLRAPAASACALPRGASVRPSKEIICVF + + +Usage: map_peptides_to_bed.py [options] Options: -h, --help show this help message and exit