# HG changeset patch # User iracooke # Date 1340332248 14400 # Node ID a929e27eb20395d454f80882b783a6296408d2ce Uploaded diff -r 000000000000 -r a929e27eb203 display_applications/proteomics/PepXml.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/proteomics/PepXml.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,18 @@ + + + + + + + ${site_url}/init_local?file=${encoded_filename.qp}&type=pepxml + + + + #import binascii + ${binascii.hexlify( $pepxml_file.file_name )} + + + ${BASE_URL.split(":")[1][2:]} + + + diff -r 000000000000 -r a929e27eb203 display_applications/proteomics/ProtXml.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/proteomics/ProtXml.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,18 @@ + + + + + + + ${site_url}/init_local?file=${encoded_filename.qp}&type=protxml + + + + #import binascii + ${binascii.hexlify( $protxml_file.file_name )} + + + ${BASE_URL.split(":")[1][2:]} + + + \ No newline at end of file diff -r 000000000000 -r a929e27eb203 display_applications/proteomics/mzML.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display_applications/proteomics/mzML.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,18 @@ + + + + + + + ${site_url}/init_local?file=${encoded_filename.qp}&type=mzml + + + + #import binascii + ${binascii.hexlify( $mzml_file.file_name )} + + + ${BASE_URL.split(":")[1][2:]} + + + \ No newline at end of file diff -r 000000000000 -r a929e27eb203 interprophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interprophet.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,43 @@ + + protk + Combine Peptide Prophet results from multiple search engines + + + + interprophet_wrapper.rb $output $use_nss $use_nrs $use_nse $use_nsi $use_nsm --minprob $minprob + + + ## Inputs. 
+ ${first_input} + #for $input_file in $input_files: + ${input_file.additional_input} + #end for + + + + + + + + + + + + + + + + + + + + + + + + + + Run InterProphet + + + diff -r 000000000000 -r a929e27eb203 interprophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interprophet_wrapper.rb Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,57 @@ +require 'pathname' + +$VERBOSE=nil + +# Hard-Coded argument order and number of arguments +# +actual_output_path_string=ARGV[0] +use_nss=ARGV[1] +use_nrs=ARGV[2] +use_nse=ARGV[3] +use_nsi=ARGV[4] +use_nsm=ARGV[5] +minprob=ARGV[6] +minprob_val=ARGV[7] + +wd= Dir.pwd +original_input_files=ARGV.drop(7) +# End hard coded args # + +cmd="" + +output_substitution_cmds="" + +input_files=original_input_files.collect do |input| + + # We append ".pep.xml" to the input file name because interprophet can't handle anything else + # In order for this to work properly we need to create a symbolic link our working directory + # + original_input_path=Pathname.new("#{input}") + actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" + + cmd << "ln -s #{input} #{actual_input_path_string};" + output_substitution_cmds << "ruby -pi -e \"gsub('#{actual_input_path_string}', '#{input}.pep.xml')\" interprophet_output.pep.xml;" + actual_input_path_string +end + +interprophet_path=%x[which interprophet.rb] +cmd << interprophet_path.chomp + +cmd << " --no-nss" unless use_nss=="blank" +cmd << " --no-nrs" unless use_nrs=="blank" +cmd << " --no-nse" unless use_nse=="blank" +cmd << " --no-nsi" unless use_nsi=="blank" +cmd << " --no-nsm" unless use_nsm=="blank" + + +input_files.each { |input| + cmd << " #{input}" +} + + +cmd << " -o interprophet_output.pep.xml -r" + +cmd << ";#{output_substitution_cmds}" + +%x[#{cmd}] + diff -r 000000000000 -r a929e27eb203 lib/galaxy/datatypes/proteomics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/proteomics.py Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,181 @@ +""" 
+Proteomics format classes +""" +import logging +import re +from galaxy.datatypes.data import * +from galaxy.datatypes.xml import * +from galaxy.datatypes.sniff import * +from galaxy.datatypes.binary import * + +log = logging.getLogger(__name__) + + +class Xls( Binary ): + """Class describing a binary excel spreadsheet file""" + file_ext = "xls" + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + dataset.peek = "Excel Spreadsheet file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) ) + +class PepXml(GenericXml): + """pepXML data""" + file_ext = "pepxml" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'pepXML data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def sniff( self, filename ): + """ + Determines whether the file is pepXML + """ + #TODO - Use a context manager on Python 2.5+ to close handle + handle = open(filename) + xmlns_re = re.compile(".*pepXML\"") + for i in range(3): + line = handle.readline() + if xmlns_re.match(line.strip()): + handle.close() + return True + + handle.close() + return False + +class MzML( GenericXml ): + """mzML data""" + file_ext = "mzml" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mzML Mass Spectrometry data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff( self, filename ): + handle = 
open(filename) + xmlns_re = re.compile("^', + 'xmlns="http://regis-web.systemsbiology.net/protXML"' ] + + for i, line in enumerate( file( filename ) ): + if i >= len( pepxml_header ): + return True + line = line.rstrip( '\n\r' ) + if protxml_header[ i ] not in line: + return False + + + +class MzXML( Text ): + """mzXML data""" + file_ext = "mzXML" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mzXML Mass Spectrometry data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def sniff( self, filename ): + mzxml_header = [ '', + '' ] + for i, line in enumerate( file( filename ) ): + if i >= len( mzxml_header ): + return True + line = line.rstrip( '\n\r' ) + if line != mzxml_header[ i ]: + return False + +class Mgf( Text ): + """Mascot Generic Format data""" + file_ext = "mgf" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mgf Mascot Generic Format' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + + def sniff( self, filename ): + mgf_begin_ions = "BEGIN IONS" + max_lines=100 + + for i, line in enumerate( file( filename ) ): + line = line.rstrip( '\n\r' ) + if line==mgf_begin_ions: + return True + if i>max_lines: + return False + + +class MascotDat( Text ): + """Mascot search results """ + file_ext = "mascotdat" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mascotdat Mascot Search Results' + else: + dataset.peek = 'file does not exist' + 
dataset.blurb = 'file purged from disk' + + + def sniff( self, filename ): + mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)" + max_lines=10 + + for i, line in enumerate( file( filename ) ): + line = line.rstrip( '\n\r' ) + if line==mime_version: + return True + if i>max_lines: + return False diff -r 000000000000 -r a929e27eb203 make_decoy.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_decoy.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,26 @@ + + + protk + + + Creates a random protein database with similar properties to a real protein database + + make_decoy.rb $input_file -o $output -L $length -P $prefix $append + + + + + + + + + + + + + + + Create random protein sequences + + + diff -r 000000000000 -r a929e27eb203 mzml_to_mgf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mzml_to_mgf.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,25 @@ + + + protk + + + Converts an mzML file to mgf suitable for searching by omssa + + file_convert.rb $input_file -o $output $maldi + + + + + + + + + + + + + + Convert line spectra to Mascot Generic Format + + + diff -r 000000000000 -r a929e27eb203 omssa.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/omssa.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,150 @@ + + + protk + + + Run an OMSSA MS/MS Search + + #if $database.source_select=="built_in": + omssa_search.rb -d $database.dbkey + #else #omssa_search.rb -d $database.fasta_file + #end if + + --var-mods=' + $variable_mods + ' + + --fix-mods=' + $fixed_mods + ' + + --searched-ions=' + $searched_ions + ' + + $input_file -o $output -r --enzyme=$enzyme --precursor-ion-tol-units=$precursor_tolu -v $missed_cleavages -f $fragment_ion_tol -p $precursor_ion_tol --num-peaks-for-multi-isotope-search $multi_isotope $use_average_mass $respect_precursor_charges --max-hit-expect $max_hit_expect --intensity-cut-off $intensity_cut_off + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Run an OMSSA Search + + diff -r 000000000000 -r a929e27eb203 peptide_prophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_prophet.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,66 @@ + + protk + Calculate Peptide Prophet statistics on search results + + peptide_prophet_wrapper.rb ${output} ${input_file} -r $glyco $useicat $phospho $usepi $usert $accurate_mass $no_ntt $no_nmc $use_gamma $use_only_expect $force_fit $allow_alt_instruments $maldi + + + + + + + + + + + + + + + + + + + + + + + + + + + + Run Peptide Prophet + + + + + + diff -r 000000000000 -r a929e27eb203 peptide_prophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_prophet_wrapper.rb Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,38 @@ +require 'pathname' + +$VERBOSE=nil + +peptide_prophet_path=%x[which peptide_prophet.rb] + +actual_output_path_string=ARGV.shift + +# Second argument is the original input file name ... 
we'll change this below +original_input_file=ARGV[0] + +# Before doing anything we append create a link to the input file in our working dir with ".pep.xml" appended to the input +# name because peptide prophet can't handle anything else + +wd= Dir.pwd + +original_input_path=Pathname.new("#{original_input_file}") +actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" +full_tmp_output_path_string="#{wd}/peptide_prophet_output.pep.xml" + +cmd = "ln -s #{original_input_file} #{actual_input_path_string};" + +cmd << peptide_prophet_path.chomp + + +ARGV[0]="#{actual_input_path_string}" + +ARGV.each { |a| + cmd << " #{a}" +} + +cmd << " -o peptide_prophet_output.pep.xml" + +# Finally we need to fix up the output file so any references to the temporary working file are changed to refs to the original input file +cmd << ";ruby -pi -e \"gsub('#{actual_input_path_string}', '#{original_input_file}')\" peptide_prophet_output.pep.xml" +cmd << ";ruby -pi -e \"gsub('#{full_tmp_output_path_string}', '#{actual_output_path_string}')\" peptide_prophet_output.pep.xml" + +%x[#{cmd}] diff -r 000000000000 -r a929e27eb203 pepxml_to_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pepxml_to_table.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,23 @@ + + protk + Converts a pepXML file to a tab delimited text file + + + +pepxml_to_table.rb $input_file -o $output + + + + + + + + + + + + + Convert a pepXML file to Tab delimited text + + + diff -r 000000000000 -r a929e27eb203 protein_prophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_prophet.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,54 @@ + + protk + Calculate Protein Prophet statistics on search results + + + + protein_prophet_wrapper.rb $output $input_file -r $iproph $nooccam $groupwts $normprotlen $logprobs $confem $allpeps $unmapped $instances $delude --minprob=$minprob --minindep=$minindep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Run Peptide Prophet + + + diff -r 
000000000000 -r a929e27eb203 protein_prophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_prophet_wrapper.rb Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,37 @@ +require 'pathname' + +$VERBOSE=nil + +protein_prophet_path=%x[which protein_prophet.rb] + +actual_output_path_string=ARGV.shift + +# Second argument is the original input file name ... we'll change this below +original_input_file=ARGV[0] + +# Before doing anything we append create a link to the input file in our working dir with ".pep.xml" appended to the input +# name because peptide prophet can't handle anything else + +wd= Dir.pwd + +original_input_path=Pathname.new("#{original_input_file}") +actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" + +cmd = "ln -s #{original_input_file} #{actual_input_path_string};" + +cmd << protein_prophet_path.chomp + + +ARGV[0]="#{actual_input_path_string}" + +ARGV.each { |a| + + cmd << " #{a}" +} + +cmd << " -o protein_prophet_results.prot.xml" + +cmd << ";ruby -pi -e \"gsub('#{actual_input_path_string}', '#{original_input_file}.pep.xml')\" protein_prophet_results.prot.xml" + +%x[#{cmd}] + diff -r 000000000000 -r a929e27eb203 tandem.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tandem.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,118 @@ + + protk + Run an X!Tandem Search + + + #if $database.source_select=="built_in": + tandem_search.rb -d $database.dbkey + #else #tandem_search.rb -d $database.fasta_file + #end if + + --var-mods=' + $variable_mods + #for $custom_variable_mod in $custom_variable_mods: + ,${custom_variable_mod.custom_mod} + #end for + ' + + --fix-mods=' + $fixed_mods + #for $custom_fix_mod in $custom_fix_mods: + ,${custom_fix_mod.custom_mod} + #end for + ' + + $input_file -o $output -r --enzyme=$enzyme --precursor-ion-tol-units=$precursor_tolu -v $missed_cleavages -f $fragment_ion_tol -p $precursor_ion_tol $allow_multi_isotope_search --keep-params-files + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Run an X!Tandem Search + + + diff -r 000000000000 -r a929e27eb203 tool-data/datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/datatypes_conf.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r a929e27eb203 tool-data/mascot_databases.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/mascot_databases.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,13 @@ +#This file lists the names of protein databases installed on Mascot +# +#In order to use interprophet to combine results from different search engines +#it is important that all searches are performed on the same database +#you should therefore ensure that each database installed on mascot has an equivalent +#database installed in the Protk databases directory (databases used by omssa and x!tandem) +#the mascot_to_pepxml tool will ask for this database when performing the conversion. 
+# +# Entries should follow the be structured as follows +# Display_name dbkey dbNameOnMascot dbkey +# +Swissprot spall_ SPAll spall_ +Swissprot Human sphuman_ SPHuman sphuman_ \ No newline at end of file diff -r 000000000000 -r a929e27eb203 tool-data/mascot_mods.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/mascot_mods.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,77 @@ +#This file lists the names of chemical modifications acceptable for proteomics search engines +# +# +Acetyl (K) acetyl_k_ Acetyl (K) acetyl_k_ +Acetyl (N-term) acetyl_n-term_ Acetyl (N-term) acetyl_n-term_ +Acetyl (Protein N-term) acetyl_proteinn-term_ Acetyl (Protein N-term) acetyl_proteinn-term_ +Amidated (C-term) amidated_c-term_ Amidated (C-term) amidated_c-term_ +Amidated (Protein C-term) amidated_proteinc-term_ Amidated (Protein C-term) amidated_proteinc-term_ +Ammonia-loss (N-term C) ammonia-loss_n-termc_ Ammonia-loss (N-term C) ammonia-loss_n-termc_ +Biotin (K) biotin_k_ Biotin (K) biotin_k_ +Biotin (N-term) biotin_n-term_ Biotin (N-term) biotin_n-term_ +Carbamidomethyl (C) carbamidomethyl_c_ Carbamidomethyl (C) carbamidomethyl_c_ +Carbamyl (K) carbamyl_k_ Carbamyl (K) carbamyl_k_ +Carbamyl (N-term) carbamyl_n-term_ Carbamyl (N-term) carbamyl_n-term_ +Carboxymethyl (C) carboxymethyl_c_ Carboxymethyl (C) carboxymethyl_c_ +Cation:Na (C-term) cation_na_c-term_ Cation:Na (C-term) cation_na_c-term_ +Cation:Na (DE) cation_na_de_ Cation:Na (DE) cation_na_de_ +Deamidated (NQ) deamidated_nq_ Deamidated (NQ) deamidated_nq_ +Deamidated-N (N) deamidated-n_n_ Deamidated-N (N) deamidated-n_n_ +Dehydrated (N-term C) dehydrated_n-termc_ Dehydrated (N-term C) dehydrated_n-termc_ +Dehydro (C) dehydro_c_ Dehydro (C) dehydro_c_ +Dioxidation (M) dioxidation_m_ Dioxidation (M) dioxidation_m_ +Ethanolyl (C) ethanolyl_c_ Ethanolyl (C) ethanolyl_c_ +ExacTagAmine (K) exactagamine_k_ ExacTagAmine (K) exactagamine_k_ +ExacTagThiol (C) exactagthiol_c_ ExacTagThiol (C) exactagthiol_c_ 
+Formyl (N-term) formyl_n-term_ Formyl (N-term) formyl_n-term_ +Formyl (Protein N-term) formyl_proteinn-term_ Formyl (Protein N-term) formyl_proteinn-term_ +Gln->pyro-Glu (N-term Q) gln_pyro-glu_n-termq_ Gln->pyro-Glu (N-term Q) gln_pyro-glu_n-termq_ +Glu->pyro-Glu (N-term E) glu_pyro-glu_n-terme_ Glu->pyro-Glu (N-term E) glu_pyro-glu_n-terme_ +Guanidinyl (K) guanidinyl_k_ Guanidinyl (K) guanidinyl_k_ +ICAT-C (C) icat-c_c_ ICAT-C (C) icat-c_c_ +ICAT-C:13C(9) (C) icat-c_13c_9__c_ ICAT-C:13C(9) (C) icat-c_13c_9__c_ +ICPL (K) icpl_k_ ICPL (K) icpl_k_ +ICPL (Protein N-term) icpl_proteinn-term_ ICPL (Protein N-term) icpl_proteinn-term_ +ICPL:13C(6) (K) icpl_13c_6__k_ ICPL:13C(6) (K) icpl_13c_6__k_ +ICPL:13C(6) (Protein N-term) icpl_13c_6__proteinn-term_ ICPL:13C(6) (Protein N-term) icpl_13c_6__proteinn-term_ +ICPL:13C(6)2H(4) (K) icpl_13c_6_2h_4__k_ ICPL:13C(6)2H(4) (K) icpl_13c_6_2h_4__k_ +ICPL:13C(6)2H(4) (N-term) icpl_13c_6_2h_4__n-term_ ICPL:13C(6)2H(4) (N-term) icpl_13c_6_2h_4__n-term_ +ICPL:13C(6)2H(4) (Protein N-term) icpl_13c_6_2h_4__proteinn-term_ ICPL:13C(6)2H(4) (Protein N-term) icpl_13c_6_2h_4__proteinn-term_ +ICPL:2H(4) (K) icpl_2h_4__k_ ICPL:2H(4) (K) icpl_2h_4__k_ +ICPL:2H(4) (Protein N-term) icpl_2h_4__proteinn-term_ ICPL:2H(4) (Protein N-term) icpl_2h_4__proteinn-term_ +iTRAQ4plex (K) itraq4plex_k_ iTRAQ4plex (K) itraq4plex_k_ +iTRAQ4plex (N-term) itraq4plex_n-term_ iTRAQ4plex (N-term) itraq4plex_n-term_ +iTRAQ4plex (Y) itraq4plex_y_ iTRAQ4plex (Y) itraq4plex_y_ +iTRAQ8plex (K) itraq8plex_k_ iTRAQ8plex (K) itraq8plex_k_ +iTRAQ8plex (N-term) itraq8plex_n-term_ iTRAQ8plex (N-term) itraq8plex_n-term_ +iTRAQ8plex (Y) itraq8plex_y_ iTRAQ8plex (Y) itraq8plex_y_ +Label:18O(1) (C-term) label_18o_1__c-term_ Label:18O(1) (C-term) label_18o_1__c-term_ +Label:18O(2) (C-term) label_18o_2__c-term_ Label:18O(2) (C-term) label_18o_2__c-term_ +Met->Hse (C-term M) met_hse_c-termm_ Met->Hse (C-term M) met_hse_c-termm_ +Met->Hsl (C-term M) met_hsl_c-termm_ Met->Hsl (C-term 
M) met_hsl_c-termm_ +Methyl (C-term) methyl_c-term_ Methyl (C-term) methyl_c-term_ +Methyl (DE) methyl_de_ Methyl (DE) methyl_de_ +Methylthio (C) methylthio_c_ Methylthio (C) methylthio_c_ +mTRAQ (K) mtraq_k_ mTRAQ (K) mtraq_k_ +mTRAQ (N-term) mtraq_n-term_ mTRAQ (N-term) mtraq_n-term_ +mTRAQ (Y) mtraq_y_ mTRAQ (Y) mtraq_y_ +mTRAQ:13C(3)15N(1) (K) mtraq_13c_3_15n_1__k_ mTRAQ:13C(3)15N(1) (K) mtraq_13c_3_15n_1__k_ +mTRAQ:13C(3)15N(1) (N-term) mtraq_13c_3_15n_1__n-term_ mTRAQ:13C(3)15N(1) (N-term) mtraq_13c_3_15n_1__n-term_ +mTRAQ:13C(3)15N(1) (Y) mtraq_13c_3_15n_1__y_ mTRAQ:13C(3)15N(1) (Y) mtraq_13c_3_15n_1__y_ +NIPCAM (C) nipcam_c_ NIPCAM (C) nipcam_c_ +Oxidation (HW) oxidation_hw_ Oxidation (HW) oxidation_hw_ +Oxidation (M) oxidation_m_ Oxidation (M) oxidation_m_ +Phospho (ST) phospho_st_ Phospho (ST) phospho_st_ +Phospho (Y) phospho_y_ Phospho (Y) phospho_y_ +Propionamide (C) propionamide_c_ Propionamide (C) propionamide_c_ +Pyridylethyl (C) pyridylethyl_c_ Pyridylethyl (C) pyridylethyl_c_ +Pyro-carbamidomethyl (N-term C) pyro-carbamidomethyl_n-termc_ Pyro-carbamidomethyl (N-term C) pyro-carbamidomethyl_n-termc_ +Sulfo (S) sulfo_s_ Sulfo (S) sulfo_s_ +Sulfo (T) sulfo_t_ Sulfo (T) sulfo_t_ +Sulfo (Y) sulfo_y_ Sulfo (Y) sulfo_y_ +TMT (K) tmt_k_ TMT (K) tmt_k_ +TMT (N-term) tmt_n-term_ TMT (N-term) tmt_n-term_ +TMT2plex (K) tmt2plex_k_ TMT2plex (K) tmt2plex_k_ +TMT2plex (N-term) tmt2plex_n-term_ TMT2plex (N-term) tmt2plex_n-term_ +TMT6plex (K) tmt6plex_k_ TMT6plex (K) tmt6plex_k_ +TMT6plex (N-term) tmt6plex_n-term_ TMT6plex (N-term) tmt6plex_n-term_ diff -r 000000000000 -r a929e27eb203 tool-data/omssa_mods.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/omssa_mods.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,181 @@ +#This file lists the names of chemical modifications accepted by OMMSA +# +# +2-amino-3-oxo-butanoic acid T 2-amino-3-oxo-butanoicacidt_ 23 2-amino-3-oxo-butanoicacidt_ +Asparagine HexNAc asparaginehexnac_ 182 
asparaginehexnac_ +Asparagine dHexHexNAc asparaginedhexhexnac_ 183 asparaginedhexhexnac_ +CAMthiopropanoyl K camthiopropanoylk_ 131 camthiopropanoylk_ +CHD2-di-methylation of K chd2-di-methylationofk_ 189 chd2-di-methylationofk_ +CHD2-di-methylation of peptide n-term chd2-di-methylationofpeptiden-term_ 190 chd2-di-methylationofpeptiden-term_ +ICAT heavy icatheavy_ 130 icatheavy_ +ICAT light icatlight_ 129 icatlight_ +M cleavage from protein n-term mcleavagefromproteinn-term_ 9 mcleavagefromproteinn-term_ +MMTS on C mmtsonc_ 179 mmtsonc_ +Maleimide-PEO2-Biotin of C maleimide-peo2-biotinofc_ 191 maleimide-peo2-biotinofc_ +NEM C nemc_ 83 nemc_ +NIPCAM nipcam_ 84 nipcam_ +O18 on peptide n-term o18onpeptiden-term_ 87 o18onpeptiden-term_ +PNGasF in O18 water pngasfino18water_ 139 pngasfino18water_ +SeMet semet_ 113 semet_ +Serine HexNAc serinehexnac_ 184 serinehexnac_ +TMT 6-plex on K tmt6-plexonk_ 198 tmt6-plexonk_ +TMT 6-plex on n-term peptide tmt6-plexonn-termpeptide_ 199 tmt6-plexonn-termpeptide_ +Threonine HexNAc threoninehexnac_ 185 threoninehexnac_ +Uniblue A on K uniblueaonk_ 195 uniblueaonk_ +acetylation of K acetylationofk_ 24 acetylationofk_ +acetylation of protein n-term acetylationofproteinn-term_ 10 acetylationofproteinn-term_ +amidation of peptide c-term amidationofpeptidec-term_ 25 amidationofpeptidec-term_ +arginine to ornithine argininetoornithine_ 163 argininetoornithine_ +beta elimination of S betaeliminationofs_ 140 betaeliminationofs_ +beta elimination of T betaeliminationoft_ 141 betaeliminationoft_ +beta methythiolation of D betamethythiolationofd_ 13 betamethythiolationofd_ +beta-carboxylation of D beta-carboxylationofd_ 47 beta-carboxylationofd_ +beta-methylthiolation of D (duplicate of 13) beta-methylthiolationofd_duplicateof13__ 26 beta-methylthiolationofd_duplicateof13__ +carbamidomethyl C carbamidomethylc_ 3 carbamidomethylc_ +carbamylation of K carbamylationofk_ 31 carbamylationofk_ +carbamylation of n-term peptide 
carbamylationofn-termpeptide_ 32 carbamylationofn-termpeptide_ +carboxyamidomethylation of D carboxyamidomethylationofd_ 29 carboxyamidomethylationofd_ +carboxyamidomethylation of E carboxyamidomethylationofe_ 30 carboxyamidomethylationofe_ +carboxyamidomethylation of H carboxyamidomethylationofh_ 28 carboxyamidomethylationofh_ +carboxyamidomethylation of K carboxyamidomethylationofk_ 27 carboxyamidomethylationofk_ +carboxykynurenin of W carboxykynureninofw_ 165 carboxykynureninofw_ +carboxymethyl C carboxymethylc_ 2 carboxymethylc_ +carboxymethylated selenocysteine carboxymethylatedselenocysteine_ 207 carboxymethylatedselenocysteine_ +citrullination of R citrullinationofr_ 33 citrullinationofr_ +deamidation of N deamidationofn_ 196 deamidationofn_ +deamidation of N and Q deamidationofnandq_ 4 deamidationofnandq_ +dehydro of S and T dehydroofsandt_ 164 dehydroofsandt_ +di-O18 on peptide n-term di-o18onpeptiden-term_ 88 di-o18onpeptiden-term_ +di-iodination of Y di-iodinationofy_ 35 di-iodinationofy_ +di-methylation of K di-methylationofk_ 36 di-methylationofk_ +di-methylation of R di-methylationofr_ 37 di-methylationofr_ +di-methylation of peptide n-term di-methylationofpeptiden-term_ 38 di-methylationofpeptiden-term_ +farnesylation of C farnesylationofc_ 42 farnesylationofc_ +fluorophenylalanine fluorophenylalanine_ 46 fluorophenylalanine_ +formylation of K formylationofk_ 43 formylationofk_ +formylation of peptide n-term formylationofpeptiden-term_ 44 formylationofpeptiden-term_ +formylation of protein n-term formylationofproteinn-term_ 82 formylationofproteinn-term_ +gamma-carboxylation of E gamma-carboxylationofe_ 48 gamma-carboxylationofe_ +gammathiopropionylation of K gammathiopropionylationofk_ 40 gammathiopropionylationofk_ +gammathiopropionylation of peptide n-term gammathiopropionylationofpeptiden-term_ 41 gammathiopropionylationofpeptiden-term_ +geranyl-geranyl geranyl-geranyl_ 49 geranyl-geranyl_ +glucuronylation of protein n-term 
glucuronylationofproteinn-term_ 50 glucuronylationofproteinn-term_ +glutathione disulfide glutathionedisulfide_ 51 glutathionedisulfide_ +guanidination of K guanidinationofk_ 53 guanidinationofk_ +heavy arginine-13C6 heavyarginine-13c6_ 136 heavyarginine-13c6_ +heavy arginine-13C6-15N4 heavyarginine-13c6-15n4_ 137 heavyarginine-13c6-15n4_ +heavy lysine - 13C6 15N2 heavylysine-13c615n2_ 181 heavylysine-13c615n2_ +heavy lysine - 2H4 heavylysine-2h4_ 180 heavylysine-2h4_ +heavy lysine-13C6 heavylysine-13c6_ 138 heavylysine-13c6_ +homoserine homoserine_ 56 homoserine_ +homoserine lactone homoserinelactone_ 57 homoserinelactone_ +hydroxylation of Y hydroxylationofy_ 64 hydroxylationofy_ +hydroxylation of D hydroxylationofd_ 59 hydroxylationofd_ +hydroxylation of F hydroxylationoff_ 63 hydroxylationoff_ +hydroxylation of K hydroxylationofk_ 60 hydroxylationofk_ +hydroxylation of N hydroxylationofn_ 61 hydroxylationofn_ +hydroxylation of P hydroxylationofp_ 62 hydroxylationofp_ +iTRAQ114 on K itraq114onk_ 168 itraq114onk_ +iTRAQ114 on Y itraq114ony_ 169 itraq114ony_ +iTRAQ114 on nterm itraq114onnterm_ 167 itraq114onnterm_ +iTRAQ115 on K itraq115onk_ 171 itraq115onk_ +iTRAQ115 on Y itraq115ony_ 172 itraq115ony_ +iTRAQ115 on nterm itraq115onnterm_ 170 itraq115onnterm_ +iTRAQ116 on K itraq116onk_ 174 itraq116onk_ +iTRAQ116 on Y itraq116ony_ 175 itraq116ony_ +iTRAQ116 on nterm itraq116onnterm_ 173 itraq116onnterm_ +iTRAQ117 on K itraq117onk_ 177 itraq117onk_ +iTRAQ117 on Y itraq117ony_ 178 itraq117ony_ +iTRAQ117 on nterm itraq117onnterm_ 176 itraq117onnterm_ +iTRAQ8plex itraq8plex_ 204 itraq8plex_ +iTRAQ8plex itraq8plex_ 205 itraq8plex_ +iTRAQ8plex itraq8plex_ 203 itraq8plex_ +iTRAQ8plex itraq8plex_ 201 itraq8plex_ +iTRAQ8plex itraq8plex_ 202 itraq8plex_ +iTRAQ8plex itraq8plex_ 200 itraq8plex_ +iodination of Y iodinationofy_ 65 iodinationofy_ +lipoyl K lipoylk_ 67 lipoylk_ +methyl C methylc_ 73 methylc_ +methyl H methylh_ 74 methylh_ +methyl N methyln_ 75 methyln_ +methyl R 
methylr_ 77 methylr_ +methyl ester of D methylesterofd_ 69 methylesterofd_ +methyl ester of E (duplicate of 17) methylesterofe_duplicateof17__ 70 methylesterofe_duplicateof17__ +methyl ester of S methylesterofs_ 71 methylesterofs_ +methyl ester of Y methylesterofy_ 72 methylesterofy_ +methyl ester of peptide c-term (duplicate of 18) methylesterofpeptidec-term_duplicateof18__ 68 methylesterofpeptidec-term_duplicateof18__ +methylation of D methylationofd_ 16 methylationofd_ +methylation of E methylationofe_ 17 methylationofe_ +methylation of K methylationofk_ 0 methylationofk_ +methylation of Q methylationofq_ 14 methylationofq_ +methylation of peptide c-term methylationofpeptidec-term_ 18 methylationofpeptidec-term_ +methylation of peptide n-term methylationofpeptiden-term_ 76 methylationofpeptiden-term_ +methylation of protein n-term methylationofproteinn-term_ 11 methylationofproteinn-term_ +myristoleylation of G myristoleylationofg_ 78 myristoleylationofg_ +myristoyl-4H of G myristoyl-4hofg_ 79 myristoyl-4hofg_ +myristoylation of K myristoylationofk_ 81 myristoylationofk_ +myristoylation of peptide n-term G myristoylationofpeptiden-termg_ 80 myristoylationofpeptiden-termg_ +n-acyl diglyceride cysteine n-acyldiglyceridecysteine_ 118 n-acyldiglyceridecysteine_ +n-formyl met addition n-formylmetaddition_ 22 n-formylmetaddition_ +oxidation of C oxidationofc_ 193 oxidationofc_ +oxidation of C to cysteic acid oxidationofctocysteicacid_ 34 oxidationofctocysteicacid_ +oxidation of C to sulfinic acid oxidationofctosulfinicacid_ 162 oxidationofctosulfinicacid_ +oxidation of F to dihydroxyphenylalanine oxidationofftodihydroxyphenylalanine_ 39 oxidationofftodihydroxyphenylalanine_ +oxidation of H oxidationofh_ 89 oxidationofh_ +oxidation of H to D oxidationofhtod_ 55 oxidationofhtod_ +oxidation of H to N oxidationofhton_ 54 oxidationofhton_ +oxidation of M oxidationofm_ 1 oxidationofm_ +oxidation of P to pyroglutamic acid oxidationofptopyroglutamicacid_ 111 
oxidationofptopyroglutamicacid_ +oxidation of W oxidationofw_ 90 oxidationofw_ +oxidation of W to formylkynurenin oxidationofwtoformylkynurenin_ 45 oxidationofwtoformylkynurenin_ +oxidation of W to hydroxykynurenin oxidationofwtohydroxykynurenin_ 58 oxidationofwtohydroxykynurenin_ +oxidation of W to kynurenin oxidationofwtokynurenin_ 66 oxidationofwtokynurenin_ +oxidation of W to nitro oxidationofwtonitro_ 85 oxidationofwtonitro_ +oxidation of Y (duplicate of 64) oxidationofy_duplicateof64__ 194 oxidationofy_duplicateof64__ +oxidation of Y to nitro oxidationofytonitro_ 86 oxidationofytonitro_ +palmitoleyl of C palmitoleylofc_ 187 palmitoleylofc_ +palmitoleyl of S palmitoleylofs_ 186 palmitoleylofs_ +palmitoleyl of T palmitoleyloft_ 188 palmitoleyloft_ +palmitoylation of C palmitoylationofc_ 92 palmitoylationofc_ +palmitoylation of K palmitoylationofk_ 93 palmitoylationofk_ +palmitoylation of S palmitoylationofs_ 94 palmitoylationofs_ +palmitoylation of T palmitoylationoft_ 95 palmitoylationoft_ +phosphopantetheine S phosphopantetheines_ 91 phosphopantetheines_ +phosphorylation of H phosphorylationofh_ 192 phosphorylationofh_ +phosphorylation of S phosphorylationofs_ 6 phosphorylationofs_ +phosphorylation of S with ETD loss phosphorylationofswithetdloss_ 134 phosphorylationofswithetdloss_ +phosphorylation of S with prompt loss phosphorylationofswithpromptloss_ 96 phosphorylationofswithpromptloss_ +phosphorylation of T phosphorylationoft_ 7 phosphorylationoft_ +phosphorylation of T with ETD loss phosphorylationoftwithetdloss_ 135 phosphorylationoftwithetdloss_ +phosphorylation of T with prompt loss phosphorylationoftwithpromptloss_ 97 phosphorylationoftwithpromptloss_ +phosphorylation of Y phosphorylationofy_ 8 phosphorylationofy_ +phosphorylation with neutral loss on C phosphorylationwithneutrallossonc_ 99 phosphorylationwithneutrallossonc_ +phosphorylation with neutral loss on D phosphorylationwithneutrallossond_ 100 phosphorylationwithneutrallossond_ 
+phosphorylation with neutral loss on H phosphorylationwithneutrallossonh_ 101 phosphorylationwithneutrallossonh_ +phosphorylation with neutral loss on S phosphorylationwithneutrallossons_ 132 phosphorylationwithneutrallossons_ +phosphorylation with neutral loss on T phosphorylationwithneutrallossont_ 133 phosphorylationwithneutrallossont_ +phosphorylation with prompt loss on Y phosphorylationwithpromptlossony_ 98 phosphorylationwithpromptlossony_ +propionamide C propionamidec_ 5 propionamidec_ +propionyl heavy K propionylheavyk_ 104 propionylheavyk_ +propionyl heavy peptide n-term propionylheavypeptiden-term_ 105 propionylheavypeptiden-term_ +propionyl light K propionyllightk_ 102 propionyllightk_ +propionyl light on peptide n-term propionyllightonpeptiden-term_ 103 propionyllightonpeptiden-term_ +pyridyl K pyridylk_ 106 pyridylk_ +pyridyl peptide n-term pyridylpeptiden-term_ 107 pyridylpeptiden-term_ +pyro-cmC pyro-cmc_ 108 pyro-cmc_ +pyro-glu from n-term E pyro-glufromn-terme_ 109 pyro-glufromn-terme_ +pyro-glu from n-term Q pyro-glufromn-termq_ 110 pyro-glufromn-termq_ +s-pyridylethylation of C s-pyridylethylationofc_ 112 s-pyridylethylationofc_ +selenocysteine selenocysteine_ 206 selenocysteine_ +sulfation of Y sulfationofy_ 114 sulfationofy_ +sulphone of M sulphoneofm_ 115 sulphoneofm_ +sumoylation of K sumoylationofk_ 166 sumoylationofk_ +tri-deuteromethylation of D tri-deuteromethylationofd_ 19 tri-deuteromethylationofd_ +tri-deuteromethylation of E tri-deuteromethylationofe_ 20 tri-deuteromethylationofe_ +tri-deuteromethylation of peptide c-term tri-deuteromethylationofpeptidec-term_ 21 tri-deuteromethylationofpeptidec-term_ +tri-iodination of Y tri-iodinationofy_ 116 tri-iodinationofy_ +tri-methylation of K tri-methylationofk_ 15 tri-methylationofk_ +tri-methylation of R tri-methylationofr_ 117 tri-methylationofr_ +tri-methylation of protein n-term tri-methylationofproteinn-term_ 12 tri-methylationofproteinn-term_ +trideuteration of L (SILAC) 
trideuterationofl_silac__ 197 trideuterationofl_silac__ +ubiquitinylation residue ubiquitinylationresidue_ 52 ubiquitinylationresidue_ diff -r 000000000000 -r a929e27eb203 tool-data/pepxml_databases.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pepxml_databases.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,13 @@ +#This file lists the names of protein databases installed locally in protk. +# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool +# In order to combine search results with Interprophet, searches must be run against an identical database +# +# Entries should be structured as follows +# Display_name omssa_tandem_dbname dbkey +# +# +Swissprot spall_ spall spall_ +Combined PlasmboDB (falciparum) and Swissprot Human plasmodb_pfalciparum_sphuman_ plasmodb_pfalciparum_sphuman plasmodb_pfalciparum_sphuman_ +Swissprot Human sphuman_ sphuman sphuman_ +Combined Swissprot/TRembl Human sptrhuman_ sptrhuman sptrhuman_ +Swissprot Mouse spmouse_ spmouse spmouse_ diff -r 000000000000 -r a929e27eb203 tool-data/protk_display_site.txt.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/protk_display_site.txt.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,3 @@ +#Proteomic Visualization application should be hosted on the same server as galaxy +#Entries in this file are of the format "site_id" site_url +Proteomics Visualize http://127.0.0.1:8500 diff -r 000000000000 -r a929e27eb203 tool-data/tandem_mods.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tandem_mods.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,6 @@ +#This file lists the names of inbuilt chemical modifications accepted by X!Tandem +# +# +Carbamidomethyl C carbamidomethyl_c_ 57.021464@C carbamidomethyl_c_ +Glycocapture-N glycocapture_n_ 0.998@N!{P}[ST] glycocapture_n_ +Oxidation M oxidation_m_ 15.994915@M oxidation_m_ \ No newline at end of file diff -r 000000000000 -r a929e27eb203 xls_to_table.xml 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xls_to_table.xml Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,23 @@ + + protk + Converts an excel spreadsheet to a tab delimited text file + + + +xls_to_table.rb $input_file -o $output + + + + + + + + + + + + + Convert an Excel Spreadsheet to Tab delimited text + + +