Mercurial > repos > pieterlukasse > prims_proteomics

Binary file Csv2Apml.jar has changed
Binary file MsFilt.jar has changed
Binary file NapQ.jar has changed
Binary file PRIMS.jar has changed
Binary file ProgenesisConv.jar has changed
Binary file Quantifere.jar has changed
Binary file Quantiline.jar has changed
Binary file SedMat_cli.jar has changed
--- a/napq.xml	Fri Aug 01 17:22:37 2014 +0200
+++ b/napq.xml	Mon Jan 26 06:24:15 2015 +0100
@@ -27,8 +27,25 @@
 		<param name="namingConventionCodesForSamples" type="text" size="100" value=""
 		label="Part of run/file name that identifies the sample"
 		help="Add the CSV list of codes that occur in the file names
-			and that stand for a sample code. E.g. '_S1,_S2,_S3,etc.' "/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
-
+			and that stand for a sample code. E.g. '_S1,_S2,_S3,etc.' "> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
+			<sanitizer>
+				<!-- extend the set of "valid" characters; '&' has to be written as the entity &amp; so that this XML stays well-formed: -->
+				<valid>
+					<add preset="string.printable"/>
+					<add value="#"/>
+					<add value="@"/>
+					<add value="$"/>
+					<add value="%"/>
+					<add value="&amp;"/>
+					<add value="*"/>
+					<add value="["/>
+					<add value="]"/>
+					<add value="|"/>
+					<add value="{"/>
+					<add value="}"/>
+				</valid>
+			</sanitizer>
+		</param>

    		<conditional name="is2D_LC_MS">
      		<param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
@@ -39,7 +56,26 @@
      			label="Part of run/file name that identifies the 2D LC-MS fraction"
      			help="Add the CSV list of codes that occur in the file names
      				and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' Use this to avoid
-     				that each (fraction) file is seen as a separate run."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
+     				that each (fraction) file is seen as a separate run."> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
+     					<sanitizer>
+							<!-- extend the set of "valid" characters; '&' has to be written as the entity &amp; so that this XML stays well-formed: -->
+							<valid>
+								<add preset="string.printable"/>
+								<add value="#"/>
+								<add value="@"/>
+								<add value="$"/>
+								<add value="%"/>
+								<add value="&amp;"/>
+								<add value="*"/>
+								<add value="["/>
+								<add value="]"/>
+								<add value="|"/>
+								<add value="{"/>
+								<add value="}"/>
+							</valid>
+						</sanitizer>
+					</param>
+
      		</when>
      		<when value="No">
      		</when>
--- a/quantifere.xml	Fri Aug 01 17:22:37 2014 +0200
+++ b/quantifere.xml	Mon Jan 26 06:24:15 2015 +0100
@@ -56,6 +56,8 @@
      				way different peptide identifications from the same sample but measured
      				in different fractions can be merged together. Otherwise each (fraction) file
      				is seen as a separate sample."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
+     				<!-- Note on the help text above: the given codes are removed from the source name. Separate features are clustered, not peptides; peptides
+     				     are quantified by summing features (raw) or by summing patterns. TODO: document the quantification columns present in the output CSV. -->
      		</when>
      		<when value="No">
      		</when>
@@ -89,15 +91,17 @@
 		sample intensity values correlation. Set here the minimum correlation expected between grouped members. This is used to guide the clustering algorithm."/>

 		<!--  simple extra heuristics to remove some "noise" protein hits  -->
-		<param name="minProtCoverage" type="float" size="10" value="5.0" label="Minimum protein coverage (%)" help="This will remove proteins that have a too small
-		portion of their sequence covered by peptide matches."/>
+		<param name="minProtCoverage" type="float" size="10" value="0.0" label="Minimum protein coverage (%)" help="Set this to e.g. 5.0 if you have protein coverage
+		information in your data. This will remove proteins that have a too small portion of their sequence covered by peptide matches."/>
+		<!-- TODO: add a warning to the report if this is left at 0 and no coverage is found (or maybe validate the other way around). -->

 		<param name="minAboveAverageHits" type="integer" size="10" value="1" label="Minimum number of different peptide matches with a score above average"
 		help="This will remove proteins that do not have enough reasonable peptides hits."/>

 		<param name="minNrIdsForInferencePeptide" type="integer" size="10" value="1" label="Minimum number of peptide identifications for inference peptides"
 		help="Minimum number of peptide identifications a peptide needs to be used as inference peptide for secondary proteins."/>
-
+		<!--  Currently, when one feature clusters with a foreign peptide, the peptide is no longer an inference peptide. This is quite strict; it could be made less strict
+		      by letting the user indicate, for example, that 90% of the features should be inference features for it to count as an inference peptide. See QuantifereTool.inferSecondaryProteins(). -->

      	<param name="functionalAnnotationCSV" type="data" format="csv,txt,tsv" optional="true"
      	label="(Functional)annotation mapping file (csv or tsv format)"
@@ -207,6 +211,20 @@
 .. _Cytoscape chartplugin: http://apps.cytoscape.org/apps/chartplugin


+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication, please
+cite the following paper:
+
+Pieter N. J. Lukasse and Antoine H. P. America (2014).
+Protein Inference Using Peptide Quantification Patterns.
+http://dx.doi.org/10.1021/pr401072g
+

   </help>
+  <citations>
+        <citation type="doi">10.1021/pr401072g</citation> <!-- example
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
 </tool>
Binary file static/images/napq_overview.png has changed