diff tools/rgenetics/rgFastQC.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rgenetics/rgFastQC.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,88 @@
+<tool name="Fastqc: Fastqc QC" id="fastqc" version="0.1">
+  <description>using FastQC from Babraham</description>
+  <command interpreter="python">
+    rgFastQC.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix" -f $input_file.ext -e ${GALAXY_DATA_INDEX_DIR}/shared/jars/FastQC/fastqc
+#if $contaminants.dataset and str($contaminants) > ''
+-c "$contaminants"
+#end if
+  </command>
+  <requirements>
+    <requirement type="package">FastQC</requirement>
+  </requirements>
+  <inputs>
+    <param format="fastqsanger,fastq,bam,sam" name="input_file" type="data" label="Short read data from your current history" />
+    <param name="out_prefix" value="FastQC" type="text" label="Title for the output file - to remind you what the job was for" size="80" />
+    <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" 
+           help="tab delimited file with 2 columns: name and sequence.  For example: Illumina Small RNA RT Primer	CAAGCAGAAGACGGCATACGA"/>
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file"  label="${out_prefix}.html" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="input_file" value="1000gsample.fastq" />
+      <param name="out_prefix" value="fastqc_out" />
+      <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
+      <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**Purpose**
+
+FastQC aims to provide a simple way to do some quality control checks on raw
+sequence data coming from high throughput sequencing pipelines. 
+It provides a modular set of analyses which you can use to give a quick
+impression of whether your data has any problems of 
+which you should be aware before doing any further analysis.
+
+The main functions of FastQC are:
+
+- Import of data from BAM, SAM or FastQ files (any variant)
+- Providing a quick overview to tell you in which areas there may be problems
+- Summary graphs and tables to quickly assess your data
+- Export of results to an HTML based permanent report
+- Offline operation to allow automated generation of reports without running the interactive application
+
+**FastQC documentation**
+
+This is a Galaxy interface to the external package FastQC_.
+Specific documentation on FastQC can be found on that site.
+FastQC incorporates the Picard-tools_ libraries for sam/bam processing.
+
+ .. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/
+ .. _Picard-tools: http://picard.sourceforge.net/index.shtml
+
+The contaminants file parameter was borrowed from the independently developed
+fastqcwrapper contributed to the Galaxy Community Tool Shed by J. Johnson.
+
+-----
+
+.. class:: infomark
+
+**Inputs and outputs**
+
+This wrapper will accept any fastq file as well as sam or bam as the primary file to check.
+It will also take an optional file containing a list of contaminants information, in the form of
+a tab-delimited file with 2 columns, name and sequence.
+
+The tool produces a single HTML output file that contains all of the results, including the following:
+
+- Basic Statistics
+- Per base sequence quality
+- Per sequence quality scores
+- Per base sequence content
+- Per base GC content
+- Per sequence GC content
+- Per base N content
+- Sequence Length Distribution
+- Sequence Duplication Levels
+- Overrepresented sequences
+- Kmer Content
+
+All except Basic Statistics and Overrepresented sequences are plots.
+
+</help>
+</tool>