Mercurial > repos > greg > affy2vcf2

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Thu May 16 12:42:19 2019 -0400
@@ -0,0 +1,11 @@
+name: affy2vcf
+owner: greg
+description: |
+  Contains a tool that converts Affymetrix genotype calls and intensity files to VCF format.
+homepage_url: https://github.com/freeseek/gtc2vcf
+long_description: |
+  Contains a tool that converts Affymetrix genotype calls and intensity files to VCF format.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/convert_formats/affy2vcf
+type: unrestricted
+categories:
+  - Convert Formats
--- a/README	Thu May 16 10:59:34 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-init commit
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/affy2vcf.xml	Thu May 16 12:42:19 2019 -0400
@@ -0,0 +1,132 @@
+<tool id="affy2vcf" name="Convert Affymetrix" version="@WRAPPER_VERSION@">
+    <description>genotype calls and intensities to VCF</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.9">bcftools-gtc2vcf-plugin</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+@PREPARE_ENV@
+#set text_outputs_dir = 'text_outputs'
+mkdir $text_outputs_dir &&
+bcftools +\$BCFTOOLS_PLUGINS/affy2vcf.so
+#if str($reference_genome_source_cond.reference_genome_source) == "history":
+    --fasta-ref '$reference_genome_source_cond.history_item'
+#else:
+    --fasta-ref '$reference_genome_source_cond.locally_cached_item.fields.path'
+#end if
+--annot '$annot'
+--snp-posteriors '$snp_posteriors'
+--summary '$summary'
+--report '$report'
+--calls '$calls'
+--confidences '$confidences'
+#if str($output_gender_estimate_cond.output_gender_estimate) == "yes"
+    --sex '$output_gender_estimate'
+#end if
+#if str($append_version) == "no":
+    --no-version
+#end if
+--output '$output'
+--output-type $output_type
+@THREADS@
+    ]]></command>
+    <inputs>
+        <param name="annot" type="data" format="csv" label="Probeset annotation file" />
+        <param name="summary" type="data" format="txt" label="Apt-probeset genotype summary file" />
+        <param name="snp_posteriors" type="data" format="txt" label="Apt-probeset genotype snp-posteriors file" />
+        <param name="report" type="data" format="txt" label="Apt-probeset genotype report file" />
+        <param name="confidences" type="data" format="txt" label="Apt-probeset genotype confidences file" />
+        <param name="calls" type="data" format="txt" label="Apt-probeset genotype calls file" />
+        <conditional name="reference_genome_source_cond">
+            <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
+                <option value="history" selected="true">Use a reference genome from my history</option>
+                <option value="cached">Use a locally cached genome index</option>
+            </param>
+            <when value="history">
+                <param name="history_item" type="data" format="fasta" label="Select reference genome" />
+            </when>
+            <when value="cached">
+                <param name="locally_cached_item" type="select" format="fasta" label="Fasta reference sequence">
+                    <options from_data_table="all_fasta">
+                        <column name="name" index="2"/>
+                        <column name="value" index="0"/>
+                        <column name="path" index="3"/>
+                        <filter type="sort_by" column="1"/>
+                        <validator type="no_options" message="No cached Fasta genome references are available for the build associated with the selected probeset annotation file." />
+                    </options>
+                </param>
+            </when>
+        </conditional>
+        <conditional name="output_gender_estimate_cond">
+            <param name="output_gender_estimate" type="select" force_select="true" label="Output apt-probeset-genotype gender estimate?">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes"/>
+        </conditional>
+        <param name="append_version" type="select" force_select="true" label="Append version and command line to the header?">
+            <option value="no" selected="true">No</option>
+            <option value="yes">Yes</option>
+        </param>
+        <param name="output_type" type="select" force_select="true" label="Select format for output">
+            <option value="v" selected="true">Uncompressed VCF</option>
+            <option value="z">Compressed VCF</option>
+            <option value="u" selected="true">Uncompressed BCF</option>
+            <option value="b">Compressed BCF</option>
+        </param>
+    </inputs>
+    <outputs>
+        <expand macro="vcf_output"/>
+        <data name="output_gender_estimate" format="txt" label="${tool.name} (gender estimate) on ${on_string}">
+            <filter>output_gender_estimate_cond['output_gender_estimate'] == "yes"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="history_item" value="GCF_000222465.1_Adig_1.1_genomic.fna" ftype="fasta"/>
+            <param name="annot" value="Axion_Acropsnp_coral_Annotation_r1.csv" ftype="csv"/>
+            <param name="summary" value="AxiomGT1.summary.txt" ftype="txt"/>
+            <param name="snp_posteriors" value="AxiomGT1.snp-posteriors.txt" ftype="txt"/>
+            <param name="report" value="AxiomGT1.report.txt" ftype="txt"/>
+            <param name="confidences" value="AxiomGT1.confidences.txt" ftype="txt"/>
+            <param name="calls" value="AxiomGT1.calls.txt" ftype="txt"/>
+            <output name="output" value="output.vcf" ftype="vcf"/>
+        </test>
+    </tests>
+    <help>
+This tool converts Affymetrix genotype calls and intensity files to VCF format.
+
+-----
+
+**Required options**
+
+ * **Probeset annotation file** - probeset annotation file produced by the sequencing run.
+ * **Apt-probeset genotype summary file** - apt-probeset genotype summary file produced by the sequencing run.
+ * **Apt-probeset genotype snp-posteriors file** - apt-probeset genotype snp-posteriors file produced by the sequencing run.
+ * **Apt-probeset genotype report file** - apt-probeset genotype report file produced by the sequencing run.
+ * **Apt-probeset genotype confidences file** - apt-probeset genotype confidences file produced by the sequencing run.
+ * **Apt-probeset genotype calls file** - apt-probeset genotype calls file produced by the sequencing run.
+ * **Choose the source for the reference genome** - select a reference genome from your history or one installed into your local Galaxy environment by a data manager tool.
+
+**Other options**
+
+ * **Output apt-probeset-genotype gender estimate** - output apt-probeset-genotype gender estimate into an additional dataset.
+ * **Append version and command line to the header** - append version and command line to the header of the output VCF dataset.
+ * **Select format for output** - select one of uncompressed/compressed VCF/BCF.
+    </help>
+    <citations>
+        <expand macro="citation1"/>
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {Genovese, Giulio},
+            title = {None},
+            year = {None},
+            url = {https://github.com/freeseek/gtc2vcf},}
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample	Thu May 16 12:42:19 2019 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu May 16 12:42:19 2019 -0400
@@ -0,0 +1,25 @@
+<macros>
+    <token name="@WRAPPER_VERSION@">1.9</token>
+    <xml name="citation1">
+        <citation type="doi">10.1093/bioinformatics/btp352</citation>
+    </xml>
+    <token name="@PREPARE_ENV@">
+<![CDATA[
+export BCFTOOLS_PLUGINS=\$(dirname `which bcftools`)/../libexec/bcftools;
+]]>
+    </token>
+    <token name="@THREADS@">
+        --threads \${GALAXY_SLOTS:-4}
+    </token>
+    <xml name="vcf_output">
+        <data name="output" format="vcf">
+            <change_format>
+                <when input="output_type" value="b" format="bcf" />
+                <when input="output_type" value="u" format="bcf" />
+                <when input="output_type" value="z" format="vcf_bgzip" />
+                <when input="output_type" value="v" format="vcf" />
+            </change_format>
+        </data>
+    </xml>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu May 16 12:42:19 2019 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>