Mercurial > repos > cstrittmatter > mitokmer

--- a/README.md	Thu Apr 25 07:59:05 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-# kmer_id
-Mitichondrial read identification by kmer database for Galaxy server
-
-File kmer_read_m3.cpp
-uses gzip library, compile with:
-g++ -O3 -std=c++0x kmer_read_m3.cpp -o kmerread -lz
-
-database files needed in same directory:
-(can create with kmer_build, but not described here yet)
-mitochondria_data.txt
-mitochondria_refkey.txt
-mitochondria_count.txt
-mitochondria_tree.txt
-mitochondria_probes.txt.gz
-1a.fasta (test input)
-
-File kmer_read_m3.py
-(Python 2.7)
-Run with:
-python kmer_read_m3.py -w [working directory] -d [output directory] -i [input filename1] [input filename2]
-
-Input files can be two paired files (.fastq, .fastq.gz, .fasta, .fasta.gz) or a single file with none as filename2
-Output is .csv file with read count and % abundance for each species.
--- a/kmer_read_m3.py	Thu Apr 25 07:59:05 2019 -0400
+++ b/kmer_read_m3.py	Sun Apr 28 00:42:24 2019 -0400
@@ -3,7 +3,7 @@

 import re
 import sys
-#import numpy as np -- removed numpy requirement
+import numpy as np
 from subprocess import Popen, PIPE

 if __name__ == '__main__':
@@ -65,57 +65,49 @@
             if use == "1":
                 name_list.append(name)
                 factor_list.append(tested / hit / gensize)
-    #factor_arr = np.array(factor_list)
+    factor_arr = np.array(factor_list)
     num_targs = len(name_list)
+    print wdir
     process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2])

     (stdout, stderr) = process.communicate()

     #read in output files
     noid_list = []
-    num_cols = 1 #only one sample, no matrix needed
-    #m = np.zeros((num_targs,num_cols))
-    m = [0.0 for _ in range(num_targs)]
+    read_ct = []
+    num_cols = 1
+    m = np.zeros((num_targs,num_cols))
     col = 0
     data_file = open(cname, 'r')
     data = ''.join(data_file.readlines())
     data_file.close()
     lines = data.split('\n')
-    read_ct = 0.0
+    read_ct.append(0.0)
     index = 0
     for line in lines:
         if len(line) > 1:
             t_s, count, uniq = line.split(',')
             target = int(t_s)
-            read_ct += float(count)
+            read_ct[col] += float(count)
             if target > 0:
                 if in_use[target]:
-                    m[index] = float(count)
+                    m[index, col] = float(count)
                     index += 1
             else:
                 noid_list.append(int(count))

-    #b = m * factor_arr[:,None] #normalize each row by kmer coverage
-    #sums = np.sum(b, axis=0)
-    #b = b / sums[None,:]
-    #b = b * 100.0
-    sum = 0.0
-    b = []
-    for i in range(num_targs):
-        b1 = m[i] * factor_list[i]
-        sum += b1
-        b.append(b1)
-    sum /= 100.0
-    for i in range(num_targs):
-        b[i] /=sum
-    #rowmax = b.max(axis=1)
+    b = m * factor_arr[:,None] #normalize each row by kmer coverage
+    sums = np.sum(b, axis=0)
+    b = b / sums[None,:]
+    b = b * 100.0
+    rowmax = b.max(axis=1)

     out_file = open(oname, 'w')
     output = "taxid,reads,abundance\n"
     out_file.write(output)
     output = "total,"
-    #for i in range(num_cols):
-    output += str(read_ct) + ",,"
+    for i in range(num_cols):
+        output += str(read_ct[i]) + ",,"
     output += "\n"
     out_file.write(output)
     output = "no_id,"
@@ -125,10 +117,10 @@
     out_file.write(output)
     for i in range(num_targs):
         #l = order_row[i]
-        if m[i] > 0: #only output non-zero results
+        if rowmax[i] > 0.000: #only output non-zero results
             output = name_list[i]
             for j in range(num_cols):
-                output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i])
+                output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j])
             output += "\n"
             out_file.write(output)
     out_file.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mitokmer.1.xml	Sun Apr 28 00:42:24 2019 -0400
@@ -0,0 +1,82 @@
+<tool id="mitokmer_v1" name="mitokmer 1" version="1.0">
+    <description>Eukaryotic abundance prediction by mitochondrial content</description>
+    <requirements>
+      <requirement type="package" version="2.7">python</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+        rm $__tool_directory__/R*.fastq;
+        #if $reads.reads_select == "paired"
+        #set $name = $reads.forward.name.split('.')[0].replace(' ','_')
+        #set $forward = $reads.forward
+        #set $reverse = $reads.reverse
+        #else
+        #set $name = $reads.allreads.name.split('.')[0].replace(' ','_')
+        #set $forward = $reads.allreads
+        #set $reverse = $reads.allreads
+        #end if
+        echo $name;
+        echo "-=-=-";
+        #if $forward.is_of_type('fastq.gz', 'fastqsanger.gz')
+        #set $fwd = "R1.fastq.gz"
+        #set $rev = "R2.fastq.gz"
+        #else if $forward.is_of_type('fastq', 'fastqsanger')
+        #set $fwd = "R1.fastq"
+        #set $rev = "R2.fastq"
+        #else if $forward.is_of_type('fasta.gz')
+        #set $fwd = "R1.fasta.gz"
+        #set $rev = "R2.fasta.gz"
+        #else
+        #set $fwd = "R1.fasta"
+        #set $rev = "R2.fasta"
+        #end if
+        ln -s $forward $__tool_directory__/$fwd;
+        ln -s $reverse $__tool_directory__/$rev;
+        #if $reads.reads_select == "single"
+        #set $rev = "none"
+        #end if
+        python $__tool_directory__/kmer_read_m3.py
+        -w $__tool_directory__
+        -d $__tool_directory__/output
+        -i $__tool_directory__/$fwd $__tool_directory__/$rev
+    ]]></command>
+    <inputs>
+
+        <conditional name="reads">
+            <param name="reads_select" type="select" label="One FASTQ dataset from your history, or two datasets from your history">
+                <option value="single">One FASTQ dataset from your history</option>
+                <option value="paired">Two FASTQ datasets from your history</option>
+            </param>
+            <when value="single">
+                <param label="all reads" type="data" name="allreads" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz"/>
+            </when>
+            <when value="paired">
+                <param label="Forward reads" type="data" name="forward" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz" />
+                <param label="Reverse reads" type="data" name="reverse" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz" />
+            </when>
+        </conditional>
+
+    </inputs>
+    <outputs>
+      <data format="tabular" label="mitokmer Results" name="results" from_work_dir="output/mitokmer_result.csv"/>
+    </outputs>
+    <tests>
+        <test>
+         <param name="reads_select" value="single" />
+         <param name="allreads" value="1a.fasta" format="fasta"/>
+         <output name="output1" file="mitokmer_result.csv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**Usage: mitokmer.py**
+
+    ]]></help>
+    <citations>
+       <citation type="bibtex">
+        meh
+       }</citation>
+    </citations>
+
+</tool>
\ No newline at end of file
--- a/mitokmer.xml	Thu Apr 25 07:59:05 2019 -0400
+++ b/mitokmer.xml	Sun Apr 28 00:42:24 2019 -0400
@@ -38,7 +38,10 @@
         python $__tool_directory__/kmer_read_m3.py
         -w $__tool_directory__
         -d $__tool_directory__/output
-        -i $__tool_directory__/$fwd $__tool_directory__/$rev
+        -i $__tool_directory__/$fwd $__tool_directory__/$rev;
+        echo "run cat";
+        rm $__tool_directory__/results*;
+        cat $__tool_directory__/output/mitokmer_result.csv > $__tool_directory__/results.csv;
     ]]></command>
     <inputs>

@@ -58,13 +61,13 @@

     </inputs>
     <outputs>
-      <data format="tabular" label="mitokmer Results" name="results" from_work_dir="output/mitokmer_result.csv"/>
+      <data format="tabular" label="mitokmer Results" name="results" from_work_dir="results.csv"/>
     </outputs>
     <tests>
         <test>
          <param name="reads_select" value="single" />
          <param name="allreads" value="1a.fasta" format="fasta"/>
-         <output name="output1" file="mitokmer_result.csv"/>
+         <output name="output1" file="output/mitokmer_result.csv"/>
         </test>
     </tests>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/output/mitokmer_result.1.csv	Sun Apr 28 00:42:24 2019 -0400
@@ -0,0 +1,8 @@
+axid,reads,abundance,test
+total,4192977.0,tes,test
+no_id,4192954,test,test
+Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932
+Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185
+Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228
+Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674
+Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/output/mitokmer_result.txt	Sun Apr 28 00:42:24 2019 -0400
@@ -0,0 +1,8 @@
+axid,reads,abundance,test
+total,4192977.0,tes,test
+no_id,4192954,test,test
+Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932
+Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185
+Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228
+Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674
+Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980
\ No newline at end of file