Mercurial > repos > cstrittmatter > mitokmer
changeset 3:c9d98f5bc240 draft
planemo upload commit 003cdb83fd17248ef57959332d58a3c96311332a-dirty
author | cstrittmatter |
---|---|
date | Sun, 28 Apr 2019 00:42:24 -0400 |
parents | a0852bb4b09b |
children | 3c3520289da4 |
files | README.md kmer_read_m3.py mitokmer.1.xml mitokmer.xml output/mitokmer_result.1.csv output/mitokmer_result.txt results.csv |
diffstat | 6 files changed, 122 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Thu Apr 25 07:59:05 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -# kmer_id -Mitichondrial read identification by kmer database for Galaxy server - -File kmer_read_m3.cpp -uses gzip library, compile with: -g++ -O3 -std=c++0x kmer_read_m3.cpp -o kmerread -lz - -database files needed in same directory: -(can create with kmer_build, but not described here yet) -mitochondria_data.txt -mitochondria_refkey.txt -mitochondria_count.txt -mitochondria_tree.txt -mitochondria_probes.txt.gz -1a.fasta (test input) - -File kmer_read_m3.py -(Python 2.7) -Run with: -python kmer_read_m3.py -w [working directory] -d [output directory] -i [input filename1] [input filename2] - -Input files can be two paired files (.fastq, .fastq.gz, .fasta, .fasta.gz) or a single file with none as filename2 -Output is .csv file with read count and % abundance for each species.
--- a/kmer_read_m3.py Thu Apr 25 07:59:05 2019 -0400 +++ b/kmer_read_m3.py Sun Apr 28 00:42:24 2019 -0400 @@ -3,7 +3,7 @@ import re import sys -#import numpy as np -- removed numpy requirement +import numpy as np from subprocess import Popen, PIPE if __name__ == '__main__': @@ -65,57 +65,49 @@ if use == "1": name_list.append(name) factor_list.append(tested / hit / gensize) - #factor_arr = np.array(factor_list) + factor_arr = np.array(factor_list) num_targs = len(name_list) + print wdir process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) (stdout, stderr) = process.communicate() #read in output files noid_list = [] - num_cols = 1 #only one sample, no matrix needed - #m = np.zeros((num_targs,num_cols)) - m = [0.0 for _ in range(num_targs)] + read_ct = [] + num_cols = 1 + m = np.zeros((num_targs,num_cols)) col = 0 data_file = open(cname, 'r') data = ''.join(data_file.readlines()) data_file.close() lines = data.split('\n') - read_ct = 0.0 + read_ct.append(0.0) index = 0 for line in lines: if len(line) > 1: t_s, count, uniq = line.split(',') target = int(t_s) - read_ct += float(count) + read_ct[col] += float(count) if target > 0: if in_use[target]: - m[index] = float(count) + m[index, col] = float(count) index += 1 else: noid_list.append(int(count)) - #b = m * factor_arr[:,None] #normalize each row by kmer coverage - #sums = np.sum(b, axis=0) - #b = b / sums[None,:] - #b = b * 100.0 - sum = 0.0 - b = [] - for i in range(num_targs): - b1 = m[i] * factor_list[i] - sum += b1 - b.append(b1) - sum /= 100.0 - for i in range(num_targs): - b[i] /=sum - #rowmax = b.max(axis=1) + b = m * factor_arr[:,None] #normalize each row by kmer coverage + sums = np.sum(b, axis=0) + b = b / sums[None,:] + b = b * 100.0 + rowmax = b.max(axis=1) out_file = open(oname, 'w') output = "taxid,reads,abundance\n" out_file.write(output) output = "total," - #for i in range(num_cols): - output += str(read_ct) + ",," + for i in range(num_cols): + output += str(read_ct[i]) + ",," output += "\n" out_file.write(output) output = "no_id," @@ -125,10 +117,10 @@ out_file.write(output) for i in range(num_targs): #l = order_row[i] - if m[i] > 0: #only output non-zero results + if rowmax[i] > 0.000: #only output non-zero results output = name_list[i] for j in range(num_cols): - output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i]) + output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j]) output += "\n" out_file.write(output) out_file.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mitokmer.1.xml Sun Apr 28 00:42:24 2019 -0400 @@ -0,0 +1,82 @@ +<tool id="mitokmer_v1" name="mitokmer 1" version="1.0"> + <description>Eukaryotic abundance prediction by mitochondrial content</description> + <requirements> + <requirement type="package" version="2.7">python</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + + rm $__tool_directory__/R*.fastq; + #if $reads.reads_select == "paired" + #set $name = $reads.forward.name.split('.')[0].replace(' ','_') + #set $forward = $reads.forward + #set $reverse = $reads.reverse + #else + #set $name = $reads.allreads.name.split('.')[0].replace(' ','_') + #set $forward = $reads.allreads + #set $reverse = $reads.allreads + #end if + echo $name; + echo "-=-=-"; + #if $forward.is_of_type('fastq.gz', 'fastqsanger.gz') + #set $fwd = "R1.fastq.gz" + #set $rev = "R2.fastq.gz" + #else if $forward.is_of_type('fastq', 'fastqsanger') + #set $fwd = "R1.fastq" + #set $rev = "R2.fastq" + #else if $forward.is_of_type('fasta.gz') + #set $fwd = "R1.fasta.gz" + #set $rev = "R2.fasta.gz" + #else + #set $fwd = "R1.fasta" + #set $rev = "R2.fasta" + #end if + ln -s $forward $__tool_directory__/$fwd; + ln -s $reverse $__tool_directory__/$rev; + #if $reads.reads_select == "single" + #set $rev = "none" + #end if + python $__tool_directory__/kmer_read_m3.py + -w $__tool_directory__ + -d $__tool_directory__/output + -i $__tool_directory__/$fwd $__tool_directory__/$rev + ]]></command> + <inputs> + + <conditional name="reads"> + <param name="reads_select" type="select" label="One FASTQ dataset from your history, or two datasets from your history"> + <option value="single">One FASTQ dataset from your history</option> + <option value="paired">Two FASTQ datasets from your history</option> + </param> + <when value="single"> + <param label="all reads" type="data" name="allreads" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz"/> + </when> + <when value="paired"> + <param label="Forward reads" type="data" name="forward" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz" /> + <param label="Reverse reads" type="data" name="reverse" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz" /> + </when> + </conditional> + + </inputs> + <outputs> + <data format="tabular" label="mitokmer Results" name="results" from_work_dir="output/mitokmer_result.csv"/> + </outputs> + <tests> + <test> + <param name="reads_select" value="single" /> + <param name="allreads" value="1a.fasta" format="fasta"/> + <output name="output1" file="mitokmer_result.csv"/> + </test> + </tests> + + <help><![CDATA[ + +**Usage: mitokmer.py** + + ]]></help> + <citations> + <citation type="bibtex"> + meh + }</citation> + </citations> + +</tool> \ No newline at end of file
--- a/mitokmer.xml Thu Apr 25 07:59:05 2019 -0400 +++ b/mitokmer.xml Sun Apr 28 00:42:24 2019 -0400 @@ -38,7 +38,10 @@ python $__tool_directory__/kmer_read_m3.py -w $__tool_directory__ -d $__tool_directory__/output - -i $__tool_directory__/$fwd $__tool_directory__/$rev + -i $__tool_directory__/$fwd $__tool_directory__/$rev; + echo "run cat"; + rm $__tool_directory__/results*; + cat $__tool_directory__/output/mitokmer_result.csv > $__tool_directory__/results.csv; ]]></command> <inputs> @@ -58,13 +61,13 @@ </inputs> <outputs> - <data format="tabular" label="mitokmer Results" name="results" from_work_dir="output/mitokmer_result.csv"/> + <data format="tabular" label="mitokmer Results" name="results" from_work_dir="results.csv"/> </outputs> <tests> <test> <param name="reads_select" value="single" /> <param name="allreads" value="1a.fasta" format="fasta"/> - <output name="output1" file="mitokmer_result.csv"/> + <output name="output1" file="output/mitokmer_result.csv"/> </test> </tests>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/output/mitokmer_result.1.csv Sun Apr 28 00:42:24 2019 -0400 @@ -0,0 +1,8 @@ +axid,reads,abundance,test +total,4192977.0,tes,test +no_id,4192954,test,test +Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932 +Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185 +Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228 +Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674 +Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/output/mitokmer_result.txt Sun Apr 28 00:42:24 2019 -0400 @@ -0,0 +1,8 @@ +axid,reads,abundance,test +total,4192977.0,tes,test +no_id,4192954,test,test +Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932 +Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185 +Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228 +Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674 +Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980 \ No newline at end of file