# HG changeset patch # User cstrittmatter # Date 1556426544 14400 # Node ID c9d98f5bc24035ec418e08a28ee53939b7431d0e # Parent a0852bb4b09b4f3ae7e201915897e4f67d30c373 planemo upload commit 003cdb83fd17248ef57959332d58a3c96311332a-dirty diff -r a0852bb4b09b -r c9d98f5bc240 README.md --- a/README.md Thu Apr 25 07:59:05 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -# kmer_id -Mitichondrial read identification by kmer database for Galaxy server - -File kmer_read_m3.cpp -uses gzip library, compile with: -g++ -O3 -std=c++0x kmer_read_m3.cpp -o kmerread -lz - -database files needed in same directory: -(can create with kmer_build, but not described here yet) -mitochondria_data.txt -mitochondria_refkey.txt -mitochondria_count.txt -mitochondria_tree.txt -mitochondria_probes.txt.gz -1a.fasta (test input) - -File kmer_read_m3.py -(Python 2.7) -Run with: -python kmer_read_m3.py -w [working directory] -d [output directory] -i [input filename1] [input filename2] - -Input files can be two paired files (.fastq, .fastq.gz, .fasta, .fasta.gz) or a single file with none as filename2 -Output is .csv file with read count and % abundance for each species. diff -r a0852bb4b09b -r c9d98f5bc240 kmer_read_m3.py --- a/kmer_read_m3.py Thu Apr 25 07:59:05 2019 -0400 +++ b/kmer_read_m3.py Sun Apr 28 00:42:24 2019 -0400 @@ -3,7 +3,7 @@ import re import sys -#import numpy as np -- removed numpy requirement +import numpy as np from subprocess import Popen, PIPE if __name__ == '__main__': @@ -65,57 +65,49 @@ if use == "1": name_list.append(name) factor_list.append(tested / hit / gensize) - #factor_arr = np.array(factor_list) + factor_arr = np.array(factor_list) num_targs = len(name_list) + print wdir process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) (stdout, stderr) = process.communicate() #read in output files noid_list = [] - num_cols = 1 #only one sample, no matrix needed - #m = np.zeros((num_targs,num_cols)) - m = [0.0 for _ in range(num_targs)] + read_ct = [] + num_cols = 1 + m = np.zeros((num_targs,num_cols)) col = 0 data_file = open(cname, 'r') data = ''.join(data_file.readlines()) data_file.close() lines = data.split('\n') - read_ct = 0.0 + read_ct.append(0.0) index = 0 for line in lines: if len(line) > 1: t_s, count, uniq = line.split(',') target = int(t_s) - read_ct += float(count) + read_ct[col] += float(count) if target > 0: if in_use[target]: - m[index] = float(count) + m[index, col] = float(count) index += 1 else: noid_list.append(int(count)) - #b = m * factor_arr[:,None] #normalize each row by kmer coverage - #sums = np.sum(b, axis=0) - #b = b / sums[None,:] - #b = b * 100.0 - sum = 0.0 - b = [] - for i in range(num_targs): - b1 = m[i] * factor_list[i] - sum += b1 - b.append(b1) - sum /= 100.0 - for i in range(num_targs): - b[i] /=sum - #rowmax = b.max(axis=1) + b = m * factor_arr[:,None] #normalize each row by kmer coverage + sums = np.sum(b, axis=0) + b = b / sums[None,:] + b = b * 100.0 + rowmax = b.max(axis=1) out_file = open(oname, 'w') output = "taxid,reads,abundance\n" out_file.write(output) output = "total," - #for i in range(num_cols): - output += str(read_ct) + ",," + for i in range(num_cols): + output += str(read_ct[i]) + ",," output += "\n" out_file.write(output) output = "no_id," @@ -125,10 +117,10 @@ out_file.write(output) for i in range(num_targs): #l = order_row[i] - if m[i] > 0: #only output non-zero results + if rowmax[i] > 0.000: #only output non-zero results output = name_list[i] for j in range(num_cols): - output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i]) + output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j]) output += "\n" out_file.write(output) out_file.close() diff -r a0852bb4b09b -r c9d98f5bc240 mitokmer.1.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mitokmer.1.xml Sun Apr 28 00:42:24 2019 -0400 @@ -0,0 +1,82 @@ + + Eukaryotic abundance prediction by mitochondrial content + + python + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + meh + } + + + \ No newline at end of file diff -r a0852bb4b09b -r c9d98f5bc240 mitokmer.xml --- a/mitokmer.xml Thu Apr 25 07:59:05 2019 -0400 +++ b/mitokmer.xml Sun Apr 28 00:42:24 2019 -0400 @@ -38,7 +38,10 @@ python $__tool_directory__/kmer_read_m3.py -w $__tool_directory__ -d $__tool_directory__/output - -i $__tool_directory__/$fwd $__tool_directory__/$rev + -i $__tool_directory__/$fwd $__tool_directory__/$rev; + echo "run cat"; + rm $__tool_directory__/results*; + cat $__tool_directory__/output/mitokmer_result.csv > $__tool_directory__/results.csv; ]]> @@ -58,13 +61,13 @@ - + - + diff -r a0852bb4b09b -r c9d98f5bc240 output/mitokmer_result.1.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/output/mitokmer_result.1.csv Sun Apr 28 00:42:24 2019 -0400 @@ -0,0 +1,8 @@ +axid,reads,abundance,test +total,4192977.0,tes,test +no_id,4192954,test,test +Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932 +Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185 +Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228 +Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674 +Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980 \ No newline at end of file diff -r a0852bb4b09b -r c9d98f5bc240 output/mitokmer_result.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/output/mitokmer_result.txt Sun Apr 28 00:42:24 2019 -0400 @@ -0,0 +1,8 @@ +axid,reads,abundance,test +total,4192977.0,tes,test +no_id,4192954,test,test +Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932 +Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185 +Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228 +Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674 +Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980 \ No newline at end of file diff -r a0852bb4b09b -r c9d98f5bc240 results.csv