changeset 0:d56631911cc1 draft

Uploaded
author tyty
date Mon, 15 Sep 2014 14:41:13 -0400
parents
children b6d9b0059499
files Iterative_mapping/.DS_Store Iterative_mapping/iterative_map.py Iterative_mapping/iterative_map.xml Iterative_mapping/map_ex.py Iterative_mapping/read_file.py Iterative_mapping/read_s_file.py Iterative_mapping/remove_map.py Iterative_mapping/seq_track.py Iterative_mapping/tool_dependencies.xml Iterative_mapping/truncate.py Iterative_mapping/unmap.py
diffstat 11 files changed, 402 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file Iterative_mapping/.DS_Store has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/iterative_map.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+from read_file import *
+from read_s_file import *
+
+type_input = sys.argv[1]
+seq_file = sys.argv[2]
+ref_file = sys.argv[3]
+shift = sys.argv[4]
+length = sys.argv[5]
+map_type = sys.argv[6]
+output_file = sys.argv[7]
+
+
+if map_type!="default":
+    s = ""
+    s = s+"-v "+sys.argv[8]
+    s = s+" -5 "+sys.argv[9]
+    s = s+" -3 "+sys.argv[10]
+    s = s+" -k "+sys.argv[11]
+    if sys.argv[12]:
+        s = s+" -a"
+    if int(sys.argv[13])>=1:
+        s = s+" -m "+sys.argv[13]
+    if sys.argv[14]:
+        s = s+" --best --strata "
+    
+else:
+    s = "-v 3 -a --best --strata "
+
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspath = ""
+for i in range(len(ost)-1):
+    syspath = syspath+ost[i].strip()
+    syspath = syspath+'/'
+
+os.system("bowtie-build -f "+ref_file+" "+syspath+"ref > "+syspath+"log.txt")
+
+os.system("cp "+seq_file+" "+syspath+"seq0.fa")
+
+if type_input == "fasta":
+    tp = 'fasta'
+if type_input == "fastq":
+    tp = 'fastq'
+
+k = 0
+print(type_input)
+while(True):
+    if type_input == "fasta":
+        os.system("bowtie "+s+"-f "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam")
+    if type_input == "fastq":
+        os.system("bowtie "+s+"-q "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam")
+    os.system("samtools view -Sb -F 0xfff "+syspath+"map"+str(k)+".sam > "+syspath+"mapped"+str(k)+".bam 2>"+syspath+"log.txt") #get mapped reads
+    os.system("samtools view -Sb -f 0x4 "+syspath+"map"+str(k)+".sam > "+syspath+"umapped"+str(k)+".bam 2>"+syspath+"log.txt") #get unmapped reads
+    os.system("samtools view -Sb -f 0x10 "+syspath+"map"+str(k)+".sam > "+syspath+"rmapped"+str(k)+".bam 2>"+syspath+"log.txt") #get reversed mapped reads
+    os.system("samtools merge -f "+syspath+"unmapped"+str(k)+".bam "+syspath+"umapped"+str(k)+".bam "+syspath+"rmapped"+str(k)+".bam") #get reversed mapped reads
+    os.system("samtools view -h -o "+syspath+"unmapped"+str(k)+".sam "+syspath+"unmapped"+str(k)+".bam") #get reversed mapped reads
+    if k>0:
+        os.system("samtools view -h -o "+syspath+"mapped"+str(k)+".sam "+syspath+"mapped"+str(k)+".bam") #get reversed mapped reads
+        os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"unmapped"+str(k)+".txt")
+        os.system("cut -f 1 "+syspath+"mapped"+str(k)+".sam > "+syspath+"mapped"+str(k)+".txt")
+        os.system("python "+syspath+"remove_map.py "+syspath+"unmapped"+str(k)+".txt "+syspath+"mapped"+str(k)+".txt "+syspath+"runmapped"+str(k)+".txt")
+        os.system("rm "+syspath+"mapped"+str(k)+".sam")
+        os.system("rm "+syspath+"mapped"+str(k)+".txt")
+        os.system("rm "+syspath+"unmapped"+str(k)+".txt")
+    else:
+        os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"runmapped"+str(k)+".txt")
+    
+    os.system("rm "+syspath+"unmapped"+str(k)+".bam")
+    os.system("rm "+syspath+"umapped"+str(k)+".bam")
+    os.system("rm "+syspath+"rmapped"+str(k)+".bam")
+    os.system("python "+syspath+"seq_track.py "+syspath+"runmapped"+str(k)+".txt "+syspath+"seq"+str(k)+".fa "+syspath+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence
+    os.system("python "+syspath+"truncate.py "+syspath+"unmap_seq"+str(k)+".fa "+shift+" "+syspath+"seq"+str(k+1)+".fa "+length) #truncate unmapped sequence
+    os.system("rm "+syspath+"seq"+str(k)+".fa") #Remove sequences being mapped
+    os.system("rm "+syspath+"map"+str(k)+".sam") #Remove mapping file
+    os.system("rm "+syspath+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce
+    os.system("rm "+syspath+"runmapped"+str(k)+".txt")
+    os.system("rm "+syspath+"unmapped"+str(k)+".sam")
+    
+    os.system("wc -l "+syspath+"seq"+str(k+1)+".fa > "+syspath+"count"+str(k+1)+".txt")
+    c = read_sp_file(syspath+"count"+str(k+1)+".txt")
+    if c[0][0] == '0': #If no reads is in the sequence file, stop
+        os.system("rm "+syspath+"count"+str(k+1)+".txt")
+        os.system("rm "+syspath+"seq"+str(k+1)+".fa")
+        break
+    os.system("rm "+syspath+"count"+str(k+1)+".txt")
+    k = k+1
+
+ss = ""
+for i in range(0,k+1):
+    ss = ss+" "+syspath+"mapped"+str(i)+".bam"
+
+
+os.system("samtools merge -f "+output_file+" "+ss)
+#print("samtools merge mapped_all.bam"+ss)
+os.system("rm "+syspath+"mapped*.bam")
+os.system("rm "+syspath+"ref*")
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/iterative_map.xml	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,80 @@
+<tool id="iterative_map_pipeline" name="Iterative mapping" version="1.0">
+	<description></description>
+	<command interpreter="python">
+        #if $mapping_file.type == "user"
+            iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best  
+        #else
+            iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output
+        #end if
+    </command>
+        <requirements>
+                <requirement type="package" version="1.61">biopython</requirement>
+                <requirement type="package" version="1.7">numpy</requirement>
+                <requirement type="package" version="0.1.18">samtools</requirement>
+                <requirement type="package" version="0.12.7">bowtie</requirement>
+        </requirements>
+	<inputs>
+                <conditional name="file_format">
+                  <param name="type" type="select" label="Format of the file of the reads (Default FASTQ)">
+                    <option value="fastq">FASTQ</option>
+                    <option value="fasta">FASTA</option>
+                  </param>
+                  <when value="fastq">
+                    <param name="seq_file" type="data" format="fastq" label="Fastq file"/>
+                  </when>
+                  <when value="fasta">
+                    <param name="seq_file" type="data" format="fasta" label="Fasta file"/>
+                  </when>
+                </conditional>
+		<param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
+                <param name="shift" type="integer" value="1" label="Number of nucleotide trimmed each round"/>
+                <param name="length" type="integer" value="21" label="Minimum requirement of read length for mapping"/>
+                <conditional name="mapping_file">
+                  <param name="type" type="select" label="Bowtie mapping flags (Default -v 0 -a --best --strata)">
+                    <option value="default">Default</option>
+                    <option value="user">User specified</option>
+                  </param>
+                  <when value="default"/>
+                  <when value="user"> 
+                    <param name="param_v" type="integer" value="0" label="Number of mismatches for SOAP-like alignment policy (-v)"/>
+                    <param name="param_five" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)"/>
+                    <param name="param_three" type="integer" value="0" label="Trim n bases from high-quality (right) end of each read before alignment (-3)"/>
+                    <param name="param_k" type="integer" value="1" label="Report up to n valid alignments per read (-k)"/>
+                    <param name="param_a" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to report all valid alignments per read (-a)"/>
+                    <param name="param_m" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m), -1 for unlimited"/>
+                    <param name="param_best" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best --strata)"/>
+                  </when>
+                </conditional>
+
+	</inputs>
+	<outputs>
+		<data name="output" type="data" format="bam"/>
+	</outputs>
+
+	<help>
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. Sequence file type (FASTA/FASTQ)
+* 2. Sequence file (fasta/fastq format) {Default: fastq file}
+* 3. Reference file (e.g. cDNA library [fasta])
+* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping)
+* 5. “Length” (The minimum length of the reads for mapping after trimming)
+* [Optional]
+* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. use -5/-3 flag to trim nucleotides from 5'/3' end of the reads)
+
+-----
+
+**Output**:
+
+A bam file with all of the reads that are mapped	
+
+
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/map_ex.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]
+result_file = sys.argv[2]
+
+
+#reads = read_t_file(read_file);
+
+f = open(map_file);
+h = file(result_file, 'w')
+
+for aline in f.readlines():
+    tline = aline.strip();
+    tl = tline.split('\t');
+    if len(tl)>4:
+        if int(tl[1].strip())== 0:
+            h.write(tline)
+            h.write('\n')
+
+
+f.close();
+h.close()
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/read_file.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    f = open(in_file);
+    result = [];
+    for aline in f.readlines():
+        temp = [];
+        tline = aline.strip();
+        tl = tline.split('\t');
+        for i in range(0, len(tl)):
+            temp.append(tl[i].strip());
+        result.append(temp);
+    f.close();
+    return result;
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/read_s_file.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_sp_file(in_file):
+    f = open(in_file);
+    result = [];
+    for aline in f.readlines():
+        temp = [];
+        tline = aline.strip();
+        tl = tline.split(' ');
+        for i in range(0, len(tl)):
+            if len(tl[i].strip())>0:
+                temp.append(tl[i].strip());
+        result.append(temp);
+    f.close();
+    return result;
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/remove_map.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+
+
+unmap_file = sys.argv[1]
+map_file = sys.argv[2]
+result_file = sys.argv[3]
+
+
+unmap = read_t_file(unmap_file)
+mapped = read_t_file(map_file)
+h = file(result_file, 'w')
+
+maps = set()
+for i in range(len(mapped)):
+    maps.add(mapped[i][0])
+
+
+for i in range(len(unmap)):
+    name = unmap[i][0]
+    if name not in maps:
+        h.write(name)
+        h.write('\n')
+
+
+h.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/seq_track.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+unmap_file = sys.argv[1]
+reads_file = sys.argv[2]
+result_file = sys.argv[3]
+tp = sys.argv[4]
+
+
+unmap = read_t_file(unmap_file);
+
+h = file(result_file, 'w')
+
+reads = SeqIO.parse(reads_file,tp)
+um = set()
+for i in range(0, len(unmap)):
+    id_r = unmap[i][0]
+    um.add(id_r)
+
+for read in reads:
+    if read.id in um:
+        h.write('>')
+        h.write(read.id)
+        h.write('\n')
+        h.write(read.seq.tostring())
+        h.write('\n')
+    
+
+
+h.close()
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/tool_dependencies.xml	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="biopython" version="1.61">
+        <repository changeset_revision="ae9dda584395" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="numpy" version="1.7">
+        <repository changeset_revision="4:ef12a3a11d5b" name="package_numpy_1_7" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="samtools" version="0.1.18">
+        <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bowtie" version="0.12.7">
+        <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/truncate.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from Bio import SeqIO
+
+fasta_file = sys.argv[1]
+shift_in = sys.argv[2]
+result_file = sys.argv[3]
+length = sys.argv[4]
+
+shift = int(shift_in)
+    
+fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
+h = file(result_file,'w')
+for seq in fasta_sequences:
+        nuc = seq.id;
+        sequence = seq.seq.tostring();
+        if (len(sequence)-shift)>=int(length):
+                h.write('>'+nuc)
+                h.write('\n')
+                h.write(sequence[0:(len(sequence)-shift)])
+                h.write('\n')
+
+
+
+
+h.close()
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/unmap.py	Mon Sep 15 14:41:13 2014 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]
+result_file = sys.argv[2]
+
+
+#reads = read_t_file(read_file);
+
+f = open(map_file);
+h = file(result_file, 'w')
+
+for aline in f.readlines():
+    tline = aline.strip();
+    tl = tline.split('\t');
+    if len(tl)>4:
+        if int(tl[1].strip()) != 0:
+            h.write(tl[0].strip());
+            h.write('\n');
+
+
+f.close();
+h.close()
+
+
+
+