comparison cut_degen.py @ 0:716cdcccc919 draft default tip

planemo upload for repository https://github.com/mshortr/degenerateprimerremoval_fastq commit 7d51a7e3ccb0164b530bf1664068d86241f2f2f5-dirty
author megan-shortridge
date Tue, 15 Sep 2015 14:14:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:716cdcccc919
1 #This is a version that is meant to work with Fastq files
2 import Bio
3 from Bio import SeqUtils
4 from Bio import SeqIO
5 import sys
6
7
8 #for records and adapter, should be sys.argv[1 and 2]
9 fastqfile = sys.argv[1] #This is the input fasta file
10 adapter = sys.argv[2] #This is the input adapter as a string
11 keepreads = sys.argv[3] #True or false, this will determine whether or not reads are kept. If true, it will keep reads that do not have the adapter in it. If false, it will get rid of those reads.
12 removeadapters = sys.argv[4] #True or false, if this is True, the adapters will be removed. If true, removes the adapters from the sequences. If false, it keeps them.
13 end_defn = sys.argv[5] #If 5, the primer is removed from the 5' end of the sequence. If 3, then it is removed from the 3' end of the sequence.
14 adapter_name = sys.argv[6] #This is the name of the adapter that you can put into the output text file.
15
16
17 # Here is the command for the test: python cut_degen.py 'test.fastq' 'GAACWAYWYCT' 'True' 'True' '5' 'test'
18
19 keepreads = str(keepreads)
20 removeadapters = str(removeadapters)
21 fastqfile=str(fastqfile)
22 end_defn = str(end_defn)
23
24 fh = open(fastqfile, mode='r+')
25 len_adapter = len(adapter)
26 count_adapter_found = 0
27 count_adapter_not_found = 0
28 total_seq_count = 0
29
30 parsed = SeqIO.parse(fh, format="fastq")
31
32 output_fh_name = "output.fastq"
33
34 if fastqfile=="test3prime.fastq":
35 output_fh_name="output2.fastq"
36
37 output_fh = open(output_fh_name, mode='w+')
38
39 output_text_name = "output.txt"
40 if fastqfile=="test3prime.fastq":
41 output_text_name="output2.txt"
42 output_text_fh = open(output_text_name, mode='w+')
43
44
45 for record in parsed:
46 try:
47 sequence = str(record.seq)
48 search = SeqUtils.nt_search(sequence, adapter) #This will search the
49 index = int(search[1]) #If it finds the adapter, is the starting index from which it was found.
50 adapter_start = index
51 adapter_end = index+len_adapter
52 count_adapter_found +=1
53 total_seq_count+=1
54 if removeadapters == "True": #if the value is true, it removes the adapters from the sequences.
55 if end_defn=="5":
56 record = record[adapter_end:] #If a 5' adapter, you remove adapter from beginning
57 elif end_defn=="3":
58 record = record[:adapter_start] #If it is a 3' adapter, you remove the adapter at the end
59 elif removeadapters == "False": #if the value is false, it does not remove the adapters from the sequences.
60 record = record
61 SeqIO.write(record, output_fh, format="fastq") #No matter what, write the reads.
62 except IndexError:
63 count_adapter_not_found+=1
64 total_seq_count+=1
65 record = record
66 if keepreads=="True":
67 SeqIO.write(record, output_fh, format="fastq")
68 elif keepreads=="False":
69 pass
70 else:
71 pass
72
73 output_fh.close()
74
75 percent_cut = 100*(float(count_adapter_found)/float(total_seq_count))
76
77
78 output_text_fh.write("The total number of sequences that were analyzed was %i.\n\n"%total_seq_count)
79 output_text_fh.write("Adapter was found and removed for %i sequences (%i%% of total).\n\n"%(count_adapter_found, percent_cut))
80
81 if keepreads =="True":
82 output_text_fh.write("Sequences that did not contain the adapter were kept.\n\n")
83 elif keepreads=="False":
84 output_text_fh.write("Sequences that did not contain the adapter were removed from the dataset.\n\n")
85 if removeadapters=="True":
86 output_text_fh.write("The adapters were removed from the dataset.\n\n")
87 elif removeadapters=="False":
88 output_text_fh.write("The adapters were not removed from the dataset.\n\n")
89 if end_defn=="5":
90 output_text_fh.write("Adapters were removed from the 5\' end.\n\n")
91 elif end_defn=="3":
92 output_text_fh.write("Adapters were removed from the 3\'end.\n\n")
93
94 output_text_fh.write("The name of the adapter that was removed was named %s, and had the sequence %s.\n\n"%(adapter_name,adapter))
95 output_text_fh.close()