Mercurial > repos > davidvanzessen > mutation_analysis
comparison summary_to_fasta.py @ 0:8a5a2abbb870 draft default tip
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Mon, 29 Aug 2016 05:36:10 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:8a5a2abbb870 | 
|---|---|
"""Convert the 1_Summary file of an IMGT zip file into a FASTA file."""

import argparse


def summary_to_fasta(infile, fasta):
    """Write every row of a tab-separated summary file that has a sequence as FASTA.

    The first line of *infile* is the header; the "Sequence ID" and
    "Sequence" columns are located by name. Each following row is either
    written to *fasta* as ``>ID`` / ``sequence`` or counted as skipped.

    Args:
        infile: Path of the tab-separated summary file to read.
        fasta: Path of the FASTA file to write (overwritten if present).

    Returns:
        A ``(no_results, no_seqs, passed)`` tuple: rows too short to hold a
        result, rows whose sequence field is empty, and rows written.

    Raises:
        ValueError: If the header lacks a "Sequence ID" or "Sequence" column.
    """
    no_results = 0
    no_seqs = 0
    passed = 0

    with open(infile, 'r') as src, open(fasta, 'w') as dst:
        header = next(src, None)
        if header is None:
            # Empty input: nothing to convert, the output file stays empty.
            return no_results, no_seqs, passed

        # Strip the line terminator before splitting; otherwise the last
        # column name carries a trailing newline and cannot be found by name.
        columns = header.rstrip("\r\n").split("\t")
        id_col = columns.index("Sequence ID")
        seq_col = columns.index("Sequence")

        # Rows with fewer than 5 fields are counted as "no results". A row
        # must also reach both columns read below, so indexing cannot fail.
        min_fields = max(5, id_col + 1, seq_col + 1)

        for line in src:
            # Same stripping as the header, so a sequence in the last column
            # does not keep its newline (and an empty one is seen as empty).
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < min_fields:
                no_results += 1
                continue

            seq = fields[seq_col]
            if not seq:
                no_seqs += 1
                continue

            dst.write(">" + fields[id_col] + "\n" + seq + "\n")
            passed += 1

    return no_results, no_seqs, passed


def main():
    """Parse the command line, run the conversion and print the row counts."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True,
                        help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", required=True,
                        help="The output fasta file")
    args = parser.parse_args()

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    # Pre-formatted strings print identically under Python 2 and Python 3.
    print("No results: {0}".format(no_results))
    print("No sequences: {0}".format(no_seqs))
    print("Written to fasta file: {0}".format(passed))


if __name__ == "__main__":
    main()
