changeset 0:a1b46e339580 draft default tip

planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit b96b6e06f6eaa6ae8ef4c24630dbb72a4aed7dbe
author public-health-bioinformatics
date Thu, 04 Jul 2019 19:36:38 -0400
parents
children
files antigenic_site_extraction.py antigenic_site_extraction.xml test-data/14_H3_aa_seqs_aligned.fasta test-data/FluA_H1_antigenic_aa_indices.csv test-data/FluA_H3_antigenic_aa_indices.csv test-data/FluB_Victoria_antigenic_aa_indices.csv test-data/FluB_Yamagata_antigenic_aa_indices.csv test-data/output.fasta
diffstat 8 files changed, 221 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/antigenic_site_extraction.py	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+'''Accepts fasta files of amino acid sequence, extracts specific amino acids (defined in a csv index array),
+and outputs extracted sequences - representing flu antigenic sites - to fasta (default) or csv.'''
+
+'''Author: Diane Eisler, Molecular Microbiology & Genomics, BCCDC Public Health Laboratory,Sept 2017'''
+
+import sys,string,os, time, Bio, argparse
+from Bio import Seq, SeqIO, SeqUtils, Alphabet, SeqRecord
+from Bio.SeqRecord import SeqRecord
+from Bio.Alphabet import IUPAC
+from Bio.Seq import Seq
+
+#parse command line arguments: optional -c/--csv flag plus three required positional paths
+parser = argparse.ArgumentParser()
+parser.add_argument("-c","--csv",help="export extracted antigenic sites to csv file",action="store_true")
+parser.add_argument("inFileHandle1") #path to batch fasta file with sequences to be parsed
+parser.add_argument("inFileHandle2") #path to .csv file containing positions of amino acids to extract
+parser.add_argument("outFileHandle") #user-specified path for output file of extracted amino acid sequences
+args = parser.parse_args()
+
+#inFileHandle1 = sys.argv[1] #batch fasta file with sequences to be parsed
+#inFileHandle2 = sys.argv[2] # .csv file containing positions of aa's to extract
+#outFileHandle = sys.argv[3] #user-specified name for output file of extracted aa seq's
+
+outFile= open(args.outFileHandle,'w') #open the output file for writing; mode 'w' truncates an existing file, it does not append
+localtime = time.asctime(time.localtime(time.time())) #date and time of analysis (not referenced again in this script)
+seqList = [] #uppercased amino acid SeqRecords read from the input fasta
+indexArray = [] #1-based amino acid positions (ints) read from the .csv file
+extractedSeqList = [] #SeqRecords holding the extracted antigenic-site residues, in the same order as seqList
+
+def extract_aa_from_sequence(record):
+    """Extract specific amino acids from SeqRecord, create new SeqRecord and append to list."""
+    original_sequence = str(record.seq) #convert the SeqRecord's Seq object to a plain string
+    new_sequence = "" #accumulates the extracted residues in indexArray order
+    new_id = record.id #NOTE(review): new_id is never read; the SeqRecord below uses record.id directly
+    #for each position in indexArray, append the matching residue (a position of 0 would silently pick the last residue; one past the sequence end raises IndexError)
+    for pos in indexArray:
+        char = original_sequence[pos-1] #positions in indexArray are 1-based; subtract 1 for Python's 0-based indexing
+        new_sequence = new_sequence + char
+    rec = SeqRecord(Seq(new_sequence,IUPAC.protein), id = record.id, name = "", description = "")
+    extractedSeqList.append(rec) #add new SeqRecord object to the module-level result list
+
+with open (args.inFileHandle2,'r') as inFile2:
+    '''Open csv file containing amino acid positions to extract and add to list.'''
+    #read items separated by commas into the position list
+    positionList = ""   
+    for line in inFile2:
+        #remove whitespace from the end of each line
+        strippedLine = line.rstrip()
+        #split the line at commas and assign the returned list to positionList (overwritten on every line, so only the last line of the file is kept)
+        positionList = strippedLine.split(',')
+    #convert the string items in positionList to int and add them to indexArray (int() raises ValueError on a non-numeric or empty item)
+    for item in positionList:
+        indexArray.append(int(item))
+    #print number of amino acids to extract and array to console as user check
+    print("Amino Acid positions to extract: %i " %(len(indexArray)))
+    print(indexArray)
+
+with open(args.inFileHandle1,'r') as inFile:
+    '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.'''
+    #read in sequences from fasta file, uppercase and add to seqList; NOTE(review): the alphabet argument needs a Biopython release that still ships Bio.Alphabet - confirm before upgrading
+    for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein):
+        record = record.upper()
+        seqList.append(record) #add the uppercased SeqRecord to the list of sequences
+    #print number of sequences to be processed as user check
+    print("\n%i flu sequences will be extracted for antigenic sites..." % len(seqList))
+    #extract the antigenic-site residues from each sequence; results accumulate in extractedSeqList
+    for record in seqList:
+        extract_aa_from_sequence(record)
+
+#print the id and original length of each sequence next to the length of its extracted sequence
+for x in range(0, len(seqList)):
+    print("Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x])))
+
+#determine if output format is fasta (default) or csv
+if args.csv:
+    #write csv file of extracted antigenic sites: one row per sequence, id first, then one residue per column
+    for record in extractedSeqList:
+        #outFile.write(record.id),","
+        name_part = (record.id).rstrip() + ','
+        sequence = str(record.seq).strip()
+        csv_seq = ",".join(sequence) #joining a string puts a comma between every single character
+        comma_separated_sequence = name_part + csv_seq + "\n"
+        print(comma_separated_sequence)
+        outFile.write(comma_separated_sequence)
+else:
+    #write fasta file of extracted antigenic sites
+    SeqIO.write(extractedSeqList,outFile,"fasta")
+
+print("\n%i Sequences Extracted to Output file: %s"  % ((len(extractedSeqList),args.outFileHandle)))
+inFile.close() #already closed by its with block; closing again has no effect
+inFile2.close() #already closed by its with block; closing again has no effect
+outFile.close() #the only close that matters: outFile was opened without a with block
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/antigenic_site_extraction.xml	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,38 @@
+<tool id="antigenic_site_extraction" name="Antigenic Site Extraction" version="0.0.1">
+  <requirements>
+    <requirement type="package" version="1.70">biopython</requirement>
+  </requirements>
+  <command detect_errors="exit_code"><![CDATA[
+    python $__tool_directory__/antigenic_site_extraction.py
+    '$input_fasta'
+    '$index_array'
+    '$output_file'
+    #if $csv
+    	-c
+    #end if
+  ]]></command>
+  <inputs>
+    <param name="input_fasta" format="fasta" type="data" />
+    <param name="index_array" format="csv" type="data" />
+    <param name="csv" type="boolean" label="Output to csv ?" />
+  </inputs>
+  <outputs>
+      <data format="fasta" name="output_file">
+        <change_format>
+            <when input="csv" value="true" format="csv" />
+        </change_format>
+      </data>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input_fasta" value="14_H3_aa_seqs_aligned.fasta" />
+      <param name="index_array" value="FluA_H3_antigenic_aa_indices.csv" />
+      <output name="output_file" value="output.fasta" />
+    </test>
+  </tests>
+  <help><![CDATA[
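+    Upload a fasta file of aligned, full-length flu amino acid sequences and an index array csv file listing the 1-based amino acid positions to extract (comma-separated, on a single line).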
+  ]]></help>
+  <citations>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/14_H3_aa_seqs_aligned.fasta	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,28 @@
+>Seq1(3C.2a.3)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVKQNGTSSACIRKSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Seq2(3C.2a.4)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFKDESFNWTGVTQNGTSSACIRRSKSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNIIAPRGYFKIRNGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Seq3(3C.2a.3)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICNSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVKQNGTSSACIRKSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Seq4(3C.2a.2)
+QKIPGNDNSMATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACMRRSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAKSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSNAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMMDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYDAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFHIKGVELKSGYKDW
+>Seq5(3C.2a.3)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVKQNGTSSACIRKSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Seq5(3C.2a.4)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASFGTLEFKNESFNWTGVTQNGTSSACIRRSKSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGYRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Seq6(3C.3a)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSSCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Seq7(3C.3a)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHSVYRDEALNNRFQIKGVELKSGYKDW
+>Seq8(3C.2a.1)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFKNESFNWTGVTQNGKSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQPSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIESIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Clade_3C.2a_A/Hong_Kong/5738/2014
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKH?TLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Clade_3C.3a_A/Switzerland/9715293/2013
+QKLPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSNSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDW
+>Seq9(3C.2a.1)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Seq10(3C.2a.1)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFKNESFNWTGVTQNGKSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIESIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW
+>Clade_3C.2a.1_A/Bolzano/7/2016
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQARGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FluA_H1_antigenic_aa_indices.csv	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,1 @@
+70,71,72,73,74,75,124,125,137,138,139,140,141,142,153,154,155,156,157,159,160,161,162,163,164,166,167,168,169,170,184,185,186,187,188,189,190,191,192,193,194,195,203,204,205,221,222,235,236,237
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FluA_H3_antigenic_aa_indices.csv	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,1 @@
+44,45,46,47,48,50,51,53,54,57,59,62,63,67,75,78,80,81,82,83,86,87,88,91,92,94,96,102,103,109,117,121,122,124,126,128,129,130,131,132,133,135,137,138,140,142,143,144,145,146,150,152,155,156,157,158,159,160,163,164,165,167,168,170,171,172,173,174,175,176,177,179,182,186,187,188,189,190,192,193,194,196,197,198,201,203,207,208,209,212,213,214,215,216,217,218,219,226,227,228,229,230,238,240,242,244,246,247,248,260,261,262,265,273,275,276,278,279,280,294,297,299,300,304,305,307,308,309,310,311,312
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FluB_Victoria_antigenic_aa_indices.csv	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,1 @@
+73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,170,197,198,199,200,201,202,203,204,205,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FluB_Yamagata_antigenic_aa_indices.csv	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,1 @@
+73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,196,197,198,199,200,201,202,203,204,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fasta	Thu Jul 04 19:36:38 2019 -0400
@@ -0,0 +1,56 @@
+>Seq1(3C.2a.3)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq2(3C.2a.4)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKDSNTGVTQNTSAIRSKSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRNSPGKKSEF
+VRIACRYVKHS
+>Seq3(3C.2a.3)
+QNSSIEINSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq4(3C.2a.2)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAMRSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAKSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq5(3C.2a.3)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq5(3C.2a.4)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNTSAIRSKSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGYIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Seq6(3C.3a)
+QNSSIEIDSQLENIQGQNKKLFVNKYSVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKQS
+>Seq7(3C.3a)
+QNSSIEIDSQLENIQGQNKKLFVNKYNVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKQS
+>Seq8(3C.2a.1)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQPRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Clade_3C.2a_A/Hong_Kong/5738/2014
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKH?
+>Clade_3C.3a_A/Switzerland/9715293/2013
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNAGVTQNTSSIGSNSSRNTHLNSKAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKQS
+>Seq9(3C.2a.1)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Seq10(3C.2a.1)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Clade_3C.2a.1_A/Bolzano/7/2016
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS