#!/usr/bin/env python
# Provenance: this script was recovered from a Mercurial changeset patch.
# Original patch header:
#   HG changeset patch
#   User public-health-bioinformatics
#   Date 1562283398 14400
#   Node ID a1b46e339580b7a9fbd03fc872513bae717fd906
#   planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit b96b6e06f6eaa6ae8ef4c24630dbb72a4aed7dbe
#   diff -r 000000000000 -r a1b46e339580 antigenic_site_extraction.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/antigenic_site_extraction.py Thu Jul 04 19:36:38 2019 -0400
#   @@ -0,0 +1,95 @@
"""Extract antigenic-site residues from amino acid sequences.

Accepts a fasta file of amino acid sequences, extracts specific amino acids
(1-based positions listed in a csv index file), and outputs the extracted
sequences - representing flu antigenic sites - to fasta (default) or csv.

Usage:
    antigenic_site_extraction.py [-c] inFileHandle1 inFileHandle2 outFileHandle

Author: Diane Eisler, Molecular Microbiology & Genomics, BCCDC Public Health
Laboratory, Sept 2017
"""

import argparse
import os
import string
import sys
import time

import Bio
from Bio import SeqIO, SeqUtils
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Bio.Alphabet was removed in Biopython 1.78; importing it there raises
# ImportError. Use the protein alphabet when it exists (older Biopython) and
# fall back to alphabet-free Seq objects otherwise.
try:
    from Bio import Alphabet
    from Bio.Alphabet import IUPAC
    PROTEIN_ALPHABET = IUPAC.protein
except ImportError:
    Alphabet = None
    IUPAC = None
    PROTEIN_ALPHABET = None

seqList = []  # SeqRecords read from the input fasta (uppercased)
indexArray = []  # 1-based amino acid positions of the antigenic sites
extractedSeqList = []  # SeqRecords holding only the extracted residues


def _build_parser():
    """Return the command line parser (same arguments as the original script)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--csv",
                        help="export extracted antigenic sites to csv file",
                        action="store_true")
    # batch fasta file with sequences to be parsed
    parser.add_argument("inFileHandle1")
    # .csv file containing positions of aa's to extract
    parser.add_argument("inFileHandle2")
    # user-specified name for output file of extracted aa seq's
    parser.add_argument("outFileHandle")
    return parser


def _read_positions(handle):
    """Return the integer positions found in a comma-separated text handle.

    Positions from every line are accumulated. Empty fields (blank lines,
    trailing commas) are skipped instead of crashing on ``int('')``.

    Raises:
        ValueError: if a non-empty field is not an integer.
    """
    positions = []
    for line in handle:
        for field in line.split(','):
            field = field.strip()
            if field:
                positions.append(int(field))
    return positions


def _make_protein_seq(residues):
    """Wrap a residue string in a Seq, with the protein alphabet if available."""
    if PROTEIN_ALPHABET is not None:
        return Seq(residues, PROTEIN_ALPHABET)
    return Seq(residues)


def _parse_fasta(handle):
    """Return an iterator of SeqRecords parsed from a fasta handle."""
    if PROTEIN_ALPHABET is not None:
        return SeqIO.parse(handle, "fasta", alphabet=PROTEIN_ALPHABET)
    return SeqIO.parse(handle, "fasta")


def extract_aa_from_sequence(record):
    """Extract the residues listed in ``indexArray`` from a SeqRecord.

    A new SeqRecord with the same id (and empty name/description) is built
    from the extracted residues and appended to ``extractedSeqList``.

    Args:
        record: SeqRecord whose sequence is read.

    Raises:
        IndexError: if a position is smaller than 1 or larger than the
            sequence length. Without this check a position of 0 or below
            would silently wrap around to the end of the sequence.
    """
    original_sequence = str(record.seq)
    sequence_length = len(original_sequence)
    residues = []
    for pos in indexArray:
        if pos < 1 or pos > sequence_length:
            raise IndexError(
                "position %i is outside sequence %s (1-%i)"
                % (pos, record.id, sequence_length))
        # positions in the index file are 1-based; Python strings are 0-based
        residues.append(original_sequence[pos - 1])
    new_sequence = "".join(residues)
    rec = SeqRecord(_make_protein_seq(new_sequence), id=record.id,
                    name="", description="")
    extractedSeqList.append(rec)


def _write_csv(records, handle):
    """Write one ``id,aa,aa,...`` line per record and echo it to the console."""
    for record in records:
        name_part = (record.id).rstrip() + ','
        sequence = str(record.seq).strip()
        csv_seq = ",".join(sequence)
        comma_separated_sequence = name_part + csv_seq + "\n"
        print(comma_separated_sequence)
        handle.write(comma_separated_sequence)


def main():
    """Run the extraction for the files named on the command line."""
    args = _build_parser().parse_args()

    # read the amino acid positions to extract
    with open(args.inFileHandle2, 'r') as inFile2:
        indexArray.extend(_read_positions(inFile2))
    # print number of amino acids to extract and array to console as user check
    print("Amino Acid positions to extract: %i " % (len(indexArray)))
    print(indexArray)

    # read the sequences from the fasta file and uppercase them
    with open(args.inFileHandle1, 'r') as inFile:
        for record in _parse_fasta(inFile):
            seqList.append(record.upper())
    # print number of sequences to be processed as user check
    print("\n%i flu sequences will be extracted for antigenic sites..." % len(seqList))

    for record in seqList:
        extract_aa_from_sequence(record)

    # print original and extracted sequence lengths
    for original, extracted in zip(seqList, extractedSeqList):
        print("Original %s: %i amino acids,\tExtracted: %i" % (original.id, len(original), len(extracted)))

    # The output file is opened only now, so a failure while reading the
    # inputs does not leave behind a truncated output file, and the handle
    # is always closed.
    with open(args.outFileHandle, 'w') as outFile:
        if args.csv:
            # write csv file of extracted antigenic sites
            _write_csv(extractedSeqList, outFile)
        else:
            # write fasta file of extracted antigenic sites
            SeqIO.write(extractedSeqList, outFile, "fasta")

    print("\n%i Sequences Extracted to Output file: %s" % ((len(extractedSeqList), args.outFileHandle)))


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Remainder of the changeset that shared these source lines with the script
# (kept here so that no patch content is lost):
#   diff -r 000000000000 -r a1b46e339580 antigenic_site_extraction.xml
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/antigenic_site_extraction.xml Thu Jul 04 19:36:38 2019 -0400
#   @@ -0,0 +1,38 @@
#   (presumably the tool wrapper XML; its markup is missing from this copy,
#   only the text "biopython" survives)
#   diff -r 000000000000 -r a1b46e339580 test-data/14_H3_aa_seqs_aligned.fasta
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/test-data/14_H3_aa_seqs_aligned.fasta Thu Jul 04 19:36:38 2019 -0400
#   @@ -0,0 +1,28 @@
#   +>Seq1(3C.2a.3)
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVKQNGTSSACIRKSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Seq2(3C.2a.4) +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFKDESFNWTGVTQNGTSSACIRRSKSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNIIAPRGYFKIRNGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Seq3(3C.2a.3) +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICNSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVKQNGTSSACIRKSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Seq4(3C.2a.2) 
+QKIPGNDNSMATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACMRRSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAKSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSNAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMMDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYDAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFHIKGVELKSGYKDW +>Seq5(3C.2a.3) +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVKQNGTSSACIRKSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIQSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Seq5(3C.2a.4) +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASFGTLEFKNESFNWTGVTQNGTSSACIRRSKSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGYRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Seq6(3C.3a) 
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSSCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Seq7(3C.3a) +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHSVYRDEALNNRFQIKGVELKSGYKDW +>Seq8(3C.2a.1) +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFKNESFNWTGVTQNGKSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQPSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIESIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Clade_3C.2a_A/Hong_Kong/5738/2014 
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKH?TLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Clade_3C.3a_A/Switzerland/9715293/2013 +QKLPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSNSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDW +>Seq9(3C.2a.1) +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Seq10(3C.2a.1) 
+QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFKNESFNWTGVTQNGKSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIESIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW +>Clade_3C.2a.1_A/Bolzano/7/2016 +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNKEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQARGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRVQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNETYDHNVYRDEALNNRFQIKGVELKSGYKDW diff -r 000000000000 -r a1b46e339580 test-data/FluA_H1_antigenic_aa_indices.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FluA_H1_antigenic_aa_indices.csv Thu Jul 04 19:36:38 2019 -0400 @@ -0,0 +1,1 @@ +70,71,72,73,74,75,124,125,137,138,139,140,141,142,153,154,155,156,157,159,160,161,162,163,164,166,167,168,169,170,184,185,186,187,188,189,190,191,192,193,194,195,203,204,205,221,222,235,236,237 diff -r 000000000000 -r a1b46e339580 test-data/FluA_H3_antigenic_aa_indices.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FluA_H3_antigenic_aa_indices.csv Thu Jul 04 19:36:38 2019 -0400 @@ -0,0 +1,1 @@ 
+44,45,46,47,48,50,51,53,54,57,59,62,63,67,75,78,80,81,82,83,86,87,88,91,92,94,96,102,103,109,117,121,122,124,126,128,129,130,131,132,133,135,137,138,140,142,143,144,145,146,150,152,155,156,157,158,159,160,163,164,165,167,168,170,171,172,173,174,175,176,177,179,182,186,187,188,189,190,192,193,194,196,197,198,201,203,207,208,209,212,213,214,215,216,217,218,219,226,227,228,229,230,238,240,242,244,246,247,248,260,261,262,265,273,275,276,278,279,280,294,297,299,300,304,305,307,308,309,310,311,312 diff -r 000000000000 -r a1b46e339580 test-data/FluB_Victoria_antigenic_aa_indices.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FluB_Victoria_antigenic_aa_indices.csv Thu Jul 04 19:36:38 2019 -0400 @@ -0,0 +1,1 @@ +73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,170,197,198,199,200,201,202,203,204,205,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244 diff -r 000000000000 -r a1b46e339580 test-data/FluB_Yamagata_antigenic_aa_indices.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FluB_Yamagata_antigenic_aa_indices.csv Thu Jul 04 19:36:38 2019 -0400 @@ -0,0 +1,1 @@ +73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,196,197,198,199,200,201,202,203,204,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243 diff -r 000000000000 -r a1b46e339580 test-data/output.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fasta Thu Jul 04 19:36:38 2019 -0400 @@ -0,0 +1,56 @@ +>Seq1(3C.2a.3) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq2(3C.2a.4) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKDSNTGVTQNTSAIRSKSSRNTHLNYTAL 
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRNSPGKKSEF +VRIACRYVKHS +>Seq3(3C.2a.3) +QNSSIEINSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq4(3C.2a.2) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAMRSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAKSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq5(3C.2a.3) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq5(3C.2a.4) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNTSAIRSKSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGYIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Seq6(3C.3a) +QNSSIEIDSQLENIQGQNKKLFVNKYSVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKQS +>Seq7(3C.3a) +QNSSIEIDSQLENIQGQNKKLFVNKYNVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKQS +>Seq8(3C.2a.1) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQPRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Clade_3C.2a_A/Hong_Kong/5738/2014 +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKH? +>Clade_3C.3a_A/Switzerland/9715293/2013 +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNAGVTQNTSSIGSNSSRNTHLNSKAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKQS +>Seq9(3C.2a.1) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Seq10(3C.2a.1) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Clade_3C.2a.1_A/Bolzano/7/2016 +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS