changeset 1:d4075a2fc42b draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 3c91f421d26c8f42cf2671e47db735d2cf69dde8"
author jay
date Tue, 29 Dec 2020 04:29:54 +0000
parents 93f7668caa55
children cda6264267f7
files PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py PDAUG_Peptide_Data_Access/test-data/Out.tsv PDAUG_Peptide_Data_Access/test-data/out.tsv PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py readme.md readme.txt
diffstat 5 files changed, 461 insertions(+), 436 deletions(-) [+]
line wrap: on
line diff
--- a/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py	Wed Oct 28 02:18:30 2020 +0000
+++ b/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py	Tue Dec 29 04:29:54 2020 +0000
@@ -10,13 +10,13 @@
 
 def DataGen(DataBaseType, OutFile, IDs):
 
-    if DataBaseType == 'AMPvsTM':
+    if DataBaseType == 'AMPvsTMP':
         data = load_AMPvsTM()
 
     elif DataBaseType == 'AMPvsUniProt':
         data = load_AMPvsUniProt()
 
-    elif DataBaseType == 'ACPvsTM':
+    elif DataBaseType == 'ACPvsTMP':
         data = load_ACPvsTM()
 
     elif DataBaseType == 'ACPvsRandom':
@@ -39,16 +39,11 @@
         print ("Enter Correct Values")
         exit()
 
-    Target = data.target.tolist()
-    Target_list = set(Target)
-    df = data.sequences
-
-
-    Target = pd.DataFrame(Target, columns=['Target'])
-    df = pd.DataFrame(df, columns=['Peptide'])
-    
-    df = pd.DataFrame(df)
-    df = pd.concat([df, Target], axis=1)
+    peptide_data = data.sequences
+    class_label = int(len(peptide_data)/2)*[data.target_names[0]]+int(len(peptide_data)/2)*[data.target_names[1]]
+    peptide_data = pd.DataFrame(peptide_data, columns=['name'])
+    class_label = pd.DataFrame(class_label, columns=['class_label'])
+    df = pd.concat([peptide_data,class_label], axis=1)
 
     df.to_csv(OutFile, index=False, sep='\t')
 
@@ -69,9 +64,9 @@
                         help="Out put file name for str descriptors")   
 
     parser.add_argument("-L", "--List",
-    					required=False,
-    					default=None,
-    					help="List of integer as ID")
+                        required=False,
+                        default=None,
+                        help="List of integer as ID")
 
     args = parser.parse_args()
     DataGen(args.DataBaseType, args.OutFile, args.List)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PDAUG_Peptide_Data_Access/test-data/Out.tsv	Tue Dec 29 04:29:54 2020 +0000
@@ -0,0 +1,413 @@
+name	class_label
+AAGAATVLLVIVLLAGSYLAVLA	TM
+LWIVIACLACVGSAAALTLRA	TM
+FYRFYMLREGTAVPAVWFSIELIFGLFA	TM
+GTLELGVDYGRAN	TM
+KLFWRAVVAEFLATTLFVFISIGSALGFK	TM
+HGSIGAGVDW	TM
+ATIYLVCFCFFKQLAMIFMSVLAGNMYE	TM
+GKLSLAATRSSE	TM
+TSRLLLAGVALGIICSALMTWAIYF	TM
+LGFFLVTFGFIWGMMLL	TM
+YMLFTMIFVISSIIITVVVI	TM
+ACFSAKVN	TM
+LIWVAATLAGAIIAVLLVIYA	TM
+HIPFAFAFAILAYLTLVLFRPVM	TM
+CGLLVLLTLLLMGAIVTLGVF	TM
+TGVYILVGVVLWTA	TM
+LSMFIITPVMVLGTIFIFVMG	TM
+MTLVALLVIGVSLTGYLGLKA	TM
+QLYYQVLNFGMIVSSALMIW	TM
+ALGALCLLLSVGSATACLLLGA	TM
+LAVIFFFASALLYSQAAT	TM
+MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT	TM
+ADILILSLLVIQCALGLLTIPFSA	TM
+RHQQATFAGFIKGATWVSILSIAVLVFLALAN	TM
+GTKWWTVGIRPMYKW	TM
+PAIRVFATYAKWDEK	TM
+PHVFLLFITFPILFIGWGSQS	TM
+AGLVLWGAIIFVGWNALLLLFFW	TM
+TQIVLLGLVTAALWAGLLTLLLLWHW	TM
+SFIGRVFLFLMIVLPLWCGLHRMHHAMHD	TM
+MYLGAGIALIPVIMSINYL	TM
+FQTGFDFSD	TM
+LISGTLGIICLSLMATLGILL	TM
+FPFLNNLSFWFTVVGVILVNVSLG	TM
+LDIYTRLGGMVWR	TM
+FLIFLLVIMTVITVALLTLLF	TM
+KDIGILYLFTAGIVGLISVCFTVYMRMELQH	TM
+PLFYIINILVPCVLISFMINLVFYL	TM
+HKALKTLGIIMGVFTLCWLPFFLVNIVNVF	TM
+ANLKKLKTLMSAFLIVLGLLTFGA	TM
+IWVGIFLLAALLAALFVCLKA	TM
+KGAAGITGAGFITLAATLS	TM
+VVLLAIVTLISVVQNGFFAHKV	TM
+IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS	TM
+ALTVGTLLFLTGIGAASWAIV	TM
+ITLIIFGVMAGVIGTILLISYGI	TM
+WVWISLYYVAFYVVMSGIFAL	TM
+GSNVALHVN	TM
+GVLELGVDYGRAN	TM
+IYLLILIILSMLCLVYASVPL	TM
+LCSFLFVIVLFVATFYTL	TM
+PLYFIVNVVIPCLLFSFLTGLVFYL	TM
+ALLIAGGVGLLALAAALVLNA	TM
+IWTWLRTTL	TM
+PAHMIAISFFFTNALALALHGALVLSAA	TM
+GGSLYIVGIFLPIWTFMIYIF	TM
+GLTLGTGGR	TM
+STVTGGYAQS	TM
+DCFLLLVLLLYAPVGFCLLVL	TM
+FWRAVVAEFLAMILFIFISIGSALG	TM
+DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK	TM
+LTFTEKWN	TM
+IRDTLMRLVLTVRSN	TM
+GKLSLAATRSTE	TM
+GYNKFVVQYATDA	TM
+QFHTMLMIAASGAVLIALGILCLVIQMYVSIR	TM
+LILVLFVVLVSSVGVSLTLYA	TM
+MASLWADYT	TM
+DAWSGFVRGYGYDNRTN	TM
+RWLWDFVNA	TM
+MFVALLGLGLGQVVCSVALFF	TM
+NMSTYVDYII	TM
+IWVKMTFIVVLGLCFVFFWSF	TM
+SLLISVVLVAYYLYIPLPDAI	TM
+RKTTINGLIVAVILGVCFTGLQAYEYSHA	TM
+IPVQLLWVNLVTDGLPATALG	TM
+LAVIAGGVGFIMVAVLLVLFA	TM
+MNKRNIMNLILAGGAGLPITTLALGYGAFF	TM
+IALIVAGLSALGIATALVLNA	TM
+NGNMWRILDHGAISL	TM
+VAIICAAVVAVGLIVGLSVGL	TM
+FSFLIVAGATTLFCLLHFGVI	TM
+VGISIATIVAIIAAIYYVPW	TM
+PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF	TM
+LILIGAGLGVLALAAGLILTA	TM
+LGLAAGAIYYYNTSNVFA	TM
+QLGAGAFGGYQV	TM
+IAIALLVILVVCSLITMSVIL	TM
+LAFLIGGIIGGLLLLIGVSCCLW	TM
+NGHMLRILDHGAISM	TM
+MALILGIDRFMSECRALTNF	TM
+AVIAGTTLAITALAVTS	TM
+LYIVLAILCGVSIAVALALTA	TM
+AAVTLGVLCLGLLVTVILLILQL	TM
+LWLVIGVLTAAALAVTLIALA	TM
+GTLLLLTAIGAASWAIVAVLL	TM
+LLLVASVIQGLGLLLCFTYICLHFSAL	TM
+AMIVALIVICITAVVAALV	TM
+LVFIGTCGAVLAVALGLVLWA	TM
+TRFGIAAKYQ	TM
+IPWAVLIVVLITSLIIALIAL	TM
+LWVVCAVLAGLGLTTALVLYA	TM
+FAGRVLAGAVMMSGIGIFGLWAGIL	TM
+LLIVLAGLAVVAVASGLILNA	TM
+VGSIVGGIYLGFCFNAGAPAVEAFI	TM
+MLSLGVSYR	TM
+LTKWFFCCVCTILTMPFF	TM
+LATIAASAIVLVVAVGLGLMA	TM
+PIVVTGAVY	TM
+KPHNLPMVFTGTAILYIGWFGFNAGSA	TM
+DEFGEQLSKVISLICVAVWL	TM
+HTLLTGVDF	TM
+SVELIALLAISCTFFLFMHT	TM
+LLIALLIYWTLAFITKTIKF	TM
+FKLVIFVVLGIAIASGLMLYA	TM
+CTLSISVLLAQTIFLFLIA	TM
+FVIAGGTLAIPILAFVASFLL	TM
+LLVITAIVLILSAAVGLVMYA	TM
+LALATALIGGVAAIASLLLYA	TM
+NPVIVIINLITLAAALLHTKTWFEL	TM
+FIVVAGVVILAVTIALLVYFL	TM
+IGLMCFLSIIITTVCIIMIAT	TM
+FSVDTQLQS	TM
+EVYILLNWIGYVNSGFNPLIYCRS	TM
+LTLAVALIVGVSAIASLLLYA	TM
+LYLAVVVLIGIGLTTTLVLYA	TM
+MSTAISVLLAQAVFLLLT	TM
+ALPGLMNKMEKAGCKRSVV	TM
+KQFIRYLISSNVGEVVCIFL	TM
+KYVVSSLVLVYGLIKVLTWIF	TM
+TGVSPVFAGGVEYA	TM
+IVIVLGILCFLLLLTVAVLVI	TM
+KSLGILGILLGVAALCTIVALSVV	TM
+LCLFVVTPVMVVGTAWIFL	TM
+LYLAIVVLIGVALTATLMLYA	TM
+LTILLAIAPVLALAVGLALYG	TM
+FFVLLLMILILVNLAMTIWIL	TM
+FIVLIPSVVITVIFLFFWLFM	TM
+ILVLLILAVITIFALVCVLLV	TM
+TYFIVLIPSVVITVIFLFFWLFM	TM
+FLVLFIFLTSFFLNYSHTMVA	TM
+GDKIGMFFQAMATFFGGFIIGF	TM
+SNGVIVGTCLAFVAGMIGMAYAA	TM
+FTFEGAARSDD	TM
+MNYMVYFNFFACVLVPLLLMLGVYL	TM
+AGLILLVVTLIGMSVLVRVLI	TM
+VFLAVYLLGGITFLPLVLFTL	TM
+LIKISALVFVTVAFFYLG	TM
+GFFGVATFFFAALGIILIAWSAVL	TM
+GQWEISVIWGLGVAMAIYLTA	TM
+LWWIQAMTGFAMFFLGSVHLYIMMT	TM
+IAVVITVVFLTLLSVVILIFF	TM
+QVVATATFR	TM
+MLLCFAFLWVLGIAYYMY	TM
+IFCIIMLFALLGFL	TM
+PLCICVAFTCLALVLVTSIVL	TM
+LIIVLAIVVGVGAAVGLALSA	TM
+ILVPCVLGLLLLPILAMLMALCV	TM
+LGLLLAALICVGIATTLVLNA	TM
+GYAAYYLVRKNFALAMPYLVE	TM
+LPRTLAVLLVGAALAISGAVMQALF	TM
+ILLFYVIFYGCLAGIFIGTIQ	TM
+IMSTLLEVGYDNVKSQ	TM
+LLAVALIIAMSISLAWQAAGW	TM
+IVGQLLFVALGITFIYYLFTP	TM
+NFWMFGLFFFFYFFIMGAYFPFFPIWL	TM
+LVLIVGIVAAVGVAAALVLNA	TM
+LTLAVALIGGVAAITSLLLYA	TM
+ASGGIILIIAAILAMIMAN	TM
+GSAGGAALAVVVLALAFGLSG	TM
+LFVLLLLAILVVNLALTIWIL	TM
+SPPLVLAALVACIIVLGFNYWIA	TM
+LLFLILGIISFITFFLQGFTF	TM
+LIVKALGILCFLLLITVAVLAV	TM
+QYIHVAFQGSFACITVGLIVGAL	TM
+QGIAVFGYSMAVSIGGILASR	TM
+DHKRLGIMYIIVAIVMLLRGFADAIMMR	TM
+IFRLHLVLGMTLFLLF	TM
+LVSAIILTSFMTGLFILSLWK	TM
+AVVGGVIAAVFITLITVVVLI	TM
+INLGCDVDFD	TM
+ALSALCLLLSVGSAAACLLLGA	TM
+FTVIAGAVIVLLLTLNSNS	TM
+LLFVSLLFCLIAQTCWLALV	TM
+MVLVALLVIGVSLAGYLGLKA	TM
+LVIPHILRLC	TM
+LIRVLLGFVILFITYILFPSI	TM
+PLFYIINILAPCVLIALMANLVFYL	TM
+HKLGLGLEFQA	TM
+ILFVAVSFIALGCVSAFVLFE	TM
+AVVSAQIAITASPIS	TM
+LTIIGGALFVLAVAAGLVLNA	TM
+GCCGLLALALCSLALSLLA	TM
+LIVLLAIVTIIAIALVAILP	TM
+TEISAGWG	TM
+FWRYFAGNLASGGAAGATSLCFVYPLDFARTRLAA	TM
+IVAALGIIGLWMFFSSNELSIAT	TM
+MTVILFVLLGISIASALVLYA	TM
+SNATIAVACLSFFVCMIGAAYAS	TM
+LKLTFDSSFS	TM
+LNVLLSAAINFFLIAFAVYFLV	TM
+HSAMLWTIGFIVTFSVGGMTGVLLAVPGADFV	TM
+AVLSAKGQY	TM
+LWELVIEQFEDLLVRILLLAA	TM
+FFIVMGLVDAIPMIAVGLGLY	TM
+LAVIAGGMGFIATAVLLVLFA	TM
+ACYCRIPACLAGERRYGTCFYMGRVWAFCC	AMP
+AGRGKQGGKVRAKAKTRSSRAGLQFPVGRVHRLLRKGNY	AMP
+ALFSILRGLKKLGNMGQAFVNCKIYKKC	AMP
+ALSILKGLEKLAKMGIALTNCKATKKC	AMP
+ALWKDILKNVGKAAGKAVLNTVTDMVNQ	AMP
+ALWKNMLKGIGKLAGQAALGAVKTLVGAES	AMP
+ALWKTLLKNVGKAAGKAALNAVTDMVNQ	AMP
+ALWKTMLKKLGTMALHAGKAAFGAAADTISQ	AMP
+ALWKTMLKKLGTMALHAGKAALGAAADTISQGTQ	AMP
+APGNKAECEREKGYCGFLKCSFPFVVSGKCSRFFFCCKNIW	AMP
+ASIIKTTIKVSKAVCKTLTCICTGSCSNCK	AMP
+ATCDLLSGTGIKHSACAAHCLLRGNRGGYCNGRAICVCRN	AMP
+ATTGCSCPQCIIFDPICASSYKNGRRGFSSGCHMRCYNRCHGTDYFQISKGSKCI	AMP
+AYPGNGVHCGKYSCTVDKQTAIGNIGNNAA	AMP
+CANSCSYGPLTWSCDGNTK	AMP
+CRQSCSFGPLTFVCDGNTK	AMP
+DDTPSSRCGSGGWGPCLPIVDLLCIVHVTVGCSGGFGCCRIG	AMP
+DFASCHTNGGICLPNRCPGHMIQIGICFRPRVKCCRSW	AMP
+DFKDWMKTAGEWLKKKGPGILKAAMAAAT	AMP
+DGVKLCDVPSGTWSGHCGSSSKCSQQCKDREHFAYGGACHYQFPSVKCFCKRQC	AMP
+DKLIGSCVWGAVNYTSDCNGECKRRGYKGGHCGSFANVNCWCET	AMP
+ELCEKASKTWSGNCGNTGHCDNQCKSWEGAAHGACHVRNGKHMCFCYFNC	AMP
+FCKSLPLPLSVK	AMP
+FFGSLLSLGSKLLPSVFKLFQRKKE	AMP
+FFGSVLKLIPKIL	AMP
+FFGWLIKGAIHAGKAIHGLIHRRRH	AMP
+FFGWLIRGAIHAGKAIHGLIHRRRH	AMP
+FFPIVAGVAGQVLKKIYCTISKKC	AMP
+FGLPMLSILPKALCILLKRKC	AMP
+FIGLLISAGKAIHDLIRRRH	AMP
+FIGPIISALASLFG	AMP
+FKLGSFLKKAWKSKLAKKLRAKGKEMLKDYAKGLLEGGSEEVPGQ	AMP
+FLGGLMKAFPAIICAVTKKC	AMP
+FLGGLMKAFPALICAVTKKC	AMP
+FLNALKNFAKTAGKRLKSLLN	AMP
+FLPAIAGMAAKFLPKIFCAISKKC	AMP
+FLPAIAGVAAKFLPKIFCAISKKC	AMP
+FLPAIVGAAAKFLPKIFCVISKKC	AMP
+FLPAIVGAAGKFLPKIFCAISKKC	AMP
+FLPAIVGAAGQFLPKIFCAISKKC	AMP
+FLPAVLRVAAKIVPTVFCAISKKC	AMP
+FLPAVLRVAAKVVPTVFCLISKKC	AMP
+FLPAVLRVAAQVVPTVFCAISKKC	AMP
+FLPFIAGMAAKFLPKIFCAISKKC	AMP
+FLPFIAGMAANFLPKIFCAISKKC	AMP
+FLPFIAGVAAKFLPKIFCAISKKC	AMP
+FLPFLATLLSKVL	AMP
+FLPGLLAGLL	AMP
+FLPIASLLGKYL	AMP
+FLPIIAGVAAKVFPKIFCAISKKC	AMP
+FLPIIASVAAKVFPKIFCAISKKC	AMP
+FLPIIASVAAKVFSKIFCAISKKC	AMP
+FLPIIASVAANVFSKIFCAISKKC	AMP
+FLPILASLAAKFGPKLFCLVTKKC	AMP
+FLPILASLAAKLGPKLFCLVTKKC	AMP
+FLPILASLAATLGPKLLCLITKKC	AMP
+FLPLFASLIGKLL	AMP
+FLPLIGKVLSGIL	AMP
+FLPLIGRVLSGIL	AMP
+FLPLLAGLAANFFPKIFCKITRKC	AMP
+FLPLLAGLAANFLPKIFCKITRKC	AMP
+FLPLLAGLAANFLPTIICKISYKC	AMP
+FLPMLAGLAASMVPKFVCLITKKC	AMP
+FLPVVAGLAAKVLPSIICAVTKKC	AMP
+FMGGLIKAATKIVPAAYCAITKKC	AMP
+FSFKRLKGFAKKLWNSKLARKIRTKGLKYVKNFAKDMLSEGEEAPPAAEPPVEAPQ	AMP
+FVPYNPPRPGQSKPFPSFPGHGPFNPKIQWPYPLPNPGH	AMP
+GAIKDALKGAAKTVAVELLKKAQCKLEKTC	AMP
+GFFSLIKGVAKIATKGLAKNLGKMGLDLVGCKISKEC	AMP
+GFFSTVKNLATNVAGTVIDTLKCKVTGGCRS	AMP
+GFGALFKFLAKKVAKTVAKQAAKQGAKYVVNKQME	AMP
+GFISTVKNLATNVAGTVIDTIKCKVTGGC	AMP
+GFKGAFKNVMFGIAKSAGKSALNALACKIDKSC	AMP
+GFLDSFKNAMIGVAKSVGKTALSTLACKIDKSC	AMP
+GFLSILKKVLPKVMAHMK	AMP
+GFLSTVKNLATNVAGTVIDTLKCKVTGGCRS	AMP
+GFMKYIGPLIPHAVKAISDLI	AMP
+GFSSIFRGVAKFASKGLGKDLARLGVNLVACKISKQC	AMP
+GFVDLAKKVVGGIRNALGI	AMP
+GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLNNIGNTGCNEDEC	AMP
+GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLSNIGNTGCNEDEC	AMP
+GIFSKFGGKAIKNLFIKGAKNIGKEVGMDVIRTGIDVAGCKIKGEC	AMP
+GIFSKLAGKKLKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC	AMP
+GIFSKLGRKKIKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC	AMP
+GIFSLIKGAAQLIGKTVAKEAGKTGLELMACKVTKQC	AMP
+GIFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC	AMP
+GILDAIKAIAKAAG	AMP
+GILDFAKTVVGGIRNALGI	AMP
+GILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC	AMP
+GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC	AMP
+GIMDSVKGLAKNLAGKLLDSLKCKITGC	AMP
+GIMDTIKDTAKTVAVGLLNKLKCKITGC	AMP
+GINTLKKVIQGLHEVIKLVSNHA	AMP
+GINTLKKVIQGLHEVIKLVSNHE	AMP
+GIPCGESCVWIPCISAALGCSCKNKVCYRN	AMP
+GKLQAFLAKMKEIAAQTL	AMP
+GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT	AMP
+GLADFLNKAVGKVVDFVKS	AMP
+GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC	AMP
+GLFKVLGSVAKHLLPHVAPIIAEKL	AMP
+GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP	AMP
+GLFLDTLKGLAGKLLQGLKCIKAGCKP	AMP
+GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC	AMP
+GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC	AMP
+GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC	AMP
+GLFSVLGSVAKHLLPHVAPIIAEKL	AMP
+GLFSVLGSVAKHLLPHVVPVIAEKL	AMP
+GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC	AMP
+GLLDFVTGVGKDIFAQLIKQI	AMP
+GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC	AMP
+GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC	AMP
+GLLDTIKGVAKTVAASMLDKLKCKISGC	AMP
+GLLGGLLGPLLGGGGGGGGGLL	AMP
+GLLGPLLKIAAKVGSNLL	AMP
+GLLGSIFGAGKKIACALSGLC	AMP
+GLLGSLFGAGKKVACALSGLC	AMP
+GLLKRIKTLL	AMP
+GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC	AMP
+GLLSKVLGVGKKVLCGVSGLC	AMP
+GLLSVLGSVAKHVLPHVVPVIAEHL	AMP
+GLMSSIGKALGGLIVDVLKPKTPAS	AMP
+GLNALKKVFQGIHEAIKLINNHVQ	AMP
+GLNTLKKVFQGLHEAIKLINNHVQ	AMP
+GLWNKIKEAASKAAGKAALGFVNEMV	AMP
+GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV	AMP
+GLWSKIKEAAKTAGLMAMGFVNDMV	AMP
+GLWSTIKQKGKEAAIAAAKAAGQAALGAL	AMP
+GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW	AMP
+GRLQAFLAKMKEIAAQTL	AMP
+GRPNPVNNKPTPHPRL	AMP
+GRPNPVNTKPTPYPRL	AMP
+GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD	AMP
+GSKKPVPIIYCNRRTGKCQRM	AMP
+GVLDILKNAAKNILAHAAEQI	AMP
+GVVDILKGAGKDLLAHLVGKISEKV	AMP
+GWKDWAKKAGGWLKKKGPGMAKAALKAAMQ	AMP
+GWKDWLKKGKEWLKAKGPGIVKAALQAATQ	AMP
+GWKDWLNKGKEWLKKKGPGIMKAALKAATQ	AMP
+HGVSGHGQHGVHG	AMP
+IFGAILPLALGALKNLIK	AMP
+IIEKLVNTALGLLSGL	AMP
+IIGHLIKTALGMLGL	AMP
+ILGTILGLLKGL	AMP
+ILGTILGLLKSL	AMP
+ILPLVGNLLNDLL	AMP
+ILQKAVLDCLKAAGSSLSKAAITAIYNKIT	AMP
+INWKKIAEIGKQVLSAL	AMP
+INWKKIAEVGGKILSSL	AMP
+INWLKLGKAIIDAL	AMP
+IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR	AMP
+IWLTALKFLGKHAAKHLAKQQLSKL	AMP
+KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG	AMP
+KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC	AMP
+KTCENLADTY	AMP
+KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC	AMP
+KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC	AMP
+KWCFRVCYRGICYRKCR	AMP
+KWCFRVCYRGICYRRCR	AMP
+KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK	AMP
+LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC	AMP
+LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV	AMP
+LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC	AMP
+LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC	AMP
+LLKELWTKIKGAGKAVLGKIKGLL	AMP
+LLKELWTKMKGAGKAVLGKIKGLL	AMP
+LLPILGNLLNGLL	AMP
+LLPNLLKSLL	AMP
+LMCTHPLDCSN	AMP
+LNLKGIFKKVASLLT	AMP
+LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV	AMP
+MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL	AMP
+QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY	AMP
+QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW	AMP
+QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC	AMP
+QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS	AMP
+QRFIHPTYRPPPQPRRPVIMRA	AMP
+RQRVEELSKFSKKGAAARRRK	AMP
+RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG	AMP
+RSVCRQIKICRRRGGCYYKCTNRPY	AMP
+SAPRGCWTKSYPPKPCK	AMP
+SCTTCVCTCSCCTT	AMP
+SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW	AMP
+SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW	AMP
+SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY	AMP
+SKGKKANKDVELARG	AMP
+SMLSVLKNLGKVGLGFVACKINKQC	AMP
+TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW	AMP
+VDKGSYLPRPTPPRPIYNRN	AMP
+VDKPDYRPRPRPPNM	AMP
+VDKPDYRPRPWPRNMI	AMP
+VDKPDYRPRPWPRPN	AMP
+VDKPDYRPRPWPRPNM	AMP
+VLPIIGNLLNSLL	AMP
+VLPLISMALGKLL	AMP
+VNPIILGVLPKFVCLITKKC	AMP
+VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR	AMP
+VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW	AMP
+VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR	AMP
+VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF	AMP
+VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ	AMP
+WLGSALKIGAKLLPSVVGLFKKKKQ	AMP
+WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG	AMP
+WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG	AMP
+YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG	AMP
+YSKSLPLSVLNP	AMP
+YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF	AMP
--- a/PDAUG_Peptide_Data_Access/test-data/out.tsv	Wed Oct 28 02:18:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,413 +0,0 @@
-Peptide	Target
-AAGAATVLLVIVLLAGSYLAVLA	0
-LWIVIACLACVGSAAALTLRA	0
-FYRFYMLREGTAVPAVWFSIELIFGLFA	0
-GTLELGVDYGRAN	0
-KLFWRAVVAEFLATTLFVFISIGSALGFK	0
-HGSIGAGVDW	0
-ATIYLVCFCFFKQLAMIFMSVLAGNMYE	0
-GKLSLAATRSSE	0
-TSRLLLAGVALGIICSALMTWAIYF	0
-LGFFLVTFGFIWGMMLL	0
-YMLFTMIFVISSIIITVVVI	0
-ACFSAKVN	0
-LIWVAATLAGAIIAVLLVIYA	0
-HIPFAFAFAILAYLTLVLFRPVM	0
-CGLLVLLTLLLMGAIVTLGVF	0
-TGVYILVGVVLWTA	0
-LSMFIITPVMVLGTIFIFVMG	0
-MTLVALLVIGVSLTGYLGLKA	0
-QLYYQVLNFGMIVSSALMIW	0
-ALGALCLLLSVGSATACLLLGA	0
-LAVIFFFASALLYSQAAT	0
-MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT	0
-ADILILSLLVIQCALGLLTIPFSA	0
-RHQQATFAGFIKGATWVSILSIAVLVFLALAN	0
-GTKWWTVGIRPMYKW	0
-PAIRVFATYAKWDEK	0
-PHVFLLFITFPILFIGWGSQS	0
-AGLVLWGAIIFVGWNALLLLFFW	0
-TQIVLLGLVTAALWAGLLTLLLLWHW	0
-SFIGRVFLFLMIVLPLWCGLHRMHHAMHD	0
-MYLGAGIALIPVIMSINYL	0
-FQTGFDFSD	0
-LISGTLGIICLSLMATLGILL	0
-FPFLNNLSFWFTVVGVILVNVSLG	0
-LDIYTRLGGMVWR	0
-FLIFLLVIMTVITVALLTLLF	0
-KDIGILYLFTAGIVGLISVCFTVYMRMELQH	0
-PLFYIINILVPCVLISFMINLVFYL	0
-HKALKTLGIIMGVFTLCWLPFFLVNIVNVF	0
-ANLKKLKTLMSAFLIVLGLLTFGA	0
-IWVGIFLLAALLAALFVCLKA	0
-KGAAGITGAGFITLAATLS	0
-VVLLAIVTLISVVQNGFFAHKV	0
-IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS	0
-ALTVGTLLFLTGIGAASWAIV	0
-ITLIIFGVMAGVIGTILLISYGI	0
-WVWISLYYVAFYVVMSGIFAL	0
-GSNVALHVN	0
-GVLELGVDYGRAN	0
-IYLLILIILSMLCLVYASVPL	0
-LCSFLFVIVLFVATFYTL	0
-PLYFIVNVVIPCLLFSFLTGLVFYL	0
-ALLIAGGVGLLALAAALVLNA	0
-IWTWLRTTL	0
-PAHMIAISFFFTNALALALHGALVLSAA	0
-GGSLYIVGIFLPIWTFMIYIF	0
-GLTLGTGGR	0
-STVTGGYAQS	0
-DCFLLLVLLLYAPVGFCLLVL	0
-FWRAVVAEFLAMILFIFISIGSALG	0
-DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK	0
-LTFTEKWN	0
-IRDTLMRLVLTVRSN	0
-GKLSLAATRSTE	0
-GYNKFVVQYATDA	0
-QFHTMLMIAASGAVLIALGILCLVIQMYVSIR	0
-LILVLFVVLVSSVGVSLTLYA	0
-MASLWADYT	0
-DAWSGFVRGYGYDNRTN	0
-RWLWDFVNA	0
-MFVALLGLGLGQVVCSVALFF	0
-NMSTYVDYII	0
-IWVKMTFIVVLGLCFVFFWSF	0
-SLLISVVLVAYYLYIPLPDAI	0
-RKTTINGLIVAVILGVCFTGLQAYEYSHA	0
-IPVQLLWVNLVTDGLPATALG	0
-LAVIAGGVGFIMVAVLLVLFA	0
-MNKRNIMNLILAGGAGLPITTLALGYGAFF	0
-IALIVAGLSALGIATALVLNA	0
-NGNMWRILDHGAISL	0
-VAIICAAVVAVGLIVGLSVGL	0
-FSFLIVAGATTLFCLLHFGVI	0
-VGISIATIVAIIAAIYYVPW	0
-PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF	0
-LILIGAGLGVLALAAGLILTA	0
-LGLAAGAIYYYNTSNVFA	0
-QLGAGAFGGYQV	0
-IAIALLVILVVCSLITMSVIL	0
-LAFLIGGIIGGLLLLIGVSCCLW	0
-NGHMLRILDHGAISM	0
-MALILGIDRFMSECRALTNF	0
-AVIAGTTLAITALAVTS	0
-LYIVLAILCGVSIAVALALTA	0
-AAVTLGVLCLGLLVTVILLILQL	0
-LWLVIGVLTAAALAVTLIALA	0
-GTLLLLTAIGAASWAIVAVLL	0
-LLLVASVIQGLGLLLCFTYICLHFSAL	0
-AMIVALIVICITAVVAALV	0
-LVFIGTCGAVLAVALGLVLWA	0
-TRFGIAAKYQ	0
-IPWAVLIVVLITSLIIALIAL	0
-LWVVCAVLAGLGLTTALVLYA	0
-FAGRVLAGAVMMSGIGIFGLWAGIL	0
-LLIVLAGLAVVAVASGLILNA	0
-VGSIVGGIYLGFCFNAGAPAVEAFI	0
-MLSLGVSYR	0
-LTKWFFCCVCTILTMPFF	0
-LATIAASAIVLVVAVGLGLMA	0
-PIVVTGAVY	0
-KPHNLPMVFTGTAILYIGWFGFNAGSA	0
-DEFGEQLSKVISLICVAVWL	0
-HTLLTGVDF	0
-SVELIALLAISCTFFLFMHT	0
-LLIALLIYWTLAFITKTIKF	0
-FKLVIFVVLGIAIASGLMLYA	0
-CTLSISVLLAQTIFLFLIA	0
-FVIAGGTLAIPILAFVASFLL	0
-LLVITAIVLILSAAVGLVMYA	0
-LALATALIGGVAAIASLLLYA	0
-NPVIVIINLITLAAALLHTKTWFEL	0
-FIVVAGVVILAVTIALLVYFL	0
-IGLMCFLSIIITTVCIIMIAT	0
-FSVDTQLQS	0
-EVYILLNWIGYVNSGFNPLIYCRS	0
-LTLAVALIVGVSAIASLLLYA	0
-LYLAVVVLIGIGLTTTLVLYA	0
-MSTAISVLLAQAVFLLLT	0
-ALPGLMNKMEKAGCKRSVV	0
-KQFIRYLISSNVGEVVCIFL	0
-KYVVSSLVLVYGLIKVLTWIF	0
-TGVSPVFAGGVEYA	0
-IVIVLGILCFLLLLTVAVLVI	0
-KSLGILGILLGVAALCTIVALSVV	0
-LCLFVVTPVMVVGTAWIFL	0
-LYLAIVVLIGVALTATLMLYA	0
-LTILLAIAPVLALAVGLALYG	0
-FFVLLLMILILVNLAMTIWIL	0
-FIVLIPSVVITVIFLFFWLFM	0
-ILVLLILAVITIFALVCVLLV	0
-TYFIVLIPSVVITVIFLFFWLFM	0
-FLVLFIFLTSFFLNYSHTMVA	0
-GDKIGMFFQAMATFFGGFIIGF	0
-SNGVIVGTCLAFVAGMIGMAYAA	0
-FTFEGAARSDD	0
-MNYMVYFNFFACVLVPLLLMLGVYL	0
-AGLILLVVTLIGMSVLVRVLI	0
-VFLAVYLLGGITFLPLVLFTL	0
-LIKISALVFVTVAFFYLG	0
-GFFGVATFFFAALGIILIAWSAVL	0
-GQWEISVIWGLGVAMAIYLTA	0
-LWWIQAMTGFAMFFLGSVHLYIMMT	0
-IAVVITVVFLTLLSVVILIFF	0
-QVVATATFR	0
-MLLCFAFLWVLGIAYYMY	0
-IFCIIMLFALLGFL	0
-PLCICVAFTCLALVLVTSIVL	0
-LIIVLAIVVGVGAAVGLALSA	0
-ILVPCVLGLLLLPILAMLMALCV	0
-LGLLLAALICVGIATTLVLNA	0
-GYAAYYLVRKNFALAMPYLVE	0
-LPRTLAVLLVGAALAISGAVMQALF	0
-ILLFYVIFYGCLAGIFIGTIQ	0
-IMSTLLEVGYDNVKSQ	0
-LLAVALIIAMSISLAWQAAGW	0
-IVGQLLFVALGITFIYYLFTP	0
-NFWMFGLFFFFYFFIMGAYFPFFPIWL	0
-LVLIVGIVAAVGVAAALVLNA	0
-LTLAVALIGGVAAITSLLLYA	0
-ASGGIILIIAAILAMIMAN	0
-GSAGGAALAVVVLALAFGLSG	0
-LFVLLLLAILVVNLALTIWIL	0
-SPPLVLAALVACIIVLGFNYWIA	0
-LLFLILGIISFITFFLQGFTF	0
-LIVKALGILCFLLLITVAVLAV	0
-QYIHVAFQGSFACITVGLIVGAL	0
-QGIAVFGYSMAVSIGGILASR	0
-DHKRLGIMYIIVAIVMLLRGFADAIMMR	0
-IFRLHLVLGMTLFLLF	0
-LVSAIILTSFMTGLFILSLWK	0
-AVVGGVIAAVFITLITVVVLI	0
-INLGCDVDFD	0
-ALSALCLLLSVGSAAACLLLGA	0
-FTVIAGAVIVLLLTLNSNS	0
-LLFVSLLFCLIAQTCWLALV	0
-MVLVALLVIGVSLAGYLGLKA	0
-LVIPHILRLC	0
-LIRVLLGFVILFITYILFPSI	0
-PLFYIINILAPCVLIALMANLVFYL	0
-HKLGLGLEFQA	0
-ILFVAVSFIALGCVSAFVLFE	0
-AVVSAQIAITASPIS	0
-LTIIGGALFVLAVAAGLVLNA	0
-GCCGLLALALCSLALSLLA	0
-LIVLLAIVTIIAIALVAILP	0
-TEISAGWG	0
-FWRYFAGNLASGGAAGATSLCFVYPLDFARTRLAA	0
-IVAALGIIGLWMFFSSNELSIAT	0
-MTVILFVLLGISIASALVLYA	0
-SNATIAVACLSFFVCMIGAAYAS	0
-LKLTFDSSFS	0
-LNVLLSAAINFFLIAFAVYFLV	0
-HSAMLWTIGFIVTFSVGGMTGVLLAVPGADFV	0
-AVLSAKGQY	0
-LWELVIEQFEDLLVRILLLAA	0
-FFIVMGLVDAIPMIAVGLGLY	0
-LAVIAGGMGFIATAVLLVLFA	0
-ACYCRIPACLAGERRYGTCFYMGRVWAFCC	1
-AGRGKQGGKVRAKAKTRSSRAGLQFPVGRVHRLLRKGNY	1
-ALFSILRGLKKLGNMGQAFVNCKIYKKC	1
-ALSILKGLEKLAKMGIALTNCKATKKC	1
-ALWKDILKNVGKAAGKAVLNTVTDMVNQ	1
-ALWKNMLKGIGKLAGQAALGAVKTLVGAES	1
-ALWKTLLKNVGKAAGKAALNAVTDMVNQ	1
-ALWKTMLKKLGTMALHAGKAAFGAAADTISQ	1
-ALWKTMLKKLGTMALHAGKAALGAAADTISQGTQ	1
-APGNKAECEREKGYCGFLKCSFPFVVSGKCSRFFFCCKNIW	1
-ASIIKTTIKVSKAVCKTLTCICTGSCSNCK	1
-ATCDLLSGTGIKHSACAAHCLLRGNRGGYCNGRAICVCRN	1
-ATTGCSCPQCIIFDPICASSYKNGRRGFSSGCHMRCYNRCHGTDYFQISKGSKCI	1
-AYPGNGVHCGKYSCTVDKQTAIGNIGNNAA	1
-CANSCSYGPLTWSCDGNTK	1
-CRQSCSFGPLTFVCDGNTK	1
-DDTPSSRCGSGGWGPCLPIVDLLCIVHVTVGCSGGFGCCRIG	1
-DFASCHTNGGICLPNRCPGHMIQIGICFRPRVKCCRSW	1
-DFKDWMKTAGEWLKKKGPGILKAAMAAAT	1
-DGVKLCDVPSGTWSGHCGSSSKCSQQCKDREHFAYGGACHYQFPSVKCFCKRQC	1
-DKLIGSCVWGAVNYTSDCNGECKRRGYKGGHCGSFANVNCWCET	1
-ELCEKASKTWSGNCGNTGHCDNQCKSWEGAAHGACHVRNGKHMCFCYFNC	1
-FCKSLPLPLSVK	1
-FFGSLLSLGSKLLPSVFKLFQRKKE	1
-FFGSVLKLIPKIL	1
-FFGWLIKGAIHAGKAIHGLIHRRRH	1
-FFGWLIRGAIHAGKAIHGLIHRRRH	1
-FFPIVAGVAGQVLKKIYCTISKKC	1
-FGLPMLSILPKALCILLKRKC	1
-FIGLLISAGKAIHDLIRRRH	1
-FIGPIISALASLFG	1
-FKLGSFLKKAWKSKLAKKLRAKGKEMLKDYAKGLLEGGSEEVPGQ	1
-FLGGLMKAFPAIICAVTKKC	1
-FLGGLMKAFPALICAVTKKC	1
-FLNALKNFAKTAGKRLKSLLN	1
-FLPAIAGMAAKFLPKIFCAISKKC	1
-FLPAIAGVAAKFLPKIFCAISKKC	1
-FLPAIVGAAAKFLPKIFCVISKKC	1
-FLPAIVGAAGKFLPKIFCAISKKC	1
-FLPAIVGAAGQFLPKIFCAISKKC	1
-FLPAVLRVAAKIVPTVFCAISKKC	1
-FLPAVLRVAAKVVPTVFCLISKKC	1
-FLPAVLRVAAQVVPTVFCAISKKC	1
-FLPFIAGMAAKFLPKIFCAISKKC	1
-FLPFIAGMAANFLPKIFCAISKKC	1
-FLPFIAGVAAKFLPKIFCAISKKC	1
-FLPFLATLLSKVL	1
-FLPGLLAGLL	1
-FLPIASLLGKYL	1
-FLPIIAGVAAKVFPKIFCAISKKC	1
-FLPIIASVAAKVFPKIFCAISKKC	1
-FLPIIASVAAKVFSKIFCAISKKC	1
-FLPIIASVAANVFSKIFCAISKKC	1
-FLPILASLAAKFGPKLFCLVTKKC	1
-FLPILASLAAKLGPKLFCLVTKKC	1
-FLPILASLAATLGPKLLCLITKKC	1
-FLPLFASLIGKLL	1
-FLPLIGKVLSGIL	1
-FLPLIGRVLSGIL	1
-FLPLLAGLAANFFPKIFCKITRKC	1
-FLPLLAGLAANFLPKIFCKITRKC	1
-FLPLLAGLAANFLPTIICKISYKC	1
-FLPMLAGLAASMVPKFVCLITKKC	1
-FLPVVAGLAAKVLPSIICAVTKKC	1
-FMGGLIKAATKIVPAAYCAITKKC	1
-FSFKRLKGFAKKLWNSKLARKIRTKGLKYVKNFAKDMLSEGEEAPPAAEPPVEAPQ	1
-FVPYNPPRPGQSKPFPSFPGHGPFNPKIQWPYPLPNPGH	1
-GAIKDALKGAAKTVAVELLKKAQCKLEKTC	1
-GFFSLIKGVAKIATKGLAKNLGKMGLDLVGCKISKEC	1
-GFFSTVKNLATNVAGTVIDTLKCKVTGGCRS	1
-GFGALFKFLAKKVAKTVAKQAAKQGAKYVVNKQME	1
-GFISTVKNLATNVAGTVIDTIKCKVTGGC	1
-GFKGAFKNVMFGIAKSAGKSALNALACKIDKSC	1
-GFLDSFKNAMIGVAKSVGKTALSTLACKIDKSC	1
-GFLSILKKVLPKVMAHMK	1
-GFLSTVKNLATNVAGTVIDTLKCKVTGGCRS	1
-GFMKYIGPLIPHAVKAISDLI	1
-GFSSIFRGVAKFASKGLGKDLARLGVNLVACKISKQC	1
-GFVDLAKKVVGGIRNALGI	1
-GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLNNIGNTGCNEDEC	1
-GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLSNIGNTGCNEDEC	1
-GIFSKFGGKAIKNLFIKGAKNIGKEVGMDVIRTGIDVAGCKIKGEC	1
-GIFSKLAGKKLKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC	1
-GIFSKLGRKKIKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC	1
-GIFSLIKGAAQLIGKTVAKEAGKTGLELMACKVTKQC	1
-GIFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC	1
-GILDAIKAIAKAAG	1
-GILDFAKTVVGGIRNALGI	1
-GILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC	1
-GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC	1
-GIMDSVKGLAKNLAGKLLDSLKCKITGC	1
-GIMDTIKDTAKTVAVGLLNKLKCKITGC	1
-GINTLKKVIQGLHEVIKLVSNHA	1
-GINTLKKVIQGLHEVIKLVSNHE	1
-GIPCGESCVWIPCISAALGCSCKNKVCYRN	1
-GKLQAFLAKMKEIAAQTL	1
-GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT	1
-GLADFLNKAVGKVVDFVKS	1
-GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC	1
-GLFKVLGSVAKHLLPHVAPIIAEKL	1
-GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP	1
-GLFLDTLKGLAGKLLQGLKCIKAGCKP	1
-GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC	1
-GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC	1
-GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC	1
-GLFSVLGSVAKHLLPHVAPIIAEKL	1
-GLFSVLGSVAKHLLPHVVPVIAEKL	1
-GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC	1
-GLLDFVTGVGKDIFAQLIKQI	1
-GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC	1
-GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC	1
-GLLDTIKGVAKTVAASMLDKLKCKISGC	1
-GLLGGLLGPLLGGGGGGGGGLL	1
-GLLGPLLKIAAKVGSNLL	1
-GLLGSIFGAGKKIACALSGLC	1
-GLLGSLFGAGKKVACALSGLC	1
-GLLKRIKTLL	1
-GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC	1
-GLLSKVLGVGKKVLCGVSGLC	1
-GLLSVLGSVAKHVLPHVVPVIAEHL	1
-GLMSSIGKALGGLIVDVLKPKTPAS	1
-GLNALKKVFQGIHEAIKLINNHVQ	1
-GLNTLKKVFQGLHEAIKLINNHVQ	1
-GLWNKIKEAASKAAGKAALGFVNEMV	1
-GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV	1
-GLWSKIKEAAKTAGLMAMGFVNDMV	1
-GLWSTIKQKGKEAAIAAAKAAGQAALGAL	1
-GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW	1
-GRLQAFLAKMKEIAAQTL	1
-GRPNPVNNKPTPHPRL	1
-GRPNPVNTKPTPYPRL	1
-GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD	1
-GSKKPVPIIYCNRRTGKCQRM	1
-GVLDILKNAAKNILAHAAEQI	1
-GVVDILKGAGKDLLAHLVGKISEKV	1
-GWKDWAKKAGGWLKKKGPGMAKAALKAAMQ	1
-GWKDWLKKGKEWLKAKGPGIVKAALQAATQ	1
-GWKDWLNKGKEWLKKKGPGIMKAALKAATQ	1
-HGVSGHGQHGVHG	1
-IFGAILPLALGALKNLIK	1
-IIEKLVNTALGLLSGL	1
-IIGHLIKTALGMLGL	1
-ILGTILGLLKGL	1
-ILGTILGLLKSL	1
-ILPLVGNLLNDLL	1
-ILQKAVLDCLKAAGSSLSKAAITAIYNKIT	1
-INWKKIAEIGKQVLSAL	1
-INWKKIAEVGGKILSSL	1
-INWLKLGKAIIDAL	1
-IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR	1
-IWLTALKFLGKHAAKHLAKQQLSKL	1
-KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG	1
-KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC	1
-KTCENLADTY	1
-KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC	1
-KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC	1
-KWCFRVCYRGICYRKCR	1
-KWCFRVCYRGICYRRCR	1
-KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK	1
-LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC	1
-LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV	1
-LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC	1
-LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC	1
-LLKELWTKIKGAGKAVLGKIKGLL	1
-LLKELWTKMKGAGKAVLGKIKGLL	1
-LLPILGNLLNGLL	1
-LLPNLLKSLL	1
-LMCTHPLDCSN	1
-LNLKGIFKKVASLLT	1
-LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV	1
-MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL	1
-QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY	1
-QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW	1
-QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC	1
-QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS	1
-QRFIHPTYRPPPQPRRPVIMRA	1
-RQRVEELSKFSKKGAAARRRK	1
-RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG	1
-RSVCRQIKICRRRGGCYYKCTNRPY	1
-SAPRGCWTKSYPPKPCK	1
-SCTTCVCTCSCCTT	1
-SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW	1
-SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW	1
-SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY	1
-SKGKKANKDVELARG	1
-SMLSVLKNLGKVGLGFVACKINKQC	1
-TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW	1
-VDKGSYLPRPTPPRPIYNRN	1
-VDKPDYRPRPRPPNM	1
-VDKPDYRPRPWPRNMI	1
-VDKPDYRPRPWPRPN	1
-VDKPDYRPRPWPRPNM	1
-VLPIIGNLLNSLL	1
-VLPLISMALGKLL	1
-VNPIILGVLPKFVCLITKKC	1
-VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR	1
-VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW	1
-VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR	1
-VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF	1
-VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ	1
-WLGSALKIGAKLLPSVVGLFKKKKQ	1
-WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG	1
-WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG	1
-YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG	1
-YSKSLPLSVLNP	1
-YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF	1
--- a/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py	Wed Oct 28 02:18:30 2020 +0000
+++ b/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py	Tue Dec 29 04:29:54 2020 +0000
@@ -14,17 +14,23 @@
 
         n = 0
         m = 0
+        
+        l = []
+
+        for line in lines[1:]:
+            l.append(line.split('\t')[1].strip('\n').strip('\r'))
+        l = list(set(l))
 
         for line in lines:
 
-            if '1' in line.split('\t')[1].strip('\n'):
+            if l[0] in line.split('\t')[1].strip('\n').strip('\r'):
                 n= n+1
-                of1.write('>peptide_'+str(n)+'\n')
+                of1.write('>peptide_'+str(n)+'_'+str(l[0])+'\n')
                 of1.write(line.split('\t')[0]+'\n')
 
-            if '0' in line.split('\t')[1].strip('\n'):
+            if l[1] in line.split('\t')[1].strip('\n').strip('\r'):
                 m= m+1
-                of2.write('>peptide_'+str(m)+'\n')
+                of2.write('>peptide_'+str(m)+'_'+str(l[1])+'\n')
                 of2.write(line.split('\t')[0]+'\n')
 
     elif Method == 'NoClassLabel':
@@ -47,11 +53,10 @@
     parser = argparse.ArgumentParser()
 
     parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv")
-    parser.add_argument("-P", "--Postvs", required=False, default='Positive.fasta', help="Path to target tsv file")
-    parser.add_argument("-N", "--Negtvs", required=False, default='Negative.fasta', help="Path to target tsv file")
+    parser.add_argument("-P", "--Postvs", required=False, default='FirstDataFile.fasta', help="Path to target tsv file")
+    parser.add_argument("-N", "--Negtvs", required=False, default='SecondDataFile.fasta', help="Path to target tsv file")
     parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file")
     parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file")
     args = parser.parse_args()
 
-    TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile)
-
+    TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.md	Tue Dec 29 04:29:54 2020 +0000
@@ -0,0 +1,25 @@
+# PDAUG - a Galaxy based toolset for peptide library analysis, visualization, and machine learning modeling.
+
+### Overview 
+
+The Peptide Design and Analysis Under Galaxy (PDAUG) package is a Galaxy-based, Python-powered collection of tools, workflows, and datasets for rapid in-silico peptide library analysis. PDAUG offers tools for peptide library generation, data visualization, built-in and public database-based peptide sequence retrieval, peptide feature calculation, and machine learning modeling. The PDAUG tool suite can be downloaded and installed through the Galaxy ToolShed as a standard Galaxy tool.
+
+
+# Prebuilt Docker Image
+
+A prebuilt Docker image based on a recent Galaxy release can be obtained from the link below for quick installation.
+
+ - [Docker Image](https://github.com/jaidevjoshi83/docker_pdaug)
+
+# Contributors
+ - Jayadev Joshi
+ 
+ - Daniel Blankenberg
+
+# History
+
+ - 0.1.0: First release!
+
+# Support & Bug Reports
+
+You can file a [GitHub issue](https://github.com/jaidevjoshi83/docker_pdaug/issues).