changeset 0:0c6cfb9906f3 draft default tip

Uploaded
author cbib
date Wed, 10 Nov 2021 15:15:50 +0000
parents
children
files fibronectin/NWalign_PAM30 fibronectin/NWalign_PAM30.f fibronectin/args.py fibronectin/args.pyc fibronectin/fibronectin.py fibronectin/fibronectin.xml fibronectin/fibronectin_macros.xml fibronectin/fibronectin_wrapper.py fibronectin/test-data/distri.png fibronectin/test-data/fibronectin_datatest.fasta fibronectin/test-data/fibronectin_report.html
diffstat 11 files changed, 3073 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file fibronectin/NWalign_PAM30 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/NWalign_PAM30.f	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,1258 @@
+*************************************************************************
+*     This is a program for protein sequence alignment using the standard
+*     Needleman-Wunsch dynamic programming. The mutation matrix is from 
+*     PAM30 with gap opening penalty=-11 and gap extension penalty=-1.
+*     The program can be freely copied and modified provided the notices 
+*     on the head are retained. Comments and bug report should be addressed 
+*     to Yang Zhang (Email: zhng@umich.edu). Last update is in 2010/08/03.
+*
+*     Instructions:
+*     1, the program can be compiled by 
+*        >gfortran -static -O3 -ffast-math -lm -o align align.f
+*     2, simply running the program will give a brief note on how to use it
+*     3, You can run the program in following convenient ways:
+*        >align F1.fasta F2.fasta (align two sequences in fasta file)
+*        >align F1.pdb F2.pdb 1   (align two sequences in PDB file)
+*        >align F1.fasta F2.pdb 2 (align Sequence 1 in fasta and 2 in pdb)
+*        >align GKDGL EVADELVSE 3 (align sequences typed by keyboard)
+*        >align GKDGL F.fasta 4   (align Seq-1 by keyboard and 2 in fasta)
+*        >align GKDGL F.pdb 5     (align Seq-1 by keyboard and 2 in pdb)
+*************************************************************************
+      
+*     Driver: reads two protein sequences (typed on the command line,
+*     from FASTA files or from PDB files, as selected by argument 3),
+*     scores every residue pair with the table filled by subroutine
+*     matrix, aligns them with subroutine DP and prints the lengths,
+*     the identity and the aligned sequences to standard output.
+      program compares
+      PARAMETER(ndim=6000)
+      parameter(naa=24) !number of amino acid
+      common/dpc/score(ndim,ndim),gap_open,gap_extn,j2i(ndim)
+     &     ,nseq1,nseq2
+      common/matra/imut(naa,naa)     !b,z,x are additional
+
+      integer seq1(ndim),seq2(ndim)
+      character*10000 fnam1,fnam2,fnam3,fnam4
+      character*3 aa(naa)
+      character seqw(naa)
+*     The printed alignment has one column per residue of either
+*     sequence when nothing is paired, i.e. up to 2*ndim columns.
+      character sequenceA(2*ndim),sequenceB(2*ndim),sequenceM(2*ndim)
+
+*---------------------- 24 amino acids ---------------------
+      data aa/'ALA','ARG','ASN','ASP','CYS','GLN','GLU',
+     &     'GLY','HIS','ILE','LEU','LYS','MET','PHE','PRO','SER',
+     &     'THR','TRP','TYR','VAL','ASX','GLX','UNK','STOP'/
+      data seqw/'A','R','N','D','C','Q','E','G','H','I','L','K',
+     &     'M','F','P','S','T','W','Y','V','B','Z','X','*'/
+
+      call getarg(1,fnam1)
+      call getarg(2,fnam2)
+      call getarg(3,fnam3)
+      call getarg(4,fnam4)
+
+*     no arguments: print the usage note and leave
+      if(fnam1.eq.' ')then
+         write(*,*)'align F1.fasta F2.fasta ',
+     &        '(align two sequences in fasta file)'
+         write(*,*)'align F1.pdb F2.pdb 1   ',
+     &        '(align two sequences in PDB file)'
+         write(*,*)'align F1.fasta F2.pdb 2 ',
+     &        '(align Sequence 1 in fasta and 2 in pdb)'
+         write(*,*)'align GKDGL EVADELVSE 3 ',
+     &        '(align two sequences typed by keyboard)'
+         write(*,*)'align GKDGL F.fasta 4   ',
+     &        '(align Sequence 1 by keyboard and 2 in fasta)'
+         write(*,*)'align GKDGL F.pdb 5     ',
+     &        '(align Sequence 1 by keyboard and 2 in pdb)'
+         goto 999
+      endif
+      
+*1******* read sequences ------------------------->
+*     argument 3 selects the source of sequence 1 / sequence 2:
+*       5: typed/PDB     4: typed/FASTA   3: typed/typed
+*       1: PDB/PDB       2: FASTA/PDB     anything else: FASTA/FASTA
+      if(fnam3.eq.'5')then
+         call seqtyp(fnam1,seqw,naa,seq1,ndim,nseq1)
+         call seqpdb(fnam2,aa,naa,seq2,ndim,nseq2)
+      elseif(fnam3.eq.'4')then
+         call seqtyp(fnam1,seqw,naa,seq1,ndim,nseq1)
+         call seqfas(fnam2,seqw,naa,seq2,ndim,nseq2)
+      elseif(fnam3.eq.'3')then
+         call seqtyp(fnam1,seqw,naa,seq1,ndim,nseq1)
+         call seqtyp(fnam2,seqw,naa,seq2,ndim,nseq2)
+      elseif(fnam3.eq.'1')then
+         call seqpdb(fnam1,aa,naa,seq1,ndim,nseq1)
+         call seqpdb(fnam2,aa,naa,seq2,ndim,nseq2)
+      elseif(fnam3.eq.'2')then
+         call seqfas(fnam1,seqw,naa,seq1,ndim,nseq1)
+         call seqpdb(fnam2,aa,naa,seq2,ndim,nseq2)
+      else
+         call seqfas(fnam1,seqw,naa,seq1,ndim,nseq1)
+         call seqfas(fnam2,seqw,naa,seq2,ndim,nseq2)
+      endif
+      
+*2**   read mutation matrix ---------->
+      call matrix               !take pam
+***   set unit mutation matrix ---------->
+c      do i=1,naa
+c         do j=1,naa
+c            imut(i,j)=0
+c         enddo
+c      enddo
+c      do i=1,naa
+c         imut(i,i)=1
+c      enddo
+
+*3**   score------------------>
+*     j is the outer loop so score() is filled in storage order
+      do j=1,nseq2
+         do i=1,nseq1
+            score(i,j)=imut(seq1(i),seq2(j))
+         enddo
+      enddo
+
+*4*****************************************************************
+*     dynamic programming:
+******************************************************************
+      gap_open=-11
+      gap_extn=-1
+      call DP(score0)           !W(k)=Go+Ge*k1+Go+Ge*k2, standard NW
+c      call DPalt(score0)        !W(k)=Go+Ge*k1+Ge*k2, alternative NW
+      
+*5**   calculate sequence identity---------------------------->
+      L_id=0
+      L_ali=0
+      do j=1,nseq2
+         if(j2i(j).gt.0)then
+            i=j2i(j)
+            L_ali=L_ali+1
+            if(seq1(i).eq.seq2(j))L_id=L_id+1
+         endif
+      enddo
+      
+      write(*,*)
+      write(*,101)nseq1,fnam1
+ 101  format('Length of sequence 1: ',I4,' ->',A10)
+      write(*,102)nseq2,fnam2
+ 102  format('Length of sequence 2: ',I4,' ->',A10)
+      write(*,103)L_ali
+ 103  format('Aligned length: ',I4)
+      write(*,104)L_id
+ 104  format('Identical length: ',I4)
+*     the tiny offset keeps the ratio defined when nseq2 is zero
+      write(*,105)float(L_id)/(nseq2+0.00000001),L_id,nseq2
+ 105  format('Sequence identity: ',F8.3,' (=',I4,'/',I4,')')
+      write(*,*)
+
+*6******************************************************************
+***   output aligned sequences
+*     Walk both sequences once; every pass emits one alignment
+*     column and advances i, j or both.  DP only stores -1 or a
+*     positive index in j2i, so the last branch is "j2i(j) > i".
+      k=0                       !final aligned order
+      i=1                       !on sequence 1
+      j=1                       !on sequence 2
+      do while(i.le.nseq1.or.j.le.nseq2)
+         k=k+1
+         sequenceM(k)=' '
+         if(i.gt.nseq1)then     !unaligned C on 1
+            sequenceA(k)='-'
+            sequenceB(k)=seqw(seq2(j))
+            j=j+1
+         elseif(j.gt.nseq2)then !unaligned C on 2
+            sequenceA(k)=seqw(seq1(i))
+            sequenceB(k)='-'
+            i=i+1
+         elseif(i.eq.j2i(j))then !if aligned
+            sequenceA(k)=seqw(seq1(i))
+            sequenceB(k)=seqw(seq2(j))
+            if(seq1(i).eq.seq2(j))sequenceM(k)=':' !identical
+            i=i+1
+            j=j+1
+         elseif(j2i(j).lt.0)then !if gap on 1
+            sequenceA(k)='-'
+            sequenceB(k)=seqw(seq2(j))
+            j=j+1
+         else                   !if gap on 2
+            sequenceA(k)=seqw(seq1(i))
+            sequenceB(k)='-'
+            i=i+1
+         endif
+      enddo
+
+      write(*,601)(sequenceA(i),i=1,k)
+      write(*,601)(sequenceM(i),i=1,k)
+      write(*,601)(sequenceB(i),i=1,k)
+      write(*,602)(mod(i,10),i=1,k)
+ 601  format(2000A1)
+ 602  format(2000I1)
+      write(*,*)
+
+c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+c      STOP
+ 999  END
+
+********************************************************************
+*     seqtyp: decode a sequence typed as one-letter codes.
+*     str  (in/out) the typed text; scanned characters are
+*                   upper-cased in place
+*     seqw (in)     one-letter codes, seqw(j) is the code of index j
+*     seq  (out)    residue indices, nseq (out) how many were stored
+*     '-' is skipped; any other unknown character (including the
+*     blank padding) ends the scan, as does reaching ndim residues.
+********************************************************************
+      subroutine seqtyp(str,seqw,naa,seq,ndim,nseq)
+      implicit none
+      integer naa,ndim,nseq,j,k
+      integer seq(ndim)
+      character*(*) str
+      character seqw(naa),upper,cu
+
+      nseq=0
+      do k=1,len(str)
+         cu=upper(str(k:k))
+         str(k:k)=cu
+         do j=1,naa
+            if(cu.eq.seqw(j))then
+               nseq=nseq+1
+               seq(nseq)=j
+               goto 10
+            endif
+         enddo
+         if(cu.ne.'-')return
+ 10      continue
+         if(nseq.ge.ndim)return
+      enddo
+      return
+      end
+
+********************************************************************
+*     seqfas: read a sequence from the FASTA file fnam (unit 10).
+*     Lines starting with '>' are skipped.  On every other line '-'
+*     is ignored and the first character that is neither '-' nor a
+*     known one-letter code ends that line.  Reading stops at end of
+*     file or as soon as ndim residues are stored; the limit is
+*     tested after every residue so a long line cannot overrun seq.
+********************************************************************
+      subroutine seqfas(fnam,seqw,naa,seq,ndim,nseq)
+      implicit none
+      integer naa,ndim,nseq,j,k
+      integer seq(ndim)
+      character*(*) fnam
+      character seqw(naa),upper,cu
+      character*10000 s
+
+      nseq=0
+      open(unit=10,file=fnam,status='old')
+      do while(nseq.lt.ndim)
+         read(10,'(A10000)',end=30)s
+         if(s(1:1).ne.'>')then
+            do k=1,len(s)
+               cu=upper(s(k:k))
+               do j=1,naa
+                  if(cu.eq.seqw(j))then
+                     nseq=nseq+1
+                     seq(nseq)=j
+                     goto 10
+                  endif
+               enddo
+               if(cu.ne.'-')goto 20 !anything else ends the line
+ 10            continue
+               if(nseq.ge.ndim)goto 20 !seq is full
+            enddo
+         endif
+ 20      continue
+      enddo
+ 30   continue
+      close(10)
+      return
+      end
+
+********************************************************************
+*     seqpdb: read a sequence from the PDB file fnam (unit 10).
+*     One residue is taken from every line that starts with 'ATO'
+*     and whose columns 13-16 name a CA atom; the three-letter name
+*     is taken from columns 18-20 and looked up in aa.  Names that
+*     are not in aa are stored as 'UNK' so seq never holds an
+*     undefined index.  Reading stops at end of file, at a line
+*     starting with 'TER' once a residue has been read, or when
+*     ndim residues are stored.
+********************************************************************
+      subroutine seqpdb(fnam,aa,naa,seq,ndim,nseq)
+      implicit none
+      integer naa,ndim,nseq,j,iunk
+      integer seq(ndim)
+      character*(*) fnam
+      character*3 aa(naa),aanam
+      character*10000 s
+
+*     index used for unrecognised names; naa-1 is only the fallback
+*     should the table ever lose its 'UNK' entry
+      iunk=naa-1
+      do j=1,naa
+         if(aa(j).eq.'UNK')iunk=j
+      enddo
+
+      nseq=0
+      open(unit=10,file=fnam,status='old')
+      do while(nseq.lt.ndim)
+         read(10,'(A10000)',end=50)s
+         if(nseq.gt.0.and.s(1:3).eq.'TER')goto 50
+         if(s(1:3).eq.'ATO')then
+            if(s(13:16).eq.'CA  '.or.s(13:16).eq.' CA '
+     &           .or.s(13:16).eq.'  CA')then
+               nseq=nseq+1
+               aanam=s(18:20)
+               seq(nseq)=iunk
+               do j=1,naa
+                  if(aanam.eq.aa(j))seq(nseq)=j
+               enddo
+            endif
+         endif
+      enddo
+ 50   continue
+      close(10)
+      return
+      end
+      
+********************************************************************
+*     Standard Needleman-Wunsch dynamic programming (Y. Zhang, 2005)
+*     with separate gap-opening and gap-extension penalties.  Runs
+*     of gaps are remembered by length so the trace-back can jump
+*     over a whole run at once.  Author's penalty note:
+*     W(k)=Go+Ge*k1+Go+Ge*k2 if gaps open on both sequences.
+*
+*     Input  (common /dpc/): score(i,j), gap_open, gap_extn,
+*                            nseq1, nseq2
+*     Output: score0 = val(nseq1,nseq2), the alignment score
+*             j2i(j) = residue of sequence 1 paired with residue j
+*                      of sequence 2, or -1 when j is unpaired
+*     Work arrays:
+*       val(i,j)    cumulative score at (i,j)
+*       idir(i,j)   1,2,3 = reached from diagonal, horizontal,
+*                   vertical
+*       preH, preV  best horizontal / vertical candidate at (i,j)
+*       jpH, jpV    length of the gap run behind that candidate
+********************************************************************
+      subroutine DP(score0)
+      PARAMETER(ndim=6000)
+      common/dpc/score(ndim,ndim),gap_open,gap_extn,j2i(ndim)
+     &     ,nseq1,nseq2
+      
+      dimension val(0:ndim,0:ndim),idir(0:ndim,0:ndim)
+      dimension jpV(0:ndim,0:ndim),jpH(0:ndim,0:ndim)
+      dimension preV(0:ndim,0:ndim),preH(0:ndim,0:ndim)
+      real diag,horz,vert,wopen,wextn
+      integer ir,jc
+      
+ccc   borders: only val, preV(.,0) and preH(0,.) are read later
+      val(0,0)=0.0 
+      do ir=1,nseq1
+         val(ir,0)=gap_extn*ir
+         preV(ir,0)=val(ir,0)
+         idir(ir,0)=0
+         jpV(ir,0)=1
+         jpH(ir,0)=ir
+      enddo
+      do jc=1,nseq2
+         val(0,jc)=gap_extn*jc
+         preH(0,jc)=val(0,jc)
+         idir(0,jc)=0
+         jpV(0,jc)=jc
+         jpH(0,jc)=1
+      enddo
+      
+ccc   fill the table column by column
+      do jc=1,nseq2
+         do ir=1,nseq1
+ccc   diagonal candidate: pair residue ir with residue jc
+            diag=val(ir-1,jc-1)+score(ir,jc)
+
+ccc   horizontal candidate: open a gap or extend the running one
+            wopen=val(ir-1,jc)+gap_open
+            wextn=preH(ir-1,jc)+gap_extn
+            jpH(ir,jc)=1
+            if(wopen.gt.wextn)then
+               horz=wopen
+            else
+               horz=wextn
+               if(ir.gt.1)jpH(ir,jc)=jpH(ir-1,jc)+1
+            endif
+
+ccc   vertical candidate, same rule along jc
+            wopen=val(ir,jc-1)+gap_open
+            wextn=preV(ir,jc-1)+gap_extn
+            jpV(ir,jc)=1
+            if(wopen.gt.wextn)then
+               vert=wopen
+            else
+               vert=wextn
+               if(jc.gt.1)jpV(ir,jc)=jpV(ir,jc-1)+1
+            endif
+
+            preH(ir,jc)=horz
+            preV(ir,jc)=vert
+
+ccc   the diagonal wins only when strictly best; otherwise the
+ccc   horizontal wins only when strictly better than the vertical
+            if(diag.le.horz.or.diag.le.vert)then
+               if(horz.gt.vert)then
+                  idir(ir,jc)=2
+                  val(ir,jc)=horz
+               else
+                  idir(ir,jc)=3
+                  val(ir,jc)=vert
+               endif
+            else
+               idir(ir,jc)=1
+               val(ir,jc)=diag
+            endif
+         enddo
+      enddo
+      score0=val(nseq1,nseq2)   !alignment score
+      
+ccc   trace back from the far corner until either index reaches 0
+      do jc=1,nseq2
+         j2i(jc)=-1             !nothing paired yet
+      enddo
+      ir=nseq1
+      jc=nseq2
+      do while(ir.gt.0.and.jc.gt.0)
+         if(idir(ir,jc).eq.1)then
+            j2i(jc)=ir
+            ir=ir-1
+            jc=jc-1
+         elseif(idir(ir,jc).eq.2)then
+ccc   skip the whole horizontal run, never going below 0
+            ir=max(ir-jpH(ir,jc),0)
+         else
+ccc   skip the whole vertical run, never going below 0
+            jc=max(jc-jpV(ir,jc),0)
+         endif
+      enddo
+      
+*^^^^^^^^^^^DP finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^      
+      return
+      end
+      
+********************************************************************
+*     Alternative Needleman-Wunsch dynamic programming
+*     (Y. Zhang, 2005).  Three candidates are kept per cell (preD,
+*     preH, preV) and each gap candidate remembers which of the
+*     three it grew from, so the trace-back can follow it.
+*     Author's penalty note: W(k)=Go+Ge*k1+Ge*k2 if gaps open on
+*     both sequences.  All border values of val are 0.
+*
+*     Input  (common /dpc/): score(i,j), gap_open, gap_extn,
+*                            nseq1, nseq2
+*     Output: score0 = val(nseq1,nseq2), the alignment score
+*             j2i(j) = residue of sequence 1 paired with residue j
+*                      of sequence 2, or -1 when j is unpaired
+*     Work arrays:
+*       val(i,j)     cumulative score at (i,j)
+*       idir(i,j)    1,2,3 = reached from diagonal, horizontal,
+*                    vertical
+*       idirH(i-1,j) which candidate of (i-1,j) fed preH(i,j)
+*       idirV(i,j-1) which candidate of (i,j-1) fed preV(i,j)
+********************************************************************
+      subroutine DPalt(score0)
+      PARAMETER(ndim=6000)
+      common/dpc/score(ndim,ndim),gap_open,gap_extn,j2i(ndim)
+     &     ,nseq1,nseq2
+      
+      dimension val(0:ndim,0:ndim),idir(0:ndim,0:ndim)
+      dimension preV(0:ndim,0:ndim),preH(0:ndim,0:ndim),
+     &     preD(0:ndim,0:ndim)
+      dimension idirH(0:ndim,0:ndim),idirV(0:ndim,0:ndim)
+      real fromd,fromh,fromv
+      integer ir,jc
+      
+ccc   borders: score 0, gap candidates set to -1000
+      val(0,0)=0.0
+      do ir=1,nseq1
+        val(ir,0)=0
+        idir(ir,0)=0
+        preD(ir,0)=0.0
+        preH(ir,0)=-1000.0
+        preV(ir,0)=-1000.0
+      enddo
+      do jc=1,nseq2
+        val(0,jc)=0
+        idir(0,jc)=0
+        preD(0,jc)=0.0
+        preH(0,jc)=-1000.0
+        preV(0,jc)=-1000.0
+      enddo
+      
+ccc   fill the table column by column
+      do jc=1,nseq2
+         do ir=1,nseq1
+ccc   diagonal candidate
+            preD(ir,jc)=val(ir-1,jc-1)+score(ir,jc)
+
+ccc   horizontal candidate, grown from the three at (ir-1,jc)
+            fromd=preD(ir-1,jc)+gap_open
+            fromh=preH(ir-1,jc)+gap_extn
+            fromv=preV(ir-1,jc)+gap_extn
+            if(fromd.le.fromh.or.fromd.le.fromv)then
+               if(fromh.gt.fromv)then
+                  preH(ir,jc)=fromh
+                  idirH(ir-1,jc)=2
+               else
+                  preH(ir,jc)=fromv
+                  idirH(ir-1,jc)=3
+               endif
+            else
+               preH(ir,jc)=fromd
+               idirH(ir-1,jc)=1
+            endif
+
+ccc   vertical candidate, grown from the three at (ir,jc-1)
+            fromd=preD(ir,jc-1)+gap_open
+            fromh=preH(ir,jc-1)+gap_extn
+            fromv=preV(ir,jc-1)+gap_extn
+            if(fromd.le.fromh.or.fromd.le.fromv)then
+               if(fromh.gt.fromv)then
+                  preV(ir,jc)=fromh
+                  idirV(ir,jc-1)=2
+               else
+                  preV(ir,jc)=fromv
+                  idirV(ir,jc-1)=3
+               endif
+            else
+               preV(ir,jc)=fromd
+               idirV(ir,jc-1)=1
+            endif
+            
+ccc   keep the best candidate; the diagonal must be strictly best
+            if(preD(ir,jc).le.preH(ir,jc).or.
+     &           preD(ir,jc).le.preV(ir,jc))then
+               if(preH(ir,jc).gt.preV(ir,jc))then
+                  idir(ir,jc)=2
+                  val(ir,jc)=preH(ir,jc)
+               else
+                  idir(ir,jc)=3
+                  val(ir,jc)=preV(ir,jc)
+               endif
+            else
+               idir(ir,jc)=1
+               val(ir,jc)=preD(ir,jc)
+            endif
+         enddo
+      enddo
+      score0=val(nseq1,nseq2)   !alignment score
+      
+ccc   trace back; after a gap step idir of the cell just reached
+ccc   is overwritten with the candidate that gap was grown from
+      do jc=1,nseq2
+        j2i(jc)=-1             !nothing paired yet
+      enddo
+      ir=nseq1
+      jc=nseq2
+      do while(ir.gt.0.and.jc.gt.0)
+         if(idir(ir,jc).eq.1)then
+            j2i(jc)=ir
+            ir=ir-1
+            jc=jc-1
+         elseif(idir(ir,jc).eq.2)then
+            ir=ir-1
+            idir(ir,jc)=idirH(ir,jc)
+         else
+            jc=jc-1
+            idir(ir,jc)=idirV(ir,jc)
+         endif
+      enddo
+      
+*^^^^^^^^^^^DP finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^      
+      return
+      end
+      
+********************************************************************
+*     Fill the substitution table imut(naa,naa) in common /matra/
+*     with hard-coded PAM30 scores; nothing is read from a file.
+*     The main program indexes it as imut(seq1(i),seq2(j)), so both
+*     subscripts follow the order of its one-letter table seqw:
+*     A R N D C Q E G H I L K M F P S T W Y V B Z X *
+*     
+      subroutine matrix
+      parameter(naa=24) !number of amino acid
+      common/matra/imut(naa,naa)     !b,z,x are additional
+
+*     following from PAM30:      
+      imut(1,1)=6
+      imut(1,2)=-7
+      imut(1,3)=-4
+      imut(1,4)=-3
+      imut(1,5)=-6
+      imut(1,6)=-4
+      imut(1,7)=-2
+      imut(1,8)=-2
+      imut(1,9)=-7
+      imut(1,10)=-5
+      imut(1,11)=-6
+      imut(1,12)=-7
+      imut(1,13)=-5
+      imut(1,14)=-8
+      imut(1,15)=-2
+      imut(1,16)=0
+      imut(1,17)=-1
+      imut(1,18)=-13
+      imut(1,19)=-8
+      imut(1,20)=-2
+      imut(1,21)=-3
+      imut(1,22)=-3
+      imut(1,23)=-1
+      imut(1,24)=-17
+      imut(2,1)=-7
+      imut(2,2)=8
+      imut(2,3)=-6
+      imut(2,4)=-10
+      imut(2,5)=-8
+      imut(2,6)=-2
+      imut(2,7)=-9
+      imut(2,8)=-9
+      imut(2,9)=-2
+      imut(2,10)=-5
+      imut(2,11)=-8
+      imut(2,12)=0
+      imut(2,13)=-4
+      imut(2,14)=-9
+      imut(2,15)=-4
+      imut(2,16)=-3
+      imut(2,17)=-6
+      imut(2,18)=-2
+      imut(2,19)=-10
+      imut(2,20)=-8
+      imut(2,21)=-7
+      imut(2,22)=-4
+      imut(2,23)=-1
+      imut(2,24)=-17
+      imut(3,1)=-4
+      imut(3,2)=-6
+      imut(3,3)=8
+      imut(3,4)=2
+      imut(3,5)=-11
+      imut(3,6)=-3
+      imut(3,7)=-2
+      imut(3,8)=-3
+      imut(3,9)=0
+      imut(3,10)=-5
+      imut(3,11)=-7
+      imut(3,12)=-1
+      imut(3,13)=-9
+      imut(3,14)=-9
+      imut(3,15)=-6
+      imut(3,16)=0
+      imut(3,17)=-2
+      imut(3,18)=-8
+      imut(3,19)=-4
+      imut(3,20)=-8
+      imut(3,21)=6
+      imut(3,22)=-3
+      imut(3,23)=-1
+      imut(3,24)=-17
+      imut(4,1)=-3
+      imut(4,2)=-10
+      imut(4,3)=2
+      imut(4,4)=8
+      imut(4,5)=-14
+      imut(4,6)=-2
+      imut(4,7)=2
+      imut(4,8)=-3
+      imut(4,9)=-4
+      imut(4,10)=-7
+      imut(4,11)=-12
+      imut(4,12)=-4
+      imut(4,13)=-11
+      imut(4,14)=-15
+      imut(4,15)=-8
+      imut(4,16)=-4
+      imut(4,17)=-5
+      imut(4,18)=-15
+      imut(4,19)=-11
+      imut(4,20)=-8
+      imut(4,21)=6
+      imut(4,22)=1
+      imut(4,23)=-1
+      imut(4,24)=-17
+      imut(5,1)=-6
+      imut(5,2)=-8
+      imut(5,3)=-11
+      imut(5,4)=-14
+      imut(5,5)=10
+      imut(5,6)=-14
+      imut(5,7)=-14
+      imut(5,8)=-9
+      imut(5,9)=-7
+      imut(5,10)=-6
+      imut(5,11)=-15
+      imut(5,12)=-14
+      imut(5,13)=-13
+      imut(5,14)=-13
+      imut(5,15)=-8
+      imut(5,16)=-3
+      imut(5,17)=-8
+      imut(5,18)=-15
+      imut(5,19)=-4
+      imut(5,20)=-6
+      imut(5,21)=-12
+      imut(5,22)=-14
+      imut(5,23)=-1
+      imut(5,24)=-17
+      imut(6,1)=-4
+      imut(6,2)=-2
+      imut(6,3)=-3
+      imut(6,4)=-2
+      imut(6,5)=-14
+      imut(6,6)=8
+      imut(6,7)=1
+      imut(6,8)=-7
+      imut(6,9)=1
+      imut(6,10)=-8
+      imut(6,11)=-5
+      imut(6,12)=-3
+      imut(6,13)=-4
+      imut(6,14)=-13
+      imut(6,15)=-3
+      imut(6,16)=-5
+      imut(6,17)=-5
+      imut(6,18)=-13
+      imut(6,19)=-12
+      imut(6,20)=-7
+      imut(6,21)=-3
+      imut(6,22)=6
+      imut(6,23)=-1
+      imut(6,24)=-17
+      imut(7,1)=-2
+      imut(7,2)=-9
+      imut(7,3)=-2
+      imut(7,4)=2
+      imut(7,5)=-14
+      imut(7,6)=1
+      imut(7,7)=8
+      imut(7,8)=-4
+      imut(7,9)=-5
+      imut(7,10)=-5
+      imut(7,11)=-9
+      imut(7,12)=-4
+      imut(7,13)=-7
+      imut(7,14)=-14
+      imut(7,15)=-5
+      imut(7,16)=-4
+      imut(7,17)=-6
+      imut(7,18)=-17
+      imut(7,19)=-8
+      imut(7,20)=-6
+      imut(7,21)=1
+      imut(7,22)=6
+      imut(7,23)=-1
+      imut(7,24)=-17
+      imut(8,1)=-2
+      imut(8,2)=-9
+      imut(8,3)=-3
+      imut(8,4)=-3
+      imut(8,5)=-9
+      imut(8,6)=-7
+      imut(8,7)=-4
+      imut(8,8)=6
+      imut(8,9)=-9
+      imut(8,10)=-11
+      imut(8,11)=-10
+      imut(8,12)=-7
+      imut(8,13)=-8
+      imut(8,14)=-9
+      imut(8,15)=-6
+      imut(8,16)=-2
+      imut(8,17)=-6
+      imut(8,18)=-15
+      imut(8,19)=-14
+      imut(8,20)=-5
+      imut(8,21)=-3
+      imut(8,22)=-5
+      imut(8,23)=-1
+      imut(8,24)=-17
+      imut(9,1)=-7
+      imut(9,2)=-2
+      imut(9,3)=0
+      imut(9,4)=-4
+      imut(9,5)=-7
+      imut(9,6)=1
+      imut(9,7)=-5
+      imut(9,8)=-9
+      imut(9,9)=9
+      imut(9,10)=-9
+      imut(9,11)=-6
+      imut(9,12)=-6
+      imut(9,13)=-10
+      imut(9,14)=-6
+      imut(9,15)=-4
+      imut(9,16)=-6
+      imut(9,17)=-7
+      imut(9,18)=-7
+      imut(9,19)=-3
+      imut(9,20)=-6
+      imut(9,21)=-1
+      imut(9,22)=-1
+      imut(9,23)=-1
+      imut(9,24)=-17
+      imut(10,1)=-5
+      imut(10,2)=-5
+      imut(10,3)=-5
+      imut(10,4)=-7
+      imut(10,5)=-6
+      imut(10,6)=-8
+      imut(10,7)=-5
+      imut(10,8)=-11
+      imut(10,9)=-9
+      imut(10,10)=8
+      imut(10,11)=-1
+      imut(10,12)=-6
+      imut(10,13)=-1
+      imut(10,14)=-2
+      imut(10,15)=-8
+      imut(10,16)=-7
+      imut(10,17)=-2
+      imut(10,18)=-14
+      imut(10,19)=-6
+      imut(10,20)=2
+      imut(10,21)=-6
+      imut(10,22)=-6
+      imut(10,23)=-1
+      imut(10,24)=-17
+      imut(11,1)=-6
+      imut(11,2)=-8
+      imut(11,3)=-7
+      imut(11,4)=-12
+      imut(11,5)=-15
+      imut(11,6)=-5
+      imut(11,7)=-9
+      imut(11,8)=-10
+      imut(11,9)=-6
+      imut(11,10)=-1
+      imut(11,11)=7
+      imut(11,12)=-8
+      imut(11,13)=1
+      imut(11,14)=-3
+      imut(11,15)=-7
+      imut(11,16)=-8
+      imut(11,17)=-7
+      imut(11,18)=-6
+      imut(11,19)=-7
+      imut(11,20)=-2
+      imut(11,21)=-9
+      imut(11,22)=-7
+      imut(11,23)=-1
+      imut(11,24)=-17
+      imut(12,1)=-7
+      imut(12,2)=0
+      imut(12,3)=-1
+      imut(12,4)=-4
+      imut(12,5)=-14
+      imut(12,6)=-3
+      imut(12,7)=-4
+      imut(12,8)=-7
+      imut(12,9)=-6
+      imut(12,10)=-6
+      imut(12,11)=-8
+      imut(12,12)=7
+      imut(12,13)=-2
+      imut(12,14)=-14
+      imut(12,15)=-6
+      imut(12,16)=-4
+      imut(12,17)=-3
+      imut(12,18)=-12
+      imut(12,19)=-9
+      imut(12,20)=-9
+      imut(12,21)=-2
+      imut(12,22)=-4
+      imut(12,23)=-1
+      imut(12,24)=-17
+      imut(13,1)=-5
+      imut(13,2)=-4
+      imut(13,3)=-9
+      imut(13,4)=-11
+      imut(13,5)=-13
+      imut(13,6)=-4
+      imut(13,7)=-7
+      imut(13,8)=-8
+      imut(13,9)=-10
+      imut(13,10)=-1
+      imut(13,11)=1
+      imut(13,12)=-2
+      imut(13,13)=11
+      imut(13,14)=-4
+      imut(13,15)=-8
+      imut(13,16)=-5
+      imut(13,17)=-4
+      imut(13,18)=-13
+      imut(13,19)=-11
+      imut(13,20)=-1
+      imut(13,21)=-10
+      imut(13,22)=-5
+      imut(13,23)=-1
+      imut(13,24)=-17
+      imut(14,1)=-8
+      imut(14,2)=-9
+      imut(14,3)=-9
+      imut(14,4)=-15
+      imut(14,5)=-13
+      imut(14,6)=-13
+      imut(14,7)=-14
+      imut(14,8)=-9
+      imut(14,9)=-6
+      imut(14,10)=-2
+      imut(14,11)=-3
+      imut(14,12)=-14
+      imut(14,13)=-4
+      imut(14,14)=9
+      imut(14,15)=-10
+      imut(14,16)=-6
+      imut(14,17)=-9
+      imut(14,18)=-4
+      imut(14,19)=2
+      imut(14,20)=-8
+      imut(14,21)=-10
+      imut(14,22)=-13
+      imut(14,23)=-1
+      imut(14,24)=-17
+      imut(15,1)=-2
+      imut(15,2)=-4
+      imut(15,3)=-6
+      imut(15,4)=-8
+      imut(15,5)=-8
+      imut(15,6)=-3
+      imut(15,7)=-5
+      imut(15,8)=-6
+      imut(15,9)=-4
+      imut(15,10)=-8
+      imut(15,11)=-7
+      imut(15,12)=-6
+      imut(15,13)=-8
+      imut(15,14)=-10
+      imut(15,15)=8
+      imut(15,16)=-2
+      imut(15,17)=-4
+      imut(15,18)=-14
+      imut(15,19)=-13
+      imut(15,20)=-6
+      imut(15,21)=-7
+      imut(15,22)=-4
+      imut(15,23)=-1
+      imut(15,24)=-17
+      imut(16,1)=0
+      imut(16,2)=-3
+      imut(16,3)=0
+      imut(16,4)=-4
+      imut(16,5)=-3
+      imut(16,6)=-5
+      imut(16,7)=-4
+      imut(16,8)=-2
+      imut(16,9)=-6
+      imut(16,10)=-7
+      imut(16,11)=-8
+      imut(16,12)=-4
+      imut(16,13)=-5
+      imut(16,14)=-6
+      imut(16,15)=-2
+      imut(16,16)=6
+      imut(16,17)=0
+      imut(16,18)=-5
+      imut(16,19)=-7
+      imut(16,20)=-6
+      imut(16,21)=-1
+      imut(16,22)=-5
+      imut(16,23)=-1
+      imut(16,24)=-17
+      imut(17,1)=-1
+      imut(17,2)=-6
+      imut(17,3)=-2
+      imut(17,4)=-5
+      imut(17,5)=-8
+      imut(17,6)=-5
+      imut(17,7)=-6
+      imut(17,8)=-6
+      imut(17,9)=-7
+      imut(17,10)=-2
+      imut(17,11)=-7
+      imut(17,12)=-3
+      imut(17,13)=-4
+      imut(17,14)=-9
+      imut(17,15)=-4
+      imut(17,16)=0
+      imut(17,17)=7
+      imut(17,18)=-13
+      imut(17,19)=-6
+      imut(17,20)=-3
+      imut(17,21)=-3
+      imut(17,22)=-6
+      imut(17,23)=-1
+      imut(17,24)=-17
+      imut(18,1)=-13
+      imut(18,2)=-2
+      imut(18,3)=-8
+      imut(18,4)=-15
+      imut(18,5)=-15
+      imut(18,6)=-13
+      imut(18,7)=-17
+      imut(18,8)=-15
+      imut(18,9)=-7
+      imut(18,10)=-14
+      imut(18,11)=-6
+      imut(18,12)=-12
+      imut(18,13)=-13
+      imut(18,14)=-4
+      imut(18,15)=-14
+      imut(18,16)=-5
+      imut(18,17)=-13
+      imut(18,18)=13
+      imut(18,19)=-5
+      imut(18,20)=-15
+      imut(18,21)=-10
+      imut(18,22)=-14
+      imut(18,23)=-1
+      imut(18,24)=-17
+      imut(19,1)=-8
+      imut(19,2)=-10
+      imut(19,3)=-4
+      imut(19,4)=-11
+      imut(19,5)=-4
+      imut(19,6)=-12
+      imut(19,7)=-8
+      imut(19,8)=-14
+      imut(19,9)=-3
+      imut(19,10)=-6
+      imut(19,11)=-7
+      imut(19,12)=-9
+      imut(19,13)=-11
+      imut(19,14)=2
+      imut(19,15)=-13
+      imut(19,16)=-7
+      imut(19,17)=-6
+      imut(19,18)=-5
+      imut(19,19)=10
+      imut(19,20)=-7
+      imut(19,21)=-6
+      imut(19,22)=-9
+      imut(19,23)=-1
+      imut(19,24)=-17
+      imut(20,1)=-2
+      imut(20,2)=-8
+      imut(20,3)=-8
+      imut(20,4)=-8
+      imut(20,5)=-6
+      imut(20,6)=-7
+      imut(20,7)=-6
+      imut(20,8)=-5
+      imut(20,9)=-6
+      imut(20,10)=2
+      imut(20,11)=-2
+      imut(20,12)=-9
+      imut(20,13)=-1
+      imut(20,14)=-8
+      imut(20,15)=-6
+      imut(20,16)=-6
+      imut(20,17)=-3
+      imut(20,18)=-15
+      imut(20,19)=-7
+      imut(20,20)=7
+      imut(20,21)=-8
+      imut(20,22)=-6
+      imut(20,23)=-1
+      imut(20,24)=-17
+      imut(21,1)=-3
+      imut(21,2)=-7
+      imut(21,3)=6
+      imut(21,4)=6
+      imut(21,5)=-12
+      imut(21,6)=-3
+      imut(21,7)=1
+      imut(21,8)=-3
+      imut(21,9)=-1
+      imut(21,10)=-6
+      imut(21,11)=-9
+      imut(21,12)=-2
+      imut(21,13)=-10
+      imut(21,14)=-10
+      imut(21,15)=-7
+      imut(21,16)=-1
+      imut(21,17)=-3
+      imut(21,18)=-10
+      imut(21,19)=-6
+      imut(21,20)=-8
+      imut(21,21)=6
+      imut(21,22)=0
+      imut(21,23)=-1
+      imut(21,24)=-17
+      imut(22,1)=-3
+      imut(22,2)=-4
+      imut(22,3)=-3
+      imut(22,4)=1
+      imut(22,5)=-14
+      imut(22,6)=6
+      imut(22,7)=6
+      imut(22,8)=-5
+      imut(22,9)=-1
+      imut(22,10)=-6
+      imut(22,11)=-7
+      imut(22,12)=-4
+      imut(22,13)=-5
+      imut(22,14)=-13
+      imut(22,15)=-4
+      imut(22,16)=-5
+      imut(22,17)=-6
+      imut(22,18)=-14
+      imut(22,19)=-9
+      imut(22,20)=-6
+      imut(22,21)=0
+      imut(22,22)=6
+      imut(22,23)=-1
+      imut(22,24)=-17
+      imut(23,1)=-1
+      imut(23,2)=-1
+      imut(23,3)=-1
+      imut(23,4)=-1
+      imut(23,5)=-1
+      imut(23,6)=-1
+      imut(23,7)=-1
+      imut(23,8)=-1
+      imut(23,9)=-1
+      imut(23,10)=-1
+      imut(23,11)=-1
+      imut(23,12)=-1
+      imut(23,13)=-1
+      imut(23,14)=-1
+      imut(23,15)=-1
+      imut(23,16)=-1
+      imut(23,17)=-1
+      imut(23,18)=-1
+      imut(23,19)=-1
+      imut(23,20)=-1
+      imut(23,21)=-1
+      imut(23,22)=-1
+      imut(23,23)=-1
+      imut(23,24)=-17
+      imut(24,1)=-17
+      imut(24,2)=-17
+      imut(24,3)=-17
+      imut(24,4)=-17
+      imut(24,5)=-17
+      imut(24,6)=-17
+      imut(24,7)=-17
+      imut(24,8)=-17
+      imut(24,9)=-17
+      imut(24,10)=-17
+      imut(24,11)=-17
+      imut(24,12)=-17
+      imut(24,13)=-17
+      imut(24,14)=-17
+      imut(24,15)=-17
+      imut(24,16)=-17
+      imut(24,17)=-17
+      imut(24,18)=-17
+      imut(24,19)=-17
+      imut(24,20)=-17
+      imut(24,21)=-17
+      imut(24,22)=-17
+      imut(24,23)=-17
+      imut(24,24)=1
+      
+      return
+      end
+      
+*     Return A in upper case when it lies between 'a' and 'z' in
+*     the processor's collating order, otherwise return A itself.
+*     The argument is left untouched, so constants and substrings
+*     can be passed safely.  Subtracting 32 from the character code
+*     assumes an ASCII-compatible character set.
+      function upper(A)
+      CHARACTER A,upper
+      upper=A
+      IF(A.GE.'a'.and.A.LE.'z')upper=CHAR(ICHAR(A)-32)
+      RETURN
+      END
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/args.py	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+import sys, getopt, os
+
+
+class Args:
+
+    def __init__(self):
+        """
+        Instanciate Files object
+        """
+        self.input = None
+        self.output_dir = None
+        self.pattern = None
+        self.site_res_5 = None
+        self.site_res_3 = None
+        self.getargs()
+
+    def usage(self, info):
+        text = None
+        text = "Fibronectin script.\n\n"
+        if (info): text += info
+        temp = "Option\t\t\t\tfile\t\t\tDescription\n"
+        text += temp
+        text += '-' * (len(temp) + 60)
+        text += '\n'
+        text += "-i, --input\t\t\tfile.fasta\t\tFasta file that contains the DNA sequences\n"
+        text += "-o, --output_dir\t\t/path/for/output\tDirectory where output files will be written\n"
+        text += "-p, --pattern\t\t\tstring\t\t\tPattern of the sequence bank\n"
+        text += "-5, --restriction-site-5\tstring\t\t\tSequence of the restriction site in 5'\n"
+        text += "-3, --restriction-site-3\tstring\t\t\tSequence of the restriction site in 3'\n"
+        return text
+
+    def case(self):
+        # Test des fichiers et repertoires
+        if not self.input:
+            sys.exit(self.usage("input (-i,--input) : \"%s\" must be indicated\n" % (self.input)))
+        if not self.output_dir:
+            sys.exit(self.usage("output directory (-o,--output_dir) : \"%s\" must be indicated\n" % (self.output_dir)))
+        if not self.pattern:
+            sys.exit(
+                self.usage("Pattern of the sequence bank (-p,--pattern) : \"%s\" must be indicated\n" % (self.pattern)))
+        if not self.site_res_5:
+            sys.exit(self.usage(
+                "Sequence of the restriction site in 5' (-5,--restriction-site-5) : \"%s\" must be indicated\n" % (
+                    self.site_res_5)))
+        if not self.site_res_3:
+            sys.exit(self.usage(
+                "Sequence of the restriction site in 3' (-3,--restriction-site-3) : \"%s\" must be indicated\n" % (
+                    self.site_res_3)))
+
+    def data_format(self):
+        """
+        Check if information are correct
+        """
+        # Run without arguments
+        if len(sys.argv) == 1:
+            sys.exit(self.usage(None))
+            # Test input file argument
+        if self.input:
+            if not os.path.isfile(self.input):
+                print(self.input)
+                print(os.path.isfile(self.input))
+                #sys.exit(self.usage("Error with \"%s\" : -i required an input file\n" % self.multilist))
+
+                # Determine les fichiers fournis en arguments
+
+    def getargs(self):
+        """
+        Determine the files provided as arguments
+        @return: Choosen options
+        """
+        # Sans argument
+        if len(sys.argv) <= 1: sys.exit("Do './fibronectin.py -h' for a usage summary")
+        # test des option
+        try:
+            (opts, args) = getopt.getopt(sys.argv[1:], "i:o:p:5:3:h",
+                                         ["input=", "output_dir=", "pattern=", "site_res_5=", "site_res_3="])
+        except getopt.GetoptError as err:
+            # print help information and exit:
+            print(str(err))  # will print something like "option -a not recognized"
+            sys.exit(self.usage(None))
+        # Identification of options
+        for (o, a) in opts:
+            if o in ("-i", "--input"):
+                self.input = a
+            elif o in ("-o", "--output_dir"):
+                self.output_dir = a
+            elif o in ("-p", "--pattern"):
+                self.pattern = a
+            elif o in ("-5", "--restriction-site-5"):
+                self.site_res_5 = a
+            elif o in ("-3", "--restriction-site-3"):
+                self.site_res_3 = a
+            elif o in ("-h", "--help"):
+                sys.exit(self.usage(None))
+            else:
+                assert False, "unhandled option"
+            # Verification of cases
+        self.case()
+        self.data_format()
Binary file fibronectin/args.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/fibronectin.py	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,424 @@
+#!/usr/bin/env python
+# title           :fibronectin.py
+# description     :This script will analyze fasta files, look for restriction sites, cut the sequences around the restriction sites, translate the nucleic sequences into amino acids sequences.
+# author          :Fabienne Wong Jun Tai
+# date            :20121107
+# version         :1.0
+# usage           :python fibronectin.py -i file.fasta -o /output/dir/ -p pattern -5 seq_restric_5'-3 seq_restric_3'
+# notes           :
+# python_version  :3.7.11
+# biopython_max_version  :1.72
+# ==============================================================================
+import math
+import matplotlib
+import numpy
+import re
+import subprocess
+import matplotlib.pyplot as plot
+from args import *
+from Bio import SeqIO, Seq
+from Bio.SubsMat import MatrixInfo as matlist
+from Bio import pairwise2
+from Bio.pairwise2 import format_alignment
+matplotlib.use('Agg')
+
+args = Args()
+print(sys.path[0])
+# Variables initialization
+fibronectin_script_dir = sys.path[0]
+print(fibronectin_script_dir)
+directory = args.output_dir
+mcl_file = directory + "mcl.in"
+mcl_output = directory + "mcl.out"
+html_file = directory + "fibronectin_report.html"
+graph_pic = directory + "distri.png"
+input_file = os.path.basename(args.input)
+site_res_5 = args.site_res_5
+site_res_3 = args.site_res_3
+tag = {'mut': [], 'ok_stop_ext': [], 'stop': [], 'no_restric': [], 'no_multiple': [], 'amber': []}
+all_seq = []
+all_seq_fasta = {}  # dictionnary that will store information about all the sequences
+good_seq = {}  # dictionnary that will store information about the valid sequences
+identical_clones = {}
+var_seq_common = {}  # dictionnary that will store the number of sequences that share the same variable parts
+align_scores = []
+nb_var_part = 0
+
+
+def reverse_complement(seq):
+    # Generate the reverse complement
+    complement_nuc = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
+    rev_com = ""
+    for n in (seq[::-1]):
+        rev_com += complement_nuc[n]
+    return rev_com
+
+
+def generate_aln(seq_dic, ids):
+    # Multiple Sequence Alignment via ClustalO
+    input = ''
+    for k in ids:
+        input += '>%s\n%s\n' % (k, re.sub("(.{80})", "\\1\n", seq_dic[k]['prot'], re.DOTALL))
+    p = subprocess.Popen("clustalo -i - --outfmt clu", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
+    aln_out, aln_err = p.communicate(input=input)
+    print(type(aln_out))
+    return aln_out
+
+
+def report_html(html_file, tag, all_seq, good_seq, all_seq_fasta, identical_clones, nb_var_part, var_seq_common, align_scores, args):
+    # Generate the html file for the report
+    all_seq.sort()
+    no_restric = tag['no_restric']
+    no_restric.sort()
+    no_multiple = tag['no_multiple']
+    no_multiple.sort()
+    stop = tag['stop']
+    stop.sort()
+    amber = tag['amber']
+    amber.sort()
+    mut = tag['mut']
+    mut.sort()
+    # good_ids = good_seq.keys()
+
+    good_seq = dict(sorted(good_seq.items()))
+    good_ids = good_seq.keys()
+
+    # good_ids.sort()
+
+    w = open(html_file, 'w')
+    w.write(
+        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>Fibronectin Report</title><link href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet" /><style type="text/css">body {padding-top: 40px;}.subhead {padding: 40px 0;}.subhead h1 {font-size: 60px;}.fasta {   font-family: Monaco, Menlo, Consolas, "Courier New", monospace;   font-size: 12px;}code.grey{color: #636D71;}</style></head><body><a id="top"></a><div class="navbar navbar-fixed-top"><div class="navbar-inner"><div class="container"><a class="brand" href="#top">Fibronectin Report</a><div class="nav-collapse collapse"><ul class="nav"><li><a href="#input">Input data</a></li><li><a href="#analysis">Sequences analysis</a></li><li><a href="#variable">Variable regions analysis</a></li><li><a href="#cluster">Clustering</a></li><li><a href="#stat">Statistics</a></li><li><a href="#annex">Annex</a></li></ul></div></div></div></div><div class="container-fluid"><header class="subhead"><h1>Fibronectin Report</h1></header><div class="page-header"><a id="input"></a><h2>Input data</h2></div>')
+    # Input data
+    w.write(
+        '<p>Input file:<br/><code class="grey">%s</code></p><p>Number of sequences in input file:<br/><code class="grey">%d</code></p><p>Pattern of the sequence bank:<br/><code class="grey">%s</code></p><p>5\' restriction site:<br/><code class="grey">%s</code></p><p>3\' restriction site:<br/><code class="grey">%s</code></p>' % (
+            input_file, len(all_seq), args.pattern, args.site_res_5, args.site_res_3))
+    # Sequence analysis
+    w.write(
+        '<div class="page-header"><a id="analysis"></a><h2>Sequences analysis</h2></div><p>Caption:</p><ul><li class="text-success">Valid sequences that will be part of the next analysis </li><li class="text-warning">Good sequences but will not be part of the next analysis</li><li class="text-error">Rejected sequences</li></ul><table class="table table-striped table-bordered"><tr><th class="text-error">Absence of restriction sites</th><th class="text-error">Incorrect number of nucleotides between the restriction sites</th><th class="text-error">Stop codon <u>inside</u> the area of interest</th><th class="text-warning">Mutation in the conserved regions</th><th class="text-success">Valid sequences</th><th>Amber codon in the sequence (<u>inside</u> the area of interest)</th></tr>')
+    w.write(
+        '<tr><td class="text-error">%d sequence(s) (%.2f%%)</td><td class="text-error">%d sequence(s) (%.2f%%)</td><td class="text-error">%d sequence(s) (%.2f%%)</td><td class="text-warning">%d sequence(s) (%.2f%%)</td><td class="text-success">%d sequence(s) (%.2f%%)</td><td>%d sequence(s)</td></tr>' % (
+            len(no_restric), float(len(no_restric)) / float(len(all_seq)) * 100, len(no_multiple), float(len(no_multiple)) / float(len(all_seq)) * 100, len(stop),
+            float(len(stop)) / float(len(all_seq)) * 100, len(mut), float(len(mut)) / float(len(all_seq)) * 100, len(good_ids), float(len(good_ids)) / float(len(all_seq)) * 100,
+            len(amber)))
+    w.write(
+        '<tr><td class="text-error">%s</td><td class="text-error">%s</td><td class="text-error">%s</td><td class="text-warning">%s</td><td class="text-success">%s</td><td>%s</td></tr></table>' % (
+            '<br/>'.join(no_restric), '<br/>'.join(no_multiple), '<br/>'.join(stop), '<br/>'.join(mut), '<br/>'.join(good_ids), '<br/>'.join(amber)))
+    # Variable regions analysis
+    w.write(
+        '<div class="page-header"><a id="variable"></a><h2>Variable regions analysis</h2></div><p>The following group of sequences are identical clones on the variable regions:</p>')
+    identical_clones_seq = identical_clones.keys()
+    if identical_clones_seq:
+        for seq in identical_clones_seq:
+            ids = list(set(identical_clones[seq]))  # return only one occurrence of each item in the list
+            w.write('<div class="row-fluid"><div class="span5"><pre>%d sequences (%.2f%% of valid sequences)<br/>%s</pre></div>' % (
+                len(ids), float(len(ids)) / float(len(good_ids)) * 100, '<br/>'.join(ids)))
+            w.write('<div class="span3"><table class="table table-striped table-bordered"><thead><tr><th>Variable region</th><th>Repeated sequence</th></tr></thead><tbody>')
+            for z in range(len(good_seq[ids[0]]['var'])):
+                w.write('<td>%d</td><td>%s</td></tr>' % (z + 1, good_seq[ids[0]]['var'][z]))
+            w.write('</tbody></table></div></div>')
+    else:
+        w.write('<p>No clone was found.</p>')
+
+    first = True
+    for i in range(nb_var_part):
+        keys = []
+        for k in (var_seq_common[str(i + 1)].keys()):
+            nb = var_seq_common[str(i + 1)][k]
+            if nb > 1:
+                if first:
+                    w.write(
+                        '<p>Here\'s the distribution of the repeated sequences in variable regions:</p><table class="table table-striped table-bordered"><thead><tr><th>Variable region</th><th>Repeated sequence</th><th>Number of occurrences (percentage of valid sequences)</th></tr></thead><tbody>')
+                    first = False
+                    keys.append(k)
+                else:
+                    keys.append(k)
+        nb = len(keys)
+        if nb != 0:
+            w.write('<tr>')
+            for z in range(nb):
+                if z == 0:
+                    w.write('<td rowspan="%d">%d</td>' % (nb, i + 1))
+                w.write('<td>%s</td><td>%d (%.2f%%)</td></tr>' % (
+                    keys[z], var_seq_common[str(i + 1)][keys[z]], float(var_seq_common[str(i + 1)][keys[z]]) / float(len(good_ids)) * 100))
+    w.write('</tbody></table>')
+    # Clustering
+    w.write('<div class="page-header"><a id="cluster"></a><h2>Clustering</h2></div><p>The following clusters were generated by MCL:</p>')
+    for line in open(mcl_output, 'r'):
+        w.write('<div class="row-fluid"><div class="span6"><pre>%d sequences (%.2f%% of valid sequences)<br/>%s</pre></div></div>' % (
+            len(line.split("\t")), float(len(line.split("\t"))) / float(len(good_ids)) * 100, '<br/>'.join(line.split("\t"))))
+    # Statistics
+    w.write('<div class="page-header"><a id="stat"></a><h2>Statistics</h2></div>')
+    w.write('<p>Here\'s some statistics about the valid sequences:</p><p>Mean for the pairwise alignement scores: %.2f<br/>Standard deviation: %.2f</p>' % (
+        numpy.mean(align_scores), numpy.std(align_scores)))
+    w.write('<div class="row-fluid"><div class="span6"><img src="%s" alt="Distribution of the pairwise alignment score"></div>' % os.path.basename(graph_pic))
+    w.write('<div class="span6"><table class="table table-striped table-bordered"><thead><tr><th>Pairwise Alignment Score</th><th>Number of occurrences</th></tr></thead><tbody>')
+    uniq_scores = sorted(list(set(align_scores)))
+    scores_dic = {}
+    for score in uniq_scores:
+        scores_dic[score] = align_scores.count(score)
+
+    scores_dic = dict(sorted(scores_dic.items()))
+    scores = scores_dic.items()
+    # scores.sort()
+
+    for el in scores:
+        w.write('<tr><td>%.2f</td><td>%d</td></tr>' % (el[0], el[1]))
+    w.write('</tbody></table></div></div>')
+    # Annex
+    w.write('<div class="page-header"><a id="annex"></a><h2>Annex</h2></div>')
+    w.write('<p><strong>Valid protein sequences</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+    for _id in good_ids:
+        w.write('>%s\n%s\n' % (_id, re.sub("(.{80})", "\\1\n", good_seq[_id]['prot'], re.DOTALL)))
+    w.write('</textarea>')
+    aln_out = generate_aln(good_seq, good_ids)
+    print(str(aln_out))
+    w.write(
+        '<p>Multiple sequence alignment of the <strong>valid sequences</strong> generated by Clustal Omega:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">%s</textarea>' % str(
+            aln_out))
+
+    if no_multiple:
+        w.write(
+            '<p><strong>Protein sequences with an incorrect number of nucleotides between the restriction sites</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+        for _id in no_multiple:
+            w.write('>%s\n%s\n' % (_id, re.sub("(.{80})", "\\1\n", all_seq_fasta[_id]['prot'], re.DOTALL)))
+        w.write('</textarea>')
+
+    if mut:
+        w.write('<p><strong>Mutated protein sequences</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+        for _id in mut:
+            w.write('>%s\n%s\n' % (_id, re.sub("(.{80})", "\\1\n", all_seq_fasta[_id]['prot'], re.DOTALL)))
+        w.write('</textarea>')
+        aln_out = generate_aln(all_seq_fasta, mut)
+
+        w.write(
+            '<p>Multiple sequence alignment of the <strong>mutated sequences</strong> generated by Clustal Omega:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">%s</textarea>' % str(
+                aln_out))
+
+    if stop:
+        w.write('<p><strong>Protein sequences with a stop codon</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+        for _id in stop:
+            w.write('>%s\n%s\n' % (_id, re.sub("(.{80})", "\\1\n", all_seq_fasta[_id]['prot'], re.DOTALL)))
+        w.write('</textarea>')
+
+    if amber:
+        w.write('<p><strong>Protein sequences with an amber codon</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+        for _id in amber:
+            w.write('>%s\n%s\n' % (_id, re.sub("(.{80})", "\\1\n", all_seq_fasta[_id]['prot'], re.DOTALL)))
+        w.write('</textarea>')
+
+    w.write('</div></body></html>')
+    w.close()
+
+
+# NOTE(review): nb_seq is not read anywhere else in this script; computing it
+# parses the whole input file once before the loop below parses it again.
+nb_seq = len(list(SeqIO.parse(args.input, "fasta")))
+
+# Classify every record of the input file. A record ends up in exactly one of
+# tag['no_restric'], tag['no_multiple'], tag['stop'], tag['mut'] or good_seq;
+# tag['amber'] is filled in addition for records with a TAG codon before the
+# end of the area of interest.
+for seq_record in SeqIO.parse(args.input, "fasta"):
+    seq_id = seq_record.id
+    seq = str(seq_record.seq)
+    seq = seq.upper()
+    all_seq.append(seq_id)
+    # Checking if both restriction sites are present in the sequence
+    if site_res_5 in seq and site_res_3 in seq:
+        valid = True
+    else:
+        valid = False
+        tag['no_restric'].append(seq_id)
+    # If sequence has both restriction sites, checking if it is necessary to take the reverse complement strand
+    if valid:
+        # Position of the first occurrence of each site
+        site_res_5_pos = seq.index(site_res_5)
+        site_res_3_pos = seq.index(site_res_3)
+        # If site_res_5_pos > site_res_3_pos, reverse complement strand has to be calculated
+        if site_res_5_pos > site_res_3_pos:
+            # Checking if the number of nucleic acids between the restriction sites is a multiple of 3
+            length = math.fabs((site_res_5_pos + len(site_res_5)) - site_res_3_pos)
+            valid = length % 3 == 0
+            # Keep everything up to the end of the 5' site, then read it on the other strand
+            cut_seq = seq[:site_res_5_pos + len(site_res_5)]
+            cut_seq = reverse_complement(cut_seq)
+        # Else if site_res_5_pos < site_res_3_pos, use the sequence as it is
+        else:
+            # Checking if the number of nucleic acids between the restriction sites is a multiple of 3
+            length = math.fabs((site_res_3_pos + len(site_res_3)) - site_res_5_pos)
+            valid = length % 3 == 0
+            # Keep everything from the start of the 5' site
+            cut_seq = seq[site_res_5_pos:]
+        # If the number of nucleic acids between the restriction sites isn't a multiple of 3, put the sequence away
+        if not valid:
+            tag['no_multiple'].append(seq_id)
+            # The translation is still stored so the report can list the sequence
+            prot_seq = Seq.translate(cut_seq)
+            all_seq_fasta[seq_id] = {}
+            all_seq_fasta[seq_id]['prot'] = prot_seq
+        else:
+            # Translate nucleic sequence into amino acid sequence
+            prot_seq = Seq.translate(cut_seq)
+            all_seq_fasta[seq_id] = {}
+            all_seq_fasta[seq_id]['prot'] = prot_seq
+
+            # Looking for stop codon in the sequence and getting their position in the sequence
+            if '*' in prot_seq:
+                # NOTE(review): "\*" is not a raw string; recent Python versions warn
+                # about the invalid escape sequence - r"\*" would be equivalent.
+                pos_stop = [m.start() for m in re.finditer("\*", prot_seq)]
+                stop = False
+                # Checking if stop codon is between the restriction sites, also checking if it is an amber codon. if stop codon other than amber codon -> tag stop
+                for i in range(len(pos_stop)):
+                    # length / 3 is the size, in codons, of the area between the sites
+                    if pos_stop[i] < length / 3:
+                        stop_codon_nuc = cut_seq[pos_stop[i] * 3:pos_stop[i] * 3 + 3]
+                        if stop_codon_nuc != "TAG":
+                            tag['stop'].append(seq_id)
+                            stop = True
+                            break
+                        else:
+                            if seq_id not in tag['amber']:
+                                tag['amber'].append(seq_id)
+                # If stop codon wasn't found between the restriction sites
+                # NOTE(review): 'stop' is only assigned when prot_seq contains '*', and the
+                # checks below only run inside this branch, so a translation without any
+                # '*' is never added to good_seq or tag['mut'] - confirm this is intended.
+                if not stop:
+                    # The string below is disabled code kept by the original author
+                    """
+					# Checking if there is a stop codon outside the restriction sites. If yes -> tag ok_stop_ext
+					for i in range(len(pos_stop)):
+						if (pos_stop[i] > length/3):
+							stop_codon_nuc = cut_seq[pos_stop[i]*3:pos_stop[i]*3+3]
+							if stop_codon_nuc != "TAG":
+								tag['ok_stop_ext'].append(seq_id)
+								stop = True
+								break
+							else:
+								if (seq_id not in tag['amber']):
+									tag['amber'].append(seq_id)
+					"""
+                    # Checking if there was a mutation in the fix part, if yes -> tag mut else retrieve variable parts
+                    mut = False
+                    # The pattern is a ':'-separated list; parts starting with a digit
+                    # are treated as variable parts, the others as fix parts
+                    pattern_part = args.pattern.split(":")
+                    tmp_prot_seq = prot_seq
+                    var_parts = []
+                    for i in range(len(pattern_part) - 1):  # not checking the latest fix part
+                        part = pattern_part[i]
+                        # If part is fix
+                        if not part[0].isdigit():
+                            # If part not in prot_seq -> mutation, flag then break
+                            if part not in tmp_prot_seq:
+                                mut = True
+                                tag['mut'].append(seq_id)
+                                break
+                            # Else, store the variable part if exist then remove the fix part + variable part (tmp_prot_seq starts at the end of part)
+                            else:
+                                pos_fix = tmp_prot_seq.index(part)
+                                if pos_fix != 0:
+                                    var_parts.append(tmp_prot_seq[0:pos_fix])
+                                tmp_prot_seq = tmp_prot_seq[pos_fix + len(part):]
+                        # Else part is variable
+                        else:
+                            # NOTE(review): this counter is module-level and is incremented for
+                            # every sequence that reaches this point, so it accumulates across
+                            # sequences instead of holding the number of variable parts of the pattern.
+                            nb_var_part += 1
+                    # Treating latest fix part if no mutation before
+                    if not mut:
+                        last_part = pattern_part[-1]
+                        last_var = pattern_part[-2]
+                        # Upper bound of the last variable part ("a-b" -> b, "n" -> n)
+                        if '-' in last_var:
+                            var_max = int(last_var.split('-')[1])
+                        else:
+                            var_max = int(last_var)
+                        # Only the first var_max + 1 characters of the last fix part are searched
+                        last_part = last_part[0:var_max + 1]
+                        if last_part not in tmp_prot_seq:
+                            mut = True
+                            tag['mut'].append(seq_id)
+                        else:
+                            pos_fix = tmp_prot_seq.index(last_part)
+                            if pos_fix != 0:
+                                var_parts.append(tmp_prot_seq[0:pos_fix])
+                    # If no mutation the sequence is validated and all the info are stored
+                    if not mut:
+                        good_seq[seq_id] = {}
+                        good_seq[seq_id]['dna'] = cut_seq
+                        good_seq[seq_id]['prot'] = prot_seq
+                        good_seq[seq_id]['var'] = var_parts
+
+# If all sequences are invalid, the program will exit as there is no data to continue 
+if not good_seq:
+    print("All sequences are invalid. At least 2 valid sequences are necessary to proceed to the next step. The program will now exit.")
+    sys.exit()
+elif len(good_seq.keys()) == 1:
+    print("There is only one valid sequence among the input data. At least 2 valid sequences are necessary to proceed to the next step. The program will now exit")
+    sys.exit()
+
+# Initialization of dict var_seq_common
+for n in range(nb_var_part):
+    var_seq_common[str(n + 1)] = {}
+
+# Opening the file where the mcl input will be written
+mcl = open(mcl_file, 'w')
+
+id = good_seq.keys()
+for i in range(len(id)):
+    var_1 = good_seq[list(id)[i]]['var']
+
+    # Classifying variable sequences
+    for k in range(len(var_1)):
+        try:
+            var_seq_common[str(k + 1)][var_1[k]] += 1
+        except KeyError:
+            var_seq_common[str(k + 1)][var_1[k]] = 1
+
+    for j in range(i + 1, len(id)):
+        var_2 = good_seq[list(id)[j]]['var']
+        # Comparing the sequences' variable parts to find identical clones
+        if var_1 == var_2:
+            try:
+                s = "".join(var_1)
+                identical_clones[s].extend([id[i], id[j]])
+            except KeyError:
+                identical_clones[s] = [id[i], id[j]]
+
+        # Align the 2 sequences using NWalign_PAM30
+        seq_1 = ''.join(var_1)
+        seq_2 = ''.join(var_2)
+        print(seq_1)
+        print(seq_2)
+        matrix = matlist.pam30
+        cpt = 0
+        if len(seq_2) > len(seq_1):
+            print(pairwise2.align.globalds(seq_1, seq_2, matrix, -11, -1))
+            for a in pairwise2.align.globalds(seq_1, seq_2, matrix, -11, -1):
+                for k in range(a[4]):
+                    if a[0][k] == a[1][k]:
+                        cpt += 1
+                print(format_alignment(*a, full_sequences=True))
+        else:
+            print(pairwise2.align.globalds(seq_2, seq_1, matrix, -11, -1))
+            for a in pairwise2.align.globalds(seq_2, seq_1, matrix, -11, -1):
+                for k in range(a[4]):
+                    if a[0][k] == a[1][k]:
+                        cpt += 1
+                print(format_alignment(*a, full_sequences=True))
+        print("######################################@")
+        print(cpt)
+
+        if len(seq_2) > len(seq_1):
+            p = subprocess.Popen(fibronectin_script_dir + "/NWalign_PAM30 %s %s 3" % (seq_1, seq_2), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        else:
+            p = subprocess.Popen(fibronectin_script_dir + "/NWalign_PAM30 %s %s 3" % (seq_2, seq_1), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        out, err = p.communicate()
+
+        print(out)
+        print("######################################@")
+        lines = out.split(bytes("\n", encoding='utf8'))
+        print(lines[5].split(bytes(' ', encoding='utf8'))[5])
+        score = float(lines[5].split(bytes(' ', encoding='utf8'))[5]) * 100
+        align_scores.append(score)
+        mcl.write('%s\t%s\t%0.2f\n' % (list(id)[i], list(id)[j], score))
+mcl.close()
+
+# Clusters formation
+subprocess.call("mcl %s --abc -I 6.0 -o %s" % (mcl_file, mcl_output), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+# Producing distribution graph
+plot.hist(align_scores, bins=numpy.arange(0, 101, 2))
+plot.xlabel('Pairwise Alignment Score')
+plot.ylabel('Number of occurrences')
+plot.title('Distribution of the pairwise alignment score')
+plot.grid(True)
+plot.savefig(graph_pic)
+
+# Generating html report
+report_html(html_file, tag, all_seq, good_seq, all_seq_fasta, identical_clones, nb_var_part, var_seq_common, align_scores, args)
+
+# Removing intermediate files
+subprocess.call("rm %s %s " % (mcl_file, mcl_output), shell=True)
+
+print("HTML report has been generated in the output directory. The program will now exit.")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/fibronectin.xml	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,93 @@
+<tool id="fibronectin" name="fibronectin" version="1.0">
+  <description>
+    diversity analysis of synthetic libraries of a Fibronectin domain
+  </description>
+  <macros>
+        <import>fibronectin_macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+
+  <command detect_errors="exit_code">
+  <![CDATA[
+    python $__tool_directory__/fibronectin_wrapper.py $fasta_file $pattern $restriction_site_5 $restriction_site_3
+    $__root_dir__ 
+    $report.extra_files_path 
+    $report
+    $__app__.config.new_file_path
+    $__tool_directory__
+    ]]> 
+  </command>
+  <inputs>
+    <param label="Fasta file" name="fasta_file" type="data" format="fasta" help="Fasta file that contains the DNA sequences" />
+    <param label="Pattern" name="pattern" type="text" area="true" size="4x50" help="Pattern of the sequence" />
+    <param label="5&#39; Restriction sites" name="restriction_site_5" type="text" help="Sequence of the restriction site in 5&#39;" />
+    <param label="3&#39; Restriction sites" name="restriction_site_3" type="text" help="Sequence of the restriction site in 3&#39;" />
+  </inputs>
+  <outputs>
+    <data format="html" name="report" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="fasta_file" value="fibronectin_datatest.fasta"/>
+      <param name="pattern" value="AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE"/>
+      <param name="restriction_site_5" value="GCGGCCGC"/>
+      <param name="restriction_site_3" value="GGTACC"/>
+      <output name="report" file="fibronectin_report.html" lines_diff="2">
+      </output> 
+    </test>
+  </tests>
+  <help>
+  <![CDATA[
+- **Contexte scientifique**
+
+- *Fibronectin* a été développé dans le but de permettre l'analyse de la diversité de banques synthétiques d'un domaine de la Fibronectine. 
+- Cette diversité est générée dans le contexte d'un projet d'évolution dirigée par approche de phage display en vue d'obtenir des ligands de forte affinité et spécificité pour des cibles biologiques. Elle a été introduite au niveau de trois boucles du domaine et consiste aussi bien en une variation des acides aminés qu'en une variation des longueurs des boucles. 
+- *Fibronectin* intervient avant l'étape de sélection des banques d'intérêt, en s'assurant de la qualité - et donc de la diversité - de la banque en déterminant notamment par projection, le nombre de séquences uniques et fonctionnelles.
+
+- **Description**
+
+- *Fibronectin* a été implémenté en Python. 
+
+- Les calculs sont répartis en 3 étapes :
+
+    1. Pré-traitement des données d'entrées (détermination de l'orientation des séquences, détermination de la phase de lecture, traduction des séquences nucléotidiques en séquences protéiques, élimination des séquences dites "déchets" contenant des codons stop dans les zones variables)
+    2. Alignement des séquences 2 à 2
+    3. Calculs statistiques sur les alignements (distances)
+
+- Les résultats sont présentés sous forme d'un rapport HTML.
+
+- **Exemple**
+
+- Motif : AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE
+- Site de restriction 5' : GCGGCCGC
+- Site de restriction 3' : GGTACC
+
+=====
+
+- **Scientific context**
+
+- *Fibronectin* was developed to analyze the diversity of synthetic libraries of a Fibronectin domain.
+- This diversity is generated in the context of a project of directed evolution using a phage display approach, to obtain ligands with high affinity and specificity for biological targets. It was introduced in three loops of the domain. It is both a variation of amino acids and a variation of the lengths of loops.
+- *Fibronectin* is used before the selection of the libraries of interest: it checks the quality - and therefore the diversity - of a library, notably by determining, by projection, the number of unique and functional sequences.
+
+- **Description**
+
+- *Fibronectin* is implemented in Python.
+
+- The computations are divided into three steps:
+
+    1. Pre-processing of the input data (determination of the orientation of the sequences, determination of the reading frame, translation of the nucleotide sequences into protein sequences, elimination of the "junk" sequences containing stop codons in the variable regions)
+    2. Pairwise alignment of the sequences
+    3. Statistics calculations on the alignments (distances)
+
+- The results are presented as an HTML report.
+
+- **Example**
+
+- Pattern : AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE
+- 5' restriction site : GCGGCCGC
+- 3' restriction site : GGTACC
+
+ ]]> </help>
+ <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/fibronectin_macros.xml	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,12 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.2.3">clustalo</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41467-019-12528-4</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/fibronectin_wrapper.py	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+"""
+Wrapper for fibronectin.py
+"""
+import pkg_resources
+import logging, os, string, sys, tempfile, glob, shutil, types, urllib
+import shlex, subprocess
+from optparse import OptionParser, OptionGroup
+from stat import *
+
+
+log = logging.getLogger( __name__ )  # module-level logger named after this module
+
+assert sys.version_info[:2] >= ( 2, 4 )  # refuse to run on interpreters older than Python 2.4
+
+def stop_err( msg ):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit()
+
+def __main__():
+    #Parse Command Line
+    s = 'fibronectin_wrapper.py:  argv = %s\n' % (sys.argv)
+    argcnt = len(sys.argv)
+    fasta_file = sys.argv[1]
+    pattern = sys.argv[2]
+    restriction_site_5 = sys.argv[3]
+    restriction_site_3 = sys.argv[4]
+    install_dir = sys.argv[5]
+    extra_file_path = sys.argv[6]+"/"
+    report = sys.argv[7]
+    tmp_file_path = sys.argv[8]
+    tool_file_path = sys.argv[9]+"/"
+    try:# for test - needs this done
+        os.makedirs(extra_file_path)
+    except Exception as e:
+        stop_err('1- Error running fibronectin ' + str(e))
+    cmdline = 'python %sfibronectin.py -i %s -o %s -p %s -5 %s -3 %s > /dev/null' % (tool_file_path, fasta_file, extra_file_path, pattern, restriction_site_5, restriction_site_3)
+    try:
+        proc = subprocess.Popen(args=cmdline, shell=True, stderr=subprocess.PIPE)
+        returncode = proc.wait()
+        # get stderr, allowing for case where it's very large
+        stderr = b''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += proc.stderr.read(buffsize)
+                if not stderr or len(stderr) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        if returncode != 0:
+            raise Exception(stderr)
+    except Exception as e:
+        stop_err('2 -Error running fibronectin ' + str(e))
+    png_path = os.path.join(extra_file_path,'distrib.png')
+    shutil.move(extra_file_path+"/fibronectin_report.html", report)
+    #rval = ['<html><head><title>Fibronectin Galaxy Composite Dataset </title></head><p/>']
+    #rval.append('<div>%s<p/></div>' % (cmdline) )
+    #rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
+    #rval.append( '<li><a href="%s" type="text/plain">%s </a>%s</li>' % (png_path,'Sequences','Sequences' ) )
+    #rval.append( '</ul></div></html>' )
+    #f = file(html_file,'w')
+    #f.write("\n".join( rval ))
+    #f.write('\n')
+    #f.close()
+
+if __name__ == "__main__": __main__()  # run the wrapper only when executed as a script
Binary file fibronectin/test-data/distri.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/test-data/fibronectin_datatest.fasta	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,991 @@
+>XL1_10_PSEXSEQ-REV_13 status=ok nucl=1301 crlStart=4 crlStop=1186 crlLen=1183 order=COL12-0DIL
+tagTTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT
+TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGcttcgggatcgtcg
+tatcccactaccgagatgtccgcaccaacgcgcaccccggactccgaaat
+ggcgcgcattggccccacggccttcggatcgttgggaaccagcatcgcat
+t
+>XL1_11_PSEXSEQ-REV_14 status=ok nucl=1299 crlStart=8 crlStop=1185 crlLen=1178 order=COL12-0DIL
+actctgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACGCAGTTGCCGTGTGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAGCTggcttcgggatcgtc
+gtatcccattaccgaaaatgtccgcaccaacgcgcaaccccggactcggg
+aaagggcgcgcattgcgcccaagcgccatctggatcgttgggaaaccag
+>XL1_12_PSEXSEQ-REV_15 status=ok nucl=1301 crlStart=6 crlStop=1184 crlLen=1179 order=COL12-0DIL
+agcctCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGCAGGTGGCGACGATGGTGTTGCAGTCGTTGTGAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAT
+AAGAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT
+TCGCCATAGGTGATACGGTAATAGTGGACGGCGGAGTTGGGGAGGTTTGC
+ATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTT
+TGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGC
+TTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAG
+GTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCA
+TAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATC
+TGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAAC
+GTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGA
+TGGAAAAGCCCAGACCCTTCGGCGCAGGCCGAGAATGCCAGCACCAGACC
+CGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAA
+GCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCC
+GGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCAC
+ATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTT
+TCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGC
+GCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCA
+CCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAG
+AGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTG
+GTTTGATGGTGGTTAACGGCGGGATATAACATGAgctgtcttcggtatcg
+tcgtatcccactaccgagatggccgcaccaacgcgcaacccggaatcggt
+aatggcccgcattgcgcccaaggccctcttgatcgttgggaaccagcatc
+c
+>XL1_13_PSEXSEQ-REV_16 status=ok nucl=1301 crlStart=9 crlStop=1187 crlLen=1179 order=COL12-0DIL
+agcttagcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TAGAATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCGGACGAAGGAGCGGGGTGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGgcttccggatcgt
+cgtatcccactaccgagatgtccgcccaacgcgcaacccggactcggtaa
+tgggccgcattgcgcccagcgccttcggatcgttgggaaccagcatccca
+a
+>XL1_14_PSEXSEQ-REV_17 status=ok nucl=1301 crlStart=3 crlStop=1198 crlLen=1196 order=COL12-0DIL
+agCTTCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGGTGGAGTGGCTGATGTAGCTGTAGATGAAGTAGCGAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TATAAGAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCGGGATCGtc
+gtacccactaccgaaatgtccgcccaacgcgcagcccggactcggtaatg
+ggccgcattgcgcccagcgccatctgatcgttgggaaccagcatcccagt
+g
+>XL1_15_PSEXSEQ-REV_18 status=ok nucl=1301 crlStart=7 crlStop=1186 crlLen=1180 order=COL12-0DIL
+tgcatcTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCA
+TAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATG
+GTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAA
+TCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACC
+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGA
+GGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC
+GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG
+CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA
+GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA
+GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT
+GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG
+CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA
+TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG
+TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG
+GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT
+GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG
+CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAACTGGCTTcggtatcgtcgtat
+cccactaccgaaatgcccgcccaacgcgcagcccggactcggtaatgggc
+cgcattgcgcccagcgccatctgatcgttgggaaccagctccgcagtggg
+a
+>XL1_16_PSEXSEQ-REV_19 status=ok nucl=1301 crlStart=9 crlStop=1194 crlLen=1186 order=COL12-0DIL
+cgctttgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGAAGCAGTCGCTGCTGTTGGCGTTGACGCCGCCGCAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TAGAATAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCGGGatcgtc
+gtatcccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaa
+tgggccgcattgcgcccagggccatctgatctttggcacccagctccgca
+t
+>XL1_17_PSEXSEQ-REV_20 status=ok nucl=1301 crlStart=5 crlStop=1177 crlLen=1173 order=COL12-0DIL
+ctgaAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG
+TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG
+ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG
+GGCAGCCGCGGCAGCTGAAGTGGATGGGGACGCTAGCGTAAACCGTAATG
+GTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGGAACT
+ACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCCAT
+AGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTGATC
+AGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACGGA
+ACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCATAA
+CCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACATCA
+TAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCCAG
+GCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCCAT
+CTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGTTG
+TCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCCAG
+ACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGCCA
+GCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTGA
+AATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAAA
+GTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGT
+TGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGGA
+AACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGG
+CGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACG
+GGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCAA
+GCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGATGGTGG
+TTAACGGCGGGATATAACATGAACTGGcttcggtatcgtcgtatcccact
+accgaaatgtccgcaccaacgcgcagcccggattcggaaaggcccgcatt
+gcgcccagggccatctgatcgttgggaaccagcatcccagtgggaacaat
+g
+>XL1_18_PSEXSEQ-REV_21 status=ok nucl=1301 crlStart=6 crlStop=1178 crlLen=1173 order=COL12-0DIL
+atctcTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCAT
+AGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGG
+TGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAAT
+CGGGATGGTGACGAGGACGTCGAAGTGGCCGATGGGAGCGTAAACCGTAA
+TGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAAGAA
+TAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCC
+ATAGGTGATACGGTAATAGGCTACGGGGCGGTAGATTGCATCCCAGCTGA
+TCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACG
+GAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCAT
+AACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACAT
+CATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCC
+AGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCC
+ATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGT
+TGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCC
+AGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCAGAGCC
+AGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTG
+AAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAA
+AGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCG
+TTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGG
+AAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAG
+GCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGAC
+GGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCA
+AGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGATGGTG
+GTTAACGGCGGGATATAACATGAGCTGGcttcggtatcgtcgtatcccac
+taccgagatgtccgcaccaacgcgcagcccggaatcggtaatggcccgca
+ttgggcccagcgccatctgatctttgggaacccgcatccgcatggggaac
+a
+>XL1_19_PSEXSEQ-REV_22 status=ok nucl=1301 crlStart=8 crlStop=1188 crlLen=1181 order=COL12-0DIL
+tcatcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGGAAGGCGCAGCTGATGGCGGAGAAGCTGGTGCCGTGAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+ATAAGAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGAGGACGATGCCGCGGAAGATGGAT
+GCATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAG
+TTTGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCA
+GCTTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTC
+AGGTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTG
+CATAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAA
+TCTGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTA
+ACGTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGC
+GATGGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCA
+GACCCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTA
+ATAAGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACG
+AGCCGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAAC
+TCACATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCC
+GCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCA
+ACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTT
+TTCACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTG
+AAAGAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAAT
+CCTGTTTGATGGTGGTTAACGGCGGGATATAACATGAActggcttccgga
+tcgtcgtatcccactaccgagatgtccgcaccaacgcgcaccccggaatc
+cggaaagggcccgcattgcggccaagcgcctcttgatcgttgggaacaag
+a
+>XL1_1_PSEXSEQ-REV_5 status=ok nucl=1301 crlStart=5 crlStop=1182 crlLen=1178 order=COL12-0DIL
+cgtgGAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATA
+GTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGT
+GATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATC
+GGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACCGT
+AATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGG
+AACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCG
+CCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCT
+GATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCA
+CGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCC
+ATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCAC
+ATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTT
+CCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGG
+CCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCT
+GTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGC
+CCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGA
+GCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGT
+GTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCA
+TAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATT
+GCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTC
+GGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGA
+GAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGA
+GACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCA
+GCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGAT
+GGTGGTTAACGGCGGGATATAACATGAGCTGGcttcggtatcgtcgtatc
+ccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaatggcc
+cgcattggccccagcgccttctgatcgttggcaaccagctccgcagtggg
+a
+>XL1_22_PSEXSEQ-REV_24 status=ok nucl=1301 crlStart=5 crlStop=1180 crlLen=1176 order=COL12-0DIL
+ctatTGGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGAAGATGACGTTGTCGTCGTTGGGGGGGTTGGTAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAG
+AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTCCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGGCGACGAAGGTGATGAGTGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACG
+GCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGC
+TGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAG
+CCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAG
+AGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTG
+TGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGC
+ATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAAT
+TGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGT
+CGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGG
+AGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTG
+AGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGC
+AGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAGCtggcttcgggatcgtcgtat
+cccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaatggg
+ccgcattgggcccagcgccatctgatctttgggaaccagcatcccaatgg
+g
+>XL1_23_PSEXSEQ-REV_25 status=ok nucl=1301 crlStart=9 crlStop=1183 crlLen=1175 order=COL12-0DIL
+ttattcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGCAGCGGGAGAAGGGGTTGTCGGAGATGTCGCCAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAT
+AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT
+TCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCA
+GCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCG
+GCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATT
+TCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAAC
+CACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTAGTGCATAACAT
+CTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGA
+CGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAAT
+GCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAA
+AGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCC
+AGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTT
+TGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGA
+GCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTA
+ATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCA
+GTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGG
+GGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAG
+TGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTT
+GCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTG
+ATGGTGGTTAACGGCGGGATATAACATGAACTGgcttcggtatcgtcgta
+tcccactaccgagatggcccgcacaacgcgcaacccggactcggtaatgg
+gccgcattggccccagcgccatctgatctttgggaaccagcatcgcagtg
+g
+>XL1_24_PSEXSEQ-REV_26 status=ok nucl=1301 crlStart=5 crlStop=1178 crlLen=1174 order=COL12-0DIL
+atctTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGAGTTACCCCCGTT
+TCGCCATAGGTGATACGGTAATAGCAGACGTAGTCGGGGCTGCTGTGTGC
+ATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTT
+TGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGC
+TTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAG
+GTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCA
+TAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATC
+TGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAAC
+GTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGA
+TGGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGA
+CCCGCCAGAGCCACCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA
+AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC
+CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA
+CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT
+TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG
+CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC
+ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA
+GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT
+GGTTTGATGGTGGTTAACGGCGGGATATaacatgaactggcttcggtatc
+gtcgtatcccactaccgaaatgtcccgaccaacgcgcagcccggactcgg
+taatgggccgcattgggcccagggccatctgatctttgggaaccagctcc
+g
+>XL1_25_PSEXSEQ-REV_27 status=ok nucl=1301 crlStart=10 crlStop=1192 crlLen=1183 order=COL12-0DIL
+aacctcgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAGAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCgggatcgt
+cgtatcccactaccgagatgtccgcccaacgcgcagcccggaatcggtat
+tgggcccgattgcgcccagcgccatctgatcgttgggaaccagcatccgc
+a
+>XL1_26_PSEXSEQ-REV_28 status=ok nucl=1301 crlStart=6 crlStop=1173 crlLen=1168 order=COL12-0DIL
+ctggaTATCTTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG
+TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG
+ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG
+GACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACCGTA
+ATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGGA
+ACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGC
+CATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTG
+ATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCAC
+GGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCA
+TAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACA
+TCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTC
+CAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGC
+CATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTG
+TTGTCACCCGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCC
+AGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGC
+CAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGT
+GAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATA
+AAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGC
+GTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGG
+GAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGA
+GGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGA
+CGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGC
+AAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGATGG
+TGGTTAACGGCGGGATATAACATgaactggcttcgggatcgtcgtatccc
+actaccgagattgcccgcaccaacgcgcaaccccggactcggtaaagggc
+ccgcattgcgcccagggccatctgatcgttgggaaccagcatccgcaatg
+g
+>XL1_29_PSEXSEQ-REV_29 status=ok nucl=1301 crlStart=5 crlStop=1184 crlLen=1180 order=COL12-0DIL
+taggTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAACtggcttcgggatcgtc
+gtatcccactaccgagatgtcccgcccaacgcgaagcccggaatcggtaa
+tgggccgcattgggcccaagcgcctctggatcgttgggaaccaggttcgc
+a
+>XL1_2_PSEXSEQ-REV_6 status=ok nucl=1301 crlStart=5 crlStop=1197 crlLen=1193 order=COL12-0DIL
+ttcgCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGGATGAAGCCGTCGGCGCTGGTGCGGGCGCCGGTAGCGTAAACC
+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATA
+ATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGAAGACGTCGCCGAGGGGTGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTACAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC
+GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG
+CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA
+GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA
+GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT
+GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG
+CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA
+TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG
+TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG
+GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT
+GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG
+CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCGGTATCGTCGtat
+cccactaccgagatgtccgcaccaacgcgcagcccggaatcggtaatggc
+gcgcattgcgcccagggccatctgatcgttggcaaccagcatcccattgg
+g
+>XL1_30_PSEXSEQ-REV_30 status=ok nucl=1301 crlStart=4 crlStop=1178 crlLen=1175 order=COL12-0DIL
+atgGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAGT
+CTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTGA
+TGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCGG
+GTTGTGGGCGTGGCTGCAGACGGTGACAGGCCGGAAATCGTTGCGGTTGA
+GGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC
+GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG
+CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA
+GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA
+GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT
+GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG
+CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA
+TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG
+TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG
+GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT
+GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG
+CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAGCTGTCTTCGGTATCGTCGTAT
+CCCACTACCGAAATGTCCGCACCAACGCgcagcccggaatcggtaatggc
+gcgcattgcgcccagcgccatctgatcgttggcaaccagcatcgcagtgg
+gaacgatgccctcattcagcatttgcatggtttgttgaaaaccggaaatg
+g
+>XL1_33_PSEXSEQ-REV_23 status=ok nucl=1301 crlStart=329 crlStop=1165 crlLen=837 order=COL12-0DIL
+cgttttcggctctgatatctttggatcccacgcgtccctagcccacgcgt
+ggtgcatagtctggcacgtcatacggatacgaaccaccatgatggtgatg
+gtgatggtgatggctaccgcccgaaccgccggtaccggtacggtaattga
+tagaaatcggactagagctcgatgaggagatacgttaagagctcgaagcg
+taaaccgtaatggtatagtcgacacccggtgacaggccggaaatcgttgc
+ggttgaataacaacccgggacggtgaattcctgaaccggggagttaccgc
+ccgtttcgccataggtgatacggtaataGCTGACGTTGCTTTGGGATGCA
+TCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTT
+GGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCT
+TGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGG
+TAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCAT
+AACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCT
+GCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACG
+TAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGAT
+GGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGAC
+CCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA
+AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC
+CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA
+CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT
+TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG
+CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC
+ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA
+GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT
+GGTTGATGGTGGTTAacggcgggatataacatgaactggcttcggtatcg
+tcgtatccactaccgaaatgtccgaccaacgggcaacccggaatcggtaa
+tgggcggattgggcccagcgcatctgatcgttggaaccagcatcgcagtg
+g
+>XL1_3_PSEXSEQ-REV_7 status=ok nucl=1301 crlStart=9 crlStop=1188 crlLen=1180 order=COL12-0DIL
+ttactagcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATATAACTTGAGCTGTCttcgggatcgtc
+gtatcccactaccgaaaatgtccgcaccaacgcgcaagcccggaatccgg
+tatgggcgcgcattggccccaaggccatcggatcgttgggaaccagcatc
+c
+>XL1_4_PSEXSEQ-REV_8 status=ok nucl=1301 crlStart=9 crlStop=1190 crlLen=1182 order=COL12-0DIL
+tgactgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGGGTGTGGGTGCAGTGGTTGTTGCGGCAGGTGTTGTCAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+ATAATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGATTACCGCCCGT
+TTCGCCATAGGTGATACGGTAATAGACGACGCAGTGGTCGGTGAAGGGTG
+CATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGT
+TTGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAG
+CTTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCA
+GGTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGC
+ATAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAAT
+CTGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAA
+CGTAAATGCTGTTGACCCGGAATGTGTTGATTGCCCACGCCACCTGCGAT
+GGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGAC
+CCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA
+AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC
+CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA
+CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT
+TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG
+CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC
+ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA
+GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT
+GGTTTGATGGTGGTTAACGGCGGGAAATAACATGAACTGGcttcggtatc
+gtcgtatcccactaccgaaatgtccgcacaacgcgcagcccggaatcggt
+aatgggccgcattgcgcccagcgccatctgatctttgggaaccagcatcg
+c
+>XL1_5_PSEXSEQ-REV_9 status=ok nucl=1301 crlStart=10 crlStop=1173 crlLen=1164 order=COL12-0DIL
+ctattcgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATataacatgaactggcttcggtatcgtc
+gtatcccactaccgagatgtccgcccaacgcgcagcccggactcggtaat
+gggccgcattgcgcccagcgccatctgatcgttgggaacaagcttcgcat
+t
+>XL1_6_PSEXSEQ-REV_10 status=ok nucl=1301 crlStart=9 crlStop=1187 crlLen=1179 order=COL12-0DIL
+agcttagcTCTGAAATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGGAGTTGATGGGGTGGAAGTTGGAGACGGTGGCGTTAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TAGAATAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGtcttcggtatcgt
+cgtatcccactaccgagatgtccgcacaacgcgcagcccggaatcggtaa
+tggccgcattgggcccagcgccatctgatcgttggcaaccagcttcgcat
+t
+>XL1_8_PSEXSEQ-REV_11 status=ok nucl=1301 crlStart=6 crlStop=1176 crlLen=1171 order=COL12-0DIL
+aacttTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATaacatgagctggcttcgggatcgt
+cgtatcccactaccgaaattgtccgaccaacgcgcaacccggactcggta
+ttgggccgcattgcgcccagggccatctgatctttgggaaccagcatccg
+c
+>XL1_9_PSEXSEQ-REV_12 status=ok nucl=1301 crlStart=3 crlStop=1188 crlLen=1186 order=COL12-0DIL
+tgGCTCTGGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGC
+ATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGAT
+GGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAA
+ATCGGGTGGTGGACGGCGTGGACGTCGTCGTTGACGAGGCTAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAG
+AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT
+TCGCCATAGGTGATACGGTAATAGCAGACAGAGCTTTATGATGCATCCCA
+GCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCG
+GCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATT
+TCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAAC
+CACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACAT
+CTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGA
+CGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAAT
+GCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAA
+AGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCC
+AGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTT
+TGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGA
+GCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTA
+ATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCA
+GTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGG
+GGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAG
+TGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTT
+GCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTG
+ATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCgggatcgtcgta
+cccactaccgagatgtccgcacaacgcgcagcccggactcggtatggccc
+gcattggccccagcgccatctgatcgttgggaacaagcatcccaatgggg
+a
+>XL2-1_PSEXSEQ-REV_32 status=ok nucl=1301 crlStart=9 crlStop=1190 crlLen=1182 order=COL12-0DIL
+tcatagctCTGATTTCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTcgggatcgtc
+gtatcccactaccgagatgtccgcacaacgcgcagcccggaatccggtaa
+tggcccgcattgcgcccagggccatctgatcgttgggaaccaagatccgc
+a
+>XL2-2_PSEXSEQ-REV_33 status=ok nucl=1301 crlStart=9 crlStop=1181 crlLen=1173 order=COL12-0DIL
+ctctctggATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCA
+TAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATG
+GTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAA
+TCGGGACGAGGGAGGCGGAGCAGCGGAGGCAGGGGCTAGCGTAAACCGTA
+ATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAAGA
+ATAACCCGGGACGGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCG
+CCATAGGTGATACGGTAATAGTTGACGAAGGTGTGGCATGCATCCCAGCT
+GATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCA
+CGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCC
+ATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCAC
+ATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTT
+CCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGG
+CCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCT
+GTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGC
+CCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGA
+GCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGT
+GTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCA
+TAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATT
+GCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTC
+GGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGA
+GAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGA
+GACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCA
+GCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGAT
+GGTGGTTAACGGCGGGATATAACATGAGCTGgcttcggtaccgtcgtatc
+ccactaccgagatgtccgcaccaacgcgcagcccggaatcggtaatgggc
+cgcattgggcccagcgccatctgatcgttgggaaccagaatcccaattgg
+g
+>XL2-3_PSEXSEQ-REV_34 status=Failed nucl=519 crlStart=1 crlStop=21 crlLen=21 order=COL12-0DIL
+AAGGCCGTTTTACTTATTTGCtaataacaccttctccacgaacccccccg
+ggttcaacatcgagggcgagaatcagaaaccccccaccatgtggatgagg
+ctaagaatgtggtttcccccaaaacccccggtgcttgcttatggtgataa
+taatcccaccaaatatcggaagtcttcacaaattgtaaaaatcccgcttt
+atttttgtattactttagagtcgccgagacccagctcatgtaggtgtctg
+agaaggactggatctgaatcatcgatgagttcacctttactttctttttt
+ttttttctttttccaaataactaatagatgattcatcttgttgatgcctg
+aaacccgaccaacatagcttccacatgccaccaacatttgcttgttagcc
+tatctccgatctgaccccgtaggccccgctcccttaatggatcaggataa
+attttcttaccctctcggtgatggcggcccccagcgcccggccatcctta
+cctgtttttttatttgtc
+>XL2-4_PSEXSEQ-REV_35 status=ok nucl=1301 crlStart=6 crlStop=1192 crlLen=1187 order=COL12-0DIL
+agattAGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCggtatcgt
+cgtatcccactaccgagatgtccgcaccaacgcggcagcccggaatcggt
+aatggcgcgcattgggcccaagcgccatctgatcgttgggaaccagcatc
+c
+>XL2-5_PSEXSEQ-REV_36 status=ok nucl=1301 crlStart=5 crlStop=1173 crlLen=1169 order=COL12-0DIL
+ctggAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG
+TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG
+ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG
+GGTAGCTGTTGGCGACGATGACGCAGTAGCCGCTAGCGTAAACCGTAATG
+GTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAATAAGA
+ACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCCAT
+AGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTGATC
+AGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACGGA
+ACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCATAA
+CCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACATCA
+TAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCCAG
+GCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCCAT
+CTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGTTG
+TCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCCAG
+ACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGCCA
+GCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTGA
+AATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAAA
+GTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGT
+TGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGGA
+AACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGG
+CGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACG
+GGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCAA
+GCGGTCCACGCTGGTTTGCCCAGCAGGCGAAATCCTGTTTGATGGTGTTA
+ACGGCGGGATTAACATGAACTGGcttcgggatcgtcgtatccactaccga
+aatgccgcaccacgcgcagcccggactcggaaatggccgcattggcccca
+gggccatctgatcgttggaaccaagatcccaatgggaacaagccctcatc
+c
+>XL3-1_PSEXSEQ-REV_37 status=ok nucl=1301 crlStart=6 crlStop=1187 crlLen=1182 order=COL12-0DIL
+ttcctCGATCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+GCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGgcttcggtatcgt
+cgtatcccactaccgagatgtccgcacaacgcgcaagccggaatcggtaa
+tggcccgcattgcgcccagcgccatctgatcgttgggaaccagcatccgc
+a
+>XL3-2_PSEXSEQ-REV_38 status=ok nucl=1301 crlStart=6 crlStop=1185 crlLen=1180 order=COL12-0DIL
+cgtctTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCtggcttcgggatcgt
+cgtatcccactaccgagatgtccgcaccaacgcgcagcccggactcggta
+ttgggccgcattgcgcccagcgccatctgatcgttgggaacccagatcgc
+a
+>XL3-3_PSEXSEQ-REV_39 status=ok nucl=1301 crlStart=10 crlStop=1174 crlLen=1165 order=COL12-0DIL
+agactagctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATataacatgaactggcttcggtatcgt
+cgtatcccactaccgaaatgtccgcaccaacgcgcaacccggaatcggga
+atgggccgcattgcgcccagcgccatctgatctttgggaaccagcatccc
+a
+>XL3-4_PSEXSEQ-REV_40 status=ok nucl=1301 crlStart=8 crlStop=1186 crlLen=1179 order=COL12-0DIL
+tcgctcgCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGCGGCTGTGGATGAAGCTGTGGTTGAGGCTGGAAGCGTAAACC
+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATA
+ATAAGAACCCGGGACGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTC
+GCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGC
+TGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGC
+ACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTC
+CATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCA
+CATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCT
+TCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACG
+GCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGC
+TGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAG
+CCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAG
+AGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTG
+TGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGC
+ATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAAT
+TGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGT
+CGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGG
+AGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTG
+AGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGC
+AGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTcggtatcgtcgtat
+cccactaccgagatgtccgcacaacgcgcagcccggactcggtaatggcc
+cgcattggccccagcgccatctgatcgttgggaaccagctcccgagtggg
+a
+>XL3-5_PSEXSEQ-REV_41 status=ok nucl=1301 crlStart=8 crlStop=1190 crlLen=1183 order=COL12-0DIL
+ctgtcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT
+TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCggtatcgtcg
+tatcccactaccgagatgtccgcaccaacgcgcagcccggactcggaatg
+gggcgcattgggcccagcgccatttgatcgttgggaaccagcatcgcatt
+g
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fibronectin/test-data/fibronectin_report.html	Wed Nov 10 15:15:50 2021 +0000
@@ -0,0 +1,127 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>Fibronectin Report</title><link href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet" /><style type="text/css">body {padding-top: 40px;}.subhead {padding: 40px 0;}.subhead h1 {font-size: 60px;}.fasta {   font-family: Monaco, Menlo, Consolas, "Courier New", monospace;   font-size: 12px;}code.grey{color: #636D71;}</style></head><body><a id="top"></a><div class="navbar navbar-fixed-top"><div class="navbar-inner"><div class="container"><a class="brand" href="#top">Fibronectin Report</a><div class="nav-collapse collapse"><ul class="nav"><li><a href="#input">Input data</a></li><li><a href="#analysis">Sequences analysis</a></li><li><a href="#variable">Variable regions analysis</a></li><li><a href="#cluster">Clustering</a></li><li><a href="#stat">Statistics</a></li><li><a href="#annex">Annex</a></li></ul></div></div></div></div><div class="container-fluid"><header class="subhead"><h1>Fibronectin Report</h1></header><div class="page-header"><a id="input"></a><h2>Input data</h2></div><p>Input file:<br/><code class="grey">fibronectin_datatest.fasta</code></p><p>Number of sequences in input file:<br/><code class="grey">36</code></p><p>Pattern of the sequence bank:<br/><code class="grey">AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE</code></p><p>5' restriction site:<br/><code class="grey">GCGGCCGC</code></p><p>3' restriction site:<br/><code class="grey">GGTACC</code></p><div class="page-header"><a id="analysis"></a><h2>Sequences analysis</h2></div><p>Caption:</p><ul><li class="text-success">Valid sequences that will be part of the next analysis </li><li 
class="text-warning">Good sequences but will not be part of the next analysis</li><li class="text-error">Rejected sequences</li></ul><table class="table table-striped table-bordered"><tr><th class="text-error">Absence of restriction sites</th><th class="text-error">Incorrect number of nucleotides between the restriction sites</th><th class="text-error">Stop codon <u>inside</u> the area of interest</th><th class="text-warning">Mutation in the conserved regions</th><th class="text-success">Valid sequences</th><th>Amber codon in the sequence (<u>inside</u> the area of interest)</th></tr><tr><td class="text-error">1 sequence(s) (2.78%)</td><td class="text-error">6 sequence(s) (16.67%)</td><td class="text-error">25 sequence(s) (69.44%)</td><td class="text-warning">0 sequence(s) (0.00%)</td><td class="text-success">4 sequence(s) (11.11%)</td><td>0 sequence(s)</td></tr><tr><td class="text-error">XL2-3_PSEXSEQ-REV_34</td><td class="text-error">XL1_22_PSEXSEQ-REV_24<br/>XL1_24_PSEXSEQ-REV_26<br/>XL1_30_PSEXSEQ-REV_30<br/>XL1_4_PSEXSEQ-REV_8<br/>XL2-2_PSEXSEQ-REV_33<br/>XL3-4_PSEXSEQ-REV_40</td><td class="text-error">XL1_10_PSEXSEQ-REV_13<br/>XL1_11_PSEXSEQ-REV_14<br/>XL1_13_PSEXSEQ-REV_16<br/>XL1_14_PSEXSEQ-REV_17<br/>XL1_15_PSEXSEQ-REV_18<br/>XL1_16_PSEXSEQ-REV_19<br/>XL1_17_PSEXSEQ-REV_20<br/>XL1_1_PSEXSEQ-REV_5<br/>XL1_23_PSEXSEQ-REV_25<br/>XL1_25_PSEXSEQ-REV_27<br/>XL1_26_PSEXSEQ-REV_28<br/>XL1_29_PSEXSEQ-REV_29<br/>XL1_33_PSEXSEQ-REV_23<br/>XL1_3_PSEXSEQ-REV_7<br/>XL1_5_PSEXSEQ-REV_9<br/>XL1_6_PSEXSEQ-REV_10<br/>XL1_8_PSEXSEQ-REV_11<br/>XL1_9_PSEXSEQ-REV_12<br/>XL2-1_PSEXSEQ-REV_32<br/>XL2-4_PSEXSEQ-REV_35<br/>XL2-5_PSEXSEQ-REV_36<br/>XL3-1_PSEXSEQ-REV_37<br/>XL3-2_PSEXSEQ-REV_38<br/>XL3-3_PSEXSEQ-REV_39<br/>XL3-5_PSEXSEQ-REV_41</td><td class="text-warning"></td><td class="text-success">XL1_12_PSEXSEQ-REV_15<br/>XL1_18_PSEXSEQ-REV_21<br/>XL1_19_PSEXSEQ-REV_22<br/>XL1_2_PSEXSEQ-REV_6</td><td></td></tr></table><div class="page-header"><a id="variable"></a><h2>Variable 
regions analysis</h2></div><p>The following group of sequences are identical clones on the variable regions:</p><p>No clone was found.</p><p>Here's the distribution of the repeated sequences in variable regions:</p><table class="table table-striped table-bordered"><thead><tr><th>Variable region</th><th>Repeated sequence</th><th>Number of occurrences (percentage of valid sequences)</th></tr></thead><tbody><tr><td rowspan="1">3</td><td>YSY</td><td>2 (50.00%)</td></tr></tbody></table><div class="page-header"><a id="cluster"></a><h2>Clustering</h2></div><p>The following clusters were generated by MCL:</p><div class="row-fluid"><div class="span6"><pre>4 sequences (100.00% of valid sequences)<br/>XL1_12_PSEXSEQ-REV_15<br/>XL1_18_PSEXSEQ-REV_21<br/>XL1_19_PSEXSEQ-REV_22<br/>XL1_2_PSEXSEQ-REV_6
+</pre></div></div><div class="page-header"><a id="stat"></a><h2>Statistics</h2></div><p>Here's some statistics about the valid sequences:</p><p>Mean for the pairwise alignement scores: 25.92<br/>Standard deviation: 3.89</p><div class="row-fluid"><div class="span6"><img src="distri.png" alt="Distribution of the pairwise alignment score"></div><div class="span6"><table class="table table-striped table-bordered"><thead><tr><th>Pairwise Alignment Score</th><th>Number of occurrences</th></tr></thead><tbody><tr><td>19.00</td><td>1</td></tr><tr><td>23.80</td><td>1</td></tr><tr><td>26.30</td><td>1</td></tr><tr><td>27.30</td><td>2</td></tr><tr><td>31.80</td><td>1</td></tr></tbody></table></div></div><div class="page-header"><a id="annex"></a><h2>Annex</h2></div><p><strong>Valid protein sequences</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">>XL1_12_PSEXSEQ-REV_15
+AAAGSSVSSVPTKLEVVAATPTSLLISWDANLPNSAVHYYRITYGETGGNSPVQEFTVPGSSYTATISGLSPGVDYTITV
+YAHNDCNTIVATCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+>XL1_18_PSEXSEQ-REV_21
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAIYRPVAYYRITYGETGGNSPVQEFTVPGYSYTATISGLSPGVDYTITVYA
+PIGHFDVLVTIPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRD
+>XL1_19_PSEXSEQ-REV_22
+AAAGSSVSSVPTKLEVVAATPTSLLISWDASIFRGIVLYYRITYGETGGNSPVQEFTVPGYSYTATISGLSPGVDYTITV
+YAHGTSFSAISCAFPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAM
+>XL1_2_PSEXSEQ-REV_6
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAPLGDVFYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA
+TGARTSADGFIPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+</textarea><p>Multiple sequence alignment of the <strong>valid sequences</strong> generated by Clustal Omega:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">CLUSTAL O(1.2.3) multiple sequence alignment
+
+
+XL1_12_PSEXSEQ-REV_15      AAAGSSVSSVPTKLEVVAATPTSLLISWDANLPNSAVHYYRITYGETGGNSPVQEFTVPG
+XL1_18_PSEXSEQ-REV_21      AAAGSSVSSVPTKLEVVAATPTSLLISWDAIY--RPVAYYRITYGETGGNSPVQEFTVPG
+XL1_19_PSEXSEQ-REV_22      AAAGSSVSSVPTKLEVVAATPTSLLISWDASIFRGIVLYYRITYGETGGNSPVQEFTVPG
+XL1_2_PSEXSEQ-REV_6        AAAGSSVSSVPTKLEVVAATPTSLLISWDAPL--GDVFYYRITYGETGGNSPVQEFTVPG
+                           ******************************      * **********************
+
+XL1_12_PSEXSEQ-REV_15      SSYTATISGLSPGVDYTITVYAHNDCNTI-VATCPISINYRTGTGGSGGSHHHHHHHHGG
+XL1_18_PSEXSEQ-REV_21      YSYTATISGLSPGVDYTITVYAPIGHFDV-LVTIPISINYRTGTGGSGGSHHHHHHHHGG
+XL1_19_PSEXSEQ-REV_22      YSYTATISGLSPGVDYTITVYAHGTSFSAISCAFPISINYRTGTGGSGGSHHHHHHHHGG
+XL1_2_PSEXSEQ-REV_6        SYYTATISGLSPGVDYTITVYATGARTSA-DGFIPISINYRTGTGGSGGSHHHHHHHHGG
+                             ********************            **************************
+
+XL1_12_PSEXSEQ-REV_15      SYPYDVPDYAPRVG*GRVGSKDIRAR
+XL1_18_PSEXSEQ-REV_21      SYPYDVPDYAPRVG*GRVGSKDIRD-
+XL1_19_PSEXSEQ-REV_22      SYPYDVPDYAPRVG*GRVGSKDIRAM
+XL1_2_PSEXSEQ-REV_6        SYPYDVPDYAPRVG*GRVGSKDIRAR
+                           ************************  
+</textarea><p><strong>Protein sequences with an incorrect number of nucleotides between the restriction sites</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">>XL1_22_PSEXSEQ-REV_24
+AAAGSSVSSVPTKLEVVAATPTSLLISWDALITFVAYYRITYGETGGTPRFRNSPSRVLILPQRFPACHRVSTIPLRFTL
+PTPPTTTTSSSRFLSITVPVPAVRAVAITITITIMVVRIRMTCQTMHHAWARDAWDPKISEPI
+>XL1_24_PSEXSEQ-REV_26
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAHSSPDYVCYYRITYGETGVTPGSGIHRPG*FLNRNDFRPVTGCRLYHYGL
+RFELLTYLLIEL*SDFYQLPYRYRRFGR*PSPSPSPSWWFVSV*RARLCTTRGLGTRGIQRYQSK
+>XL1_30_PSEXSEQ-REV_30
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPSAATPTTRFL
+SITVPVPAVRAVAITITITIMVVRIRMTCQTMHHAWARDAWDPKIS
+>XL1_4_PSEXSEQ-REV_8
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAPFTDHCVVYYRITYGETGGNPRFRNSPSRVLIIPQRFPACHRVSTIPLRF
+TLTTPAATTTAPTPRFLSITVPVPAVRAVAITITITIMVVRIRMTCQTMHHAWARDAWDPKISEQS
+>XL2-2_PSEXSEQ-REV_33
+AAAGSSVSSVPTKLEVVAATPTSLLISWDACHTFVNYYRITYGETGGNSPVQEFTRPGLFLYRNDFRPVTGCRLYHYGLR
+*PLPPLLRLPRPDFYQLPYRYRRFGR*PSPSPSPSWWFVSV*RARLCTTRGLGTRGIQRYPE
+>XL3-4_PSEXSEQ-REV_40
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTSRVLIIPQRFPACHRVSTIPLRFTL
+PASTTASSTAADFYQLPYRYRRFGR*PSPSPSPSWWFVSV*RARLCTTRGLGTRGIQRYQSER
+</textarea><p><strong>Protein sequences with a stop codon</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">>XL1_10_PSEXSEQ-REV_13
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAT
+>XL1_11_PSEXSEQ-REV_14
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAHGNCVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAE
+>XL1_13_PSEXSEQ-REV_16
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAPRSFVRYYRITYGETGGNSPVQEFTVPGSYSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_14_PSEXSEQ-REV_17
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSYTATISGLSPGVDYTITVYA
+RYFIYSYISHSTPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_15_PSEXSEQ-REV_18
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRC
+>XL1_16_PSEXSEQ-REV_19
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGYYSTATISGLSPGVDYTITVYA
+CGGVNANSSDCFPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_17_PSEXSEQ-REV_20
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SVPIHFSCRGCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIS
+>XL1_1_PSEXSEQ-REV_5
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIP
+>XL1_23_PSEXSEQ-REV_25
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA
+GDISDNPFSRCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN
+>XL1_25_PSEXSEQ-REV_27
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+>XL1_26_PSEXSEQ-REV_28
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKRYP
+>XL1_29_PSEXSEQ-REV_29
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAP
+>XL1_33_PSEXSEQ-REV_23
+AAAGSSVSSVPTKLEVVAATPTSLLISWDASQSNVSYYRITYGETGGNSPVQEFTVPGCYSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAEN
+>XL1_3_PSEXSEQ-REV_7
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAS
+>XL1_5_PSEXSEQ-REV_9
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN
+>XL1_6_PSEXSEQ-REV_10
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGYYSTATISGLSPGVDYTITVYA
+NATVSNFHPINSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDFRAK
+>XL1_8_PSEXSEQ-REV_11
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_9_PSEXSEQ-REV_12
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVCYYRITYGETGGNSPVQEFTVPGSYSTATISGLSPGVDYTITVYA
+SLVNDDVHAVHHPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIQS
+>XL2-1_PSEXSEQ-REV_32
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKEIRAM
+>XL2-4_PSEXSEQ-REV_35
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN
+>XL2-5_PSEXSEQ-REV_36
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA
+SGYCVIVANSYPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIP
+>XL3-1_PSEXSEQ-REV_37
+AAAGSSVSSVPTKLEVVAATPTSLLISCDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRSR
+>XL3-2_PSEXSEQ-REV_38
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+>XL3-3_PSEXSEQ-REV_39
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAS
+>XL3-5_PSEXSEQ-REV_41
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAT
+</textarea></div></body></html>
\ No newline at end of file