view make_pan_library.sh @ 0:f1a157358d4d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edta commit 24cb0421330e54b144b3e6f1be4ae35ac0e48c1c
author bgruening
date Sun, 16 Oct 2022 12:41:19 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env bash

# Known-TE library argument. $1 is intentionally left unquoted, so the value is
# word-split (and glob-expanded) into array elements. In the visible part of
# this script the array is only referenced by the commented-out
# "make comprehensive TE library" step.
# shellcheck disable=SC2206
declare -a known_te=($1)

#######################################
# Print the lines of one TE library that match the IDs listed in
# "<library>.real", relabelling "#unknown" as "#DNA/Helitron".
# The ID list is split on whitespace; every token is used as a grep pattern
# (basic regular expression), in list order, one grep run per token.
# Arguments:
#   $1 - path to the library file; the ID list is read from "$1.real"
# Outputs:
#   Matching lines on stdout; a warning on stderr if the list is unreadable.
# Returns:
#   1 if the ID list cannot be read, otherwise the status of the filter.
#######################################
extract_real_entries() {
  local lib=$1
  local list="$lib.real"
  local -a tokens
  local id

  if [[ ! -r "$list" ]]; then
    printf 'warning: cannot read ID list %s\n' "$list" >&2
    return 1
  fi

  # read -a tokenises on whitespace without glob-expanding the IDs; the
  # "|| (( ... ))" part keeps a final line that lacks a trailing newline.
  while read -r -a tokens || ((${#tokens[@]} > 0)); do
    for id in "${tokens[@]}"; do
      # "--" stops an ID that starts with "-" from being parsed as an option.
      grep -- "$id" "$lib"
    done
  done < "$list" |
    sed 's/#unknown/#DNA\/Helitron/'
}

#######################################
# Run extract_real_entries for every *mod.EDTA.TElib.novel.fa file in the
# current directory, one background job per library, writing the result to
# "<library>.real.ori", and wait for all jobs to finish.
# Outputs:
#   One "<library>.real.ori" file per matching library (empty if the ID list
#   is missing or nothing matched).
#######################################
build_real_libraries() {
  local lib

  for lib in *mod.EDTA.TElib.novel.fa; do
    # An unmatched glob stays literal; skip it instead of creating a file
    # named '*mod.EDTA.TElib.novel.fa.real.ori'.
    [[ -e "$lib" ]] || continue
    extract_real_entries "$lib" > "$lib.real.ori" &
  done

  wait
}

echo 'get classification info and convert #unknown to #DNA/Helitron'
build_real_libraries

echo 'aggregate novel TE libraries'
i=0
for j in *real.ori; do
  i=$(($i+5000));
  perl /EDTA/util/rename_TE.pl $j $i;
done > NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw
perl /EDTA/util/rename_TE.pl NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw > NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw2
mv NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw2 NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw

# wait

# echo 'remove redundant'
# nohup perl /EDTA/util/cleanup_nested.pl \
#     -in NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw \
#     -cov 0.95 \
#     -minlen 80 \
#     -miniden 80\
#     -blast /opt/conda/lib/python3.6/site-packages/Bio/Blast &

# ls
# wait

# echo 'remove a number of false TEs and rename IDs'
# RepeatMasker -pa 36 -q -no_is -norna -nolow -div 40 -lib rm.fa -cutoff 225 NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw.cln
# perl /EDTA/util/output_by_list.pl 1 NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw.cln 1 \
#     <(awk '{print $5}' NAM.EDTA1.8.0.EDTA.TElib.novel.fa.raw.cln.out|grep TE) -ex -FA | \
#     perl /EDTA/util/rename_TE.pl - > NAM.EDTA1.8.0.EDTA.TElib.novel.fa

# echo 'make comprehensive TE library'
# cat $known_te NAM.EDTA1.8.0.EDTA.TElib.novel.fa > NAM.EDTA1.8.0.TE11122019.TElib.fa

# echo 'finished make_pan_library'