Mercurial > repos > iuc > funannotate_annotate
comparison test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg @ 0:a5baa4ff168d draft
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
author | iuc |
---|---|
date | Mon, 04 Oct 2021 19:39:38 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a5baa4ff168d |
---|---|
1 # | |
2 # parameters for all Drosophila versions | |
3 # | |
4 # date : 11.8.2009 | |
5 # | |
6 | |
7 # | |
8 # Properties for augustus | |
9 #------------------------------------ | |
10 /augustus/verbosity 3 # 0-3, 0: only print the necessary output | |
11 maxDNAPieceSize 200000 # maximum segment that is predicted in one piece | |
12 stopCodonExcludedFromCDS false # make this 'true' if the CDS excludes the stop codon (training and prediction) | |
13 | |
14 # gff output options: | |
15 protein on # output predicted protein sequence | |
16 codingseq off # output the coding sequence | |
17 cds on # output 'cds' as feature for exons | |
18 start on # output start codons (translation start) | |
19 stop on # output stop codons (translation stop) | |
20 introns on # output introns | |
21 tss on # output transcription start site | |
22 tts on # output transcription termination site | |
23 print_utr off # output 5'UTR and 3'UTR lines in addition to exon lines | |
24 | |
25 checkExAcc off # internal parameter for extrinsic accuracy | |
26 | |
27 # alternative transcripts and posterior probabilities | |
28 sample 100 # the number of sampling iterations | |
29 alternatives-from-sampling false # output alternative transcripts | |
30 minexonintronprob 0.08 # minimal posterior probability of all (coding) exons | |
31 minmeanexonintronprob 0.4 # minimal geometric mean of the posterior probs of introns and exons | |
32 maxtracks -1 # maximum number of reported transcripts per gene (-1: no limit) | |
33 keep_viterbi true # set to true if all Viterbi transcripts should be reported | |
34 uniqueCDS true # don't report transcripts that differ only in the UTR | |
35 UTR on # predict untranslated regions | |
36 | |
37 # | |
38 # | |
39 # The rest of the file contains mainly meta parameters used for training. | |
40 # | |
41 | |
42 # global constants | |
43 # ---------------------------- | |
44 | |
45 /Constant/trans_init_window 25 | |
46 /Constant/ass_upwindow_size 32 | |
47 /Constant/ass_start 1 | |
48 /Constant/ass_end 4 | |
49 /Constant/dss_start 3 | |
50 /Constant/dss_end 4 | |
51 /Constant/init_coding_len 9 | |
52 /Constant/intterm_coding_len 0 | |
53 /Constant/tss_upwindow_size 45 | |
54 /Constant/decomp_num_at 1 | |
55 /Constant/decomp_num_gc 1 | |
56 /Constant/gc_range_min 0.32 # This range has an effect only when decomp_num_steps>1. | |
57 /Constant/gc_range_max 0.50 # States the minimal and maximal percentage of c or g | |
58 /Constant/decomp_num_steps 1 # I recommend keeping this to 1 for most species. | |
59 /Constant/min_coding_len 201 # no gene with a coding sequence shorter than this is predicted | |
60 /Constant/probNinCoding 0.23 # divide this by .25 to get a penalty (malus) for making one masked letter part of the coding sequence | |
61 /Constant/amberprob 0.34 # Prob(stop codon = tag), if 0 tag is assumed to code for amino acid | |
62 /Constant/ochreprob 0.41 # Prob(stop codon = taa), if 0 taa is assumed to code for amino acid | |
63 /Constant/opalprob 0.25 # Prob(stop codon = tga), if 0 tga is assumed to code for amino acid | |
64 /Constant/subopt_transcript_threshold 0.7 | |
65 /Constant/almost_identical_maxdiff 10 | |
66 | |
67 # type of weighting, one of 1 = equalWeights, 2 = gcContentClasses, 3 = multiNormalKernel | |
68 /BaseCount/weighingType 3 | |
69 # file with the weight matrix (only for multiNormalKernel type weighting) | |
70 /BaseCount/weightMatrixFile fly_weightmatrix.txt # change this to your species if at all necessary | |
71 | |
72 # Properties for IGenicModel | |
73 # ---------------------------- | |
74 /IGenicModel/verbosity 0 | |
75 /IGenicModel/infile fly_igenic_probs.pbl # change this and the other five filenames *_probs.pbl below to your species | |
76 /IGenicModel/outfile fly_igenic_probs.pbl | |
77 /IGenicModel/patpseudocount 5.0 | |
78 /IGenicModel/k 4 # order of the Markov chain for content model, keep equal to /ExonModel/k | |
79 | |
80 # Properties for ExonModel | |
81 # ---------------------------- | |
82 /ExonModel/verbosity 3 | |
83 /ExonModel/infile fly_exon_probs.pbl | |
84 /ExonModel/outfile fly_exon_probs.pbl | |
85 /ExonModel/patpseudocount 5.0 | |
86 /ExonModel/minPatSum 350 | |
87 /ExonModel/k 4 # order of the Markov chain for content model | |
88 /ExonModel/etorder 2 | |
89 /ExonModel/etpseudocount 3 | |
90 /ExonModel/exonlengthD 3000 # beyond this the distribution is geometric | |
91 /ExonModel/maxexonlength 15000 | |
92 /ExonModel/slope_of_bandwidth 0.3 | |
93 /ExonModel/minwindowcount 8 | |
94 /ExonModel/tis_motif_memory 3 | |
95 /ExonModel/tis_motif_radius 2 | |
96 | |
97 # Properties for IntronModel | |
98 # ---------------------------- | |
99 /IntronModel/verbosity 0 | |
100 /IntronModel/infile fly_intron_probs.pbl | |
101 /IntronModel/outfile fly_intron_probs.pbl | |
102 /IntronModel/patpseudocount 5.0 | |
103 /IntronModel/k 4 # order of the Markov chain for content model, keep equal to /ExonModel/k | |
104 /IntronModel/slope_of_bandwidth 0.4 | |
105 /IntronModel/minwindowcount 3 | |
106 /IntronModel/asspseudocount 0.01 | |
107 /IntronModel/dsspseudocount 0.01015 | |
108 /IntronModel/dssneighborfactor 0.001 | |
109 #/IntronModel/splicefile fly_splicefile.txt # this optional file contains additional windows around splice sites for training, uncomment if you have one | |
110 /IntronModel/sf_with_motif false # if true the splice file is also used to train the branch point region | |
111 /IntronModel/d 929 # constraint: this must be larger than 4 + /Constant/dss_end + /Constant/ass_upwindow_size + /Constant/ass_start | |
112 /IntronModel/ass_motif_memory 1 | |
113 /IntronModel/ass_motif_radius 4 | |
114 | |
115 # Properties for UtrModel | |
116 # ---------------------------- | |
117 /UtrModel/verbosity 3 | |
118 /UtrModel/infile fly_utr_probs.pbl | |
119 /UtrModel/outfile fly_utr_probs.pbl | |
120 /UtrModel/k 4 | |
121 /UtrModel/utr5patternweight 0.3 #0.7625 | |
122 /UtrModel/utr3patternweight 0.3 #0.5 | |
123 /UtrModel/patpseudocount 1 | |
124 /UtrModel/tssup_k 1 | |
125 /UtrModel/tssup_patpseudocount 1 | |
126 /UtrModel/slope_of_bandwidth 0.25 | |
127 /UtrModel/minwindowcount 1 | |
128 /UtrModel/exonlengthD 800 | |
129 /UtrModel/maxexonlength 1200 | |
130 /UtrModel/max3singlelength 2000 # excludes roughly 1% | |
131 /UtrModel/max3termlength 1200 # excludes ~ 0.3% | |
132 /UtrModel/tss_start 8 | |
133 /UtrModel/tss_end 5 | |
134 /UtrModel/tata_start 2 | |
135 /UtrModel/tata_end 10 | |
136 /UtrModel/tata_pseudocount 2 | |
137 /UtrModel/d_tss_tata_min 26 # minimal distance between start of tata box (if existent) and tss | |
138 /UtrModel/d_tss_tata_max 37 # maximal distance between start of tata box (if existent) and tss | |
139 /UtrModel/polyasig_consensus aataaa # polyadenylation signal training not fully automated yet | |
140 /UtrModel/d_polyasig_cleavage 14 # the transcription end is predicted this many bases after the polyadenylation signal | |
141 /UtrModel/d_polya_cleavage_min 9 | |
142 /UtrModel/d_polya_cleavage_max 35 | |
143 /UtrModel/prob_polya 0.95 | |
144 /UtrModel/tts_motif_memory 1 |