view MolD_parameter_file.txt @ 0:4e8e2f836d0f draft default tip

planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
author itaxotools
date Sun, 29 Jan 2023 16:25:48 +0000
parents
children
line wrap: on
line source

#############SET INPUT AND OUTPUT FILES (INCLUDING COMPLETE PATH)
INPUT_FILE=D:\MOLD3\Pontohedyle_COI.fas
OUTPUT_FILE=D:\MOLD3\Pontohedyle_COI.txt

# Code gaps as characters: 'yes' or 'no'
Gaps_as_chars=no

#############SET TAXON PARAMETERS (NO DEFAULTS, no parameters entered will lead to an error).
# Set list of the focus taxa (enter a comma separated list)
# to combine several taxa in the list and diagnose the pooled records as a single taxon, put '+' between the pooled taxa names
# to diagnose all taxa, enter 'ALL'
# to diagnose all taxa with more than N sequences available (where N is a natural number), enter '>N'
# to diagnose only taxa with a certain pattern in the taxon name, enter 'P:pattern'
# to pool all records with a certain pattern in the taxon name together, and diagnose them as a single taxon, enter 'P+:pattern'
# to obtain sets of diagnostic sites for taxa pairs, enter:
#   allVSall - for all possible pairwise comparisons in the dataset
#   taxon1VSall - for pairwise comparisons of taxon1 with all other taxa in the dataset
#   taxon1VStaxon2 - for pairwise comparison of taxon1 and taxon2

#qTAXA=all, allVSall
qTAXA= neridae, wiggi, verrucosa, neridae+wiggi+verrucosa, neridaeVSwiggiVSverrucosa
#qTAXA= neridaeVSwiggiVSverrucosa

# Set taxon rank: if species - 1, if above species - 2.

Taxon_rank=1

#############SET ADVANCED PARAMETERS FOR pDNC RECOVERY.
# If you don't want to set them, don't enter anything after '='
# Set number of the informative positions to be considered, integer (default 100)
# By entering '>0' all informative positions will be selected.

Cutoff=>1

# Set how many ambiguously called nucleotides are allowed (default 25).

NumberN=100
# Set the number of recursions of MolD (default 10000).

Number_of_iterations=10000

# Set maximum length for the raw pDNCs (default 12).

MaxLen1=12

# Set maximum length for the refined pDNCs (default 7).

MaxLen2=5

# Set a sequence to be used as a reference for site indexing (the 1st position will correspond to the beginning of this sequence).
# Default - indexing starts from the beginning of the alignment. So, the alignment is expected to be trimmed on the left to match the beginning of the reference sequence (if the reference is provided).
# Leave as is, or enter the sequence <identifier>,in (if the sequence will be used for diagnoses calculations), or <identifier>,ex (if the sequence is to be used for indexing only).

Iref=NO

############ SET PARAMETERS OF ARTIFICIAL DATASETS (only sDNCs).
# Set percent difference between original and modified sequence (default 1 for species-level taxa, 3 for supraspecific taxa).

Pdiff=1

# Set max number of sequences per taxon to modify (default 10).

NMaxSeq=10

# Set rDNC scoring threshold (default stringent).
# 100 artificial datasets are created to score the sDNC. If the sDNC remains diagnostic in the requested number of artificial datasets (defined by the threshold value),
# or in a higher number, in two consecutive runs, then the sDNC is output. The threshold values are:
# lousy: 66
# moderate: 75
# stringent: 90
# very_stringent: 95

Scoring=moderate