Mercurial > repos > okorol > itsx

diff ITSx @ 0:f82c70f54bd7 draft
Uploaded
author: okorol
date: Tue, 24 Mar 2015 12:02:48 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ITSx	Tue Mar 24 12:02:48 2015 -0400
@@ -0,0 +1,2695 @@
+#!/usr/bin/perl
+# ITSx ITS Extractor
+$app_title     = "ITSx -- Identifies ITS sequences and extracts the ITS region";
+$app_author    = "Johan Bengtsson-Palme et al., University of Gothenburg";
+$app_version   = "1.0.11";
+$app_message   = "";
+# ----------------------------------------------------------------- #
+
+# License information
+$license =
+"    ITSx - ITS Extractor -- Identifies ITS sequences and extracts the ITS region\
+    Copyright (C) 2012-2014 Johan Bengtsson-Palme et al.\
+\
+    This program is free software: you can redistribute it and/or modify\
+    it under the terms of the GNU General Public License as published by\
+    the Free Software Foundation, either version 3 of the License, or\
+    (at your option) any later version.\
+\
+    This program is distributed in the hope that it will be useful,\
+    but WITHOUT ANY WARRANTY; without even the implied warranty of\
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\
+    GNU General Public License for more details.\
+\
+    You should have received a copy of the GNU General Public License\
+    along with this program, in a file called 'license.txt'\
+    If not, see: http://www.gnu.org/licenses/.\
+";
+
+## BUGS:
+$bugs = "New features in this version ($app_version):\
+- None\
+\
+Fixed bugs in this version ($app_version):\
+- Fixed a bug causing newline characters to be occasionally skipped in the 'its1.full_and_partial.fasta' FASTA output file when the '--anchor' option was used\
+\
+Known bugs in this version ($app_version):\
+- None\
+";
+
+## OPTIONS:
+$options = "\
+-i {file} : DNA FASTA input file to investigate\
+-o {file} : Base for the names of output file(s)\
+-p {directory} : A path to a directory of HMM-profile collections representing ITS conserved regions, default is in the same directory as ITSx itself\
+--date {T or F} : Adds a date and time stamp to the output directory, off (F) by default\
+--reset {T or F} : Re-creates the HMM-database before ITSx is run, off (F) by default\
+
+Sequence selection options:\
+-t {character code} : Profile set to use for the search, see the User's Guide (comma-separated), default is all\
+-E {value} : Domain E-value cutoff for a sequence to be included in the output, default = 1e-5\
+-S {value} : Domain score cutoff for a sequence to be included in the output, default = 0\
+-N {value} : The minimal number of domains that must match a sequence before it is included, default = 2\
+--selection_priority {sum, domains, eval, score} : Selects what will be of highest priority when determining the origin of the sequence, default is sum\
+--search_eval {value} : The E-value cutoff used in the HMMER search, high numbers may slow down the process, cannot be used with the --search_score option, default is 0.01\
+--search_score {value} : The score cutoff used in the HMMER search, low numbers may slow down the process, cannot be used with the --search_eval option, default is to used E-value cutoff, not score\
+--allow_single_domain {e-value,score or F} : Allow inclusion of sequences that only find a single domain, given that they meet the given E-value and score thresholds, on with parameters 1e-9,0 by default\
+--allow_reorder {T or F} : Allows profiles to be in the wrong order on extracted sequences, off (F) by default\
+--complement {T or F} : Checks both DNA strands against the database, creating reverse complements, on (T) by default\
+--cpu {value} : the number of CPU threads to use, default is 1\
+--multi_thread {T or F} : Multi-thread the HMMER-search, on (T) if number of CPUs (--cpu option > 1), else off (F) by default\
+--heuristics {T or F} : Selects whether to use HMMER's heuristic filtering, off (F) by default\
+
+Output options:\
+--summary {T or F} : Summary of results output, on (T) by default\
+--graphical {T or F} : 'Graphical' output, on (T) by default\
+--fasta {T or F} : FASTA-format output of extracted ITS sequences, on (T) by default\
+--preserve {T or F} : Preserve sequence headers in input file instead of printing out ITSx headers, off (F) by default\
+--save_regions {SSU,ITS1,5.8S,ITS2,LSU,all,none} : A comma separated list of regions to output separate FASTA files for, 'ITS1,ITS2' by default\
+--anchor {integer or HMM} : Saves an additional number of bases before and after each extracted region. If set to 'HMM' all bases matching the corresponding HMM will be output, default = 0\
+--only_full {T or F} : If true, output is limited to full-length regions, off (F) by default\
+--partial {integer} : Saves additional FASTA-files for full and partial ITS sequences longer than the specified cutoff, default = 0 (off)\
+--concat {T or F} : Saves a FASTA-file with concatenated ITS sequences (with 5.8S removed), off (F) by default\
+--minlen {integer} : Minimum length the ITS regions must be to be outputted in the concatenated file (see above), default = 0\
+--positions {T or F} : Table format output containing the positions ITS sequences were found in, on (T) by default\
+--table {T or F} : Table format output of sequences containing probable ITS sequences, off (F) by default\
+--not_found {T or F} : Saves a list of non-found entries, on (T) by default\
+--detailed_results {T or F} : Saves a tab-separated list of all results, off (F) by default\
+--truncate {T or F} : Truncates the FASTA output to only contain the actual ITS sequences found, on (T) by default\
+--silent {T or F} : Supresses printing progress info to stderr, off (F) by default\
+--graph_scale {value} : Sets the scale of the graph output, if value is zero, a percentage view is shown, default = 0\
+--save_raw {T or F} : Saves all raw data for searches etc. instead of removing it on finish, off (F) by default\
+
+-h : displays this help message\
+--help : displays this help message\
+--bugs : displays the bug fixes and known bugs in this version of ITSx\
+--license : displays licensing information\
+";
+
+
+## Print title message
+print STDERR "$app_title\nby $app_author\nVersion: $app_version\n$app_message";
+print STDERR "-----------------------------------------------------------------\n";
+
+## Setup default variable values
+use List::Util qw(first max maxstr min minstr reduce shuffle sum);
+
+$bindir = $0;
+$bindir =~ s/_x//;
+$input = "";
+$output = "ITSx_out";
+$hmmscan = "";
+$profileDB = "$bindir\_db/HMMs";
+$type = "all";
+$E = 1e-5;
+$S = 0;
+$N = 2;
+$priority = "sum";
+$search_eval = 0.01;
+$search_score = "";
+$allow_single_E = 1e-9;
+$allow_single_score = 0;
+#$allow_single_E = -1;   # Turns off single-domain matching by E-value
+#$allow_single_score = 0;   # Turns off single-domain matching by score
+$allow_reorder = 0;
+$complement = 1;
+$cpu = 1;
+$multi_thread = "unset";
+$heuristics = 0;
+$out_sum = 1;
+$out_graph = 1;
+$out_fasta = 1;
+$out_preserve = 0;
+$out_ssu = 0;
+$out_its1 = 1;
+$out_its2 = 1;
+$out_58S = 0;
+$out_lsu = 0;
+$out_pos = 1;
+$out_table = 0;
+$out_not = 1;
+$out_date = 0;
+$out_joined = 0;
+$out_results = 0;
+$out_partial = 0;
+$out_concat = 0;
+$concat_minlen = 0;
+$truncate = 1;
+$anchor = 0;
+$only_full = 0;
+$graph_scale = 0;
+$debug = 0;
+$reset = 0;
+
+## Read command-line options
+for ($i = 0; $i <= scalar(@ARGV); $i++) {   # Goes through the list of arguments
+  $arg = @ARGV[$i];   # Stores the current argument in $arg
+
+  if ($arg eq "-i") {   # Read input files from -i flag
+    $i++;
+    $input = @ARGV[$i];
+  }
+  if ($arg eq "-o") {   # Read output files from -o flag
+    $i++;
+    $output = @ARGV[$i];
+  }
+  if ($arg eq "-p") {   # Read profile database from -p flag
+    $i++;
+    $profileDB = @ARGV[$i];
+  }
+  if ($arg eq "--hmmscan") {   # Read pre-computed hmmscan output file from --hmmscan flag ('undocumented' feature)
+    $i++;
+    $hmmscan = @ARGV[$i];
+  }
+  if ($arg eq "--date") {   # Determine whether or not to add a date stamp based on the --date flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Ff0]/) {   # Check if argument begins with "F", "f", or "0"
+      $out_date = 0;
+    } else {
+      $out_date = 1;
+    }
+  }
+  if ($arg eq "--reset") {   # Reset HMM database?
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Ff0]/) {   # Check if argument begins with "F", "f", or "0"
+      $reset = 0;
+    } else {
+      $reset = 1;
+    }
+  }
+
+  if ($arg eq "-t") {   # Select what types of ITSs to look for using the -t flag
+    $i++;
+    $type = @ARGV[$i];
+  }
+  if ($arg eq "-E") {   # Set the E-value cutoff using the -E flag
+    $i++;
+    $E = @ARGV[$i];
+  }
+  if ($arg eq "-S") {   # Set the score cutoff using the -S flag
+    $i++;
+    $S = @ARGV[$i];
+  }
+  if ($arg eq "-N") {   # Set the number of found domains cutoff using the -N flag
+    $i++;
+    $N = @ARGV[$i];
+  }
+  if ($arg eq "--selection_priority") {   # Set how to order the ITS types using the --selection_priority flag
+    $i++;
+    $priority = @ARGV[$i];
+  }
+  if ($arg eq "--search_eval") {   # Set the E-value cutoff for the HMMER search using the --search_eval flag
+    $i++;
+    $search_eval = @ARGV[$i];
+    $search_score = "";   #  Turns off score cutoff
+  }
+  if ($arg eq "--search_score") {   # Set the score cutoff for the HMMER search using the --search_score flag
+    $i++;
+    $search_score = @ARGV[$i];
+    $search_eval = "";   #  Turns off E-value cutoff
+  }
+  if ($arg eq "--allow_single_domain") {   # Determine whether or not to allow single domain matches based on the --allow_single_domain flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Ff0]/) {   # Check if argument begins with "F", "f", or "0"
+      $allow_single_E = -1;   # Turns off single-domain matching by E-value
+      $allow_single_score = 0;   # Turns off single-domain matching by score
+    } else {
+      ($allow_single_E,$allow_single_score) = split(',',@ARGV[$i]);   # Turns on single-domain matching, assigning the first given value as the E-value cutoff, and the second as score cutoff
+    }
+  }
+  if ($arg eq "--allow_reorder") {   # Determine whether or not to allow the domains to be in the wrong order based on the --allow_reorder flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $allow_reorder = 1;
+    } else {
+      $allow_reorder = 0;
+    }
+  }
+  if ($arg eq "--complement") {   # Determine whether or not to scan the complementary strand of the input file based on the --complement flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $complement = 1;
+    } else {
+      $complement = 0;
+    }
+  }
+  if ($arg eq "--cpu") {   # Set the number of CPUs to use based on the --cpu flag
+    $i++;
+    $cpu = @ARGV[$i];
+  }
+  if ($arg eq "--multi_thread") {   # Determine whether or not to multi-thread the HMMER step based on the --multi_thread flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $multi_thread = 1;
+    } else {
+      $multi_thread = 0;
+    }
+  }
+  if ($arg eq "--heuristics") {   # Determine whether or not to use HMMER's heuristic filtering based on the --heuristics flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $heuristics = 1;
+    } else {
+      $heuristics = 0;
+    }
+  }
+
+  if ($arg eq "--summary") {   # Determine whether or not to output a summary based on the --summary flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_sum = 1;
+    } else {
+      $out_sum = 0;
+    }
+  }
+  if ($arg eq "--graphical") {   # Determine whether or not to output a graphical representation of matches based on the --graphical flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_graph = 1;
+    } else {
+      $out_graph = 0;
+    }
+  }
+  if ($arg eq "--detailed_results") {   # Determine whether or not to output a detailed results list, based on the --detailed_results flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_results = 1;
+    } else {
+      $out_results = 0;
+    }
+  }
+  if ($arg eq "--partial") {   # Set the full-and-partial cutoff
+    $i++;
+    $out_partial = @ARGV[$i];
+  }
+  if ($arg eq "--anchor") {   # Set the length of the sequence "anchors"
+    $i++;
+    $anchor = @ARGV[$i];
+  }
+  if ($arg eq "--only_full") {   # Output only full-length regions
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $only_full = 1;
+    } else {
+      $only_full = 0;
+    }
+  }
+  if ($arg eq "--save_regions") {   # Determine which regions to output FASTA files for based on the --save_regions flag
+    $i++;
+    @save_regions = split(',',uc(@ARGV[$i]));
+    $out_ssu = 0;
+    $out_its1 = 0;
+    $out_its2 = 0;
+    $out_58S = 0;
+    $out_lsu = 0;
+    foreach $save_region (@save_regions) {
+      if ($save_region eq "SSU") {
+	$out_ssu = 1;
+      }
+      if ($save_region eq "ITS1") {
+	$out_its1 = 1;
+      }
+      if ($save_region eq "5.8S") {
+	$out_58S = 1;
+      }
+      if ($save_region eq "ITS2") {
+	$out_its2 = 1;
+      }
+      if ($save_region eq "LSU") {
+	$out_lsu = 1;
+      }
+      if ($save_region eq "ALL") {
+	$out_ssu = 1;
+	$out_its1 = 1;
+	$out_its2 = 1;
+	$out_58S = 1;
+	$out_lsu = 1;
+      }
+      if ($save_region eq "NONE") {
+	$out_ssu = 0;
+	$out_its1 = 0;
+	$out_its2 = 0;
+	$out_58S = 0;
+	$out_lsu = 0;
+      }
+    }
+  }
+  if ($arg eq "--positions") {   # Determine whether or not to output a positions file based on the --positions flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_pos = 1;
+    } else {
+      $out_pos = 0;
+    }
+  }
+  if ($arg eq "--concat") {   # Determine whether or not to output a concatednated ITS1 + ITS2 file
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_concat = 1;
+    } else {
+      $out_concat = 0;
+    }
+  }
+  if ($arg eq "--minlen") {   # Set the min length of the combined ITS1 and ITS2 sequences for concatenation
+    $i++;
+    $concat_minlen = @ARGV[$i];
+  }
+  if ($arg eq "--fasta") {   # Determine whether or not to output FASTA-files based on the --fasta flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_fasta = 1;
+    } else {
+      $out_fasta = 0;
+    }
+  }
+  if ($arg eq "--preserve") {   # Determine whether or not to preserve FASTA-headers based on the --preserve flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_preserve = 1;
+    } else {
+      $out_preserve = 0;
+    }
+  }
+  if ($arg eq "--joined") {   # Determine whether or not to output a FASTA-file containing ALL sorts of output sequences (for debugging)
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_joined = 1;
+    } else {
+      $out_joined = 0;
+    }
+  }
+  if ($arg eq "--table") {   # Determine whether or not to output tables of all potential matches based on the --table flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_table = 1;
+    } else {
+      $out_table = 0;
+    }
+  }
+  if ($arg eq "--not_found") {   # Determine whether or not to output a list of sequences that are not ITSs based on the --not_found flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $out_not = 1;
+    } else {
+      $out_not = 0;
+    }
+  }
+  if ($arg eq "--silent") {   # Determine whether or not to output anything to the screen based on the --silent flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $silent = 1;
+    } else {
+      $silent = 0;
+    }
+  }
+  if ($arg eq "--graph_scale") {   # Set the scale of the graphical output based on the --graph_scale flag
+    $i++;
+    $graph_scale = @ARGV[$i];
+  }
+  if ($arg eq "--save_raw") {   # Determine whether or not to save all the raw intermediate data based on the --save_raw flag
+    $i++;
+    if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) {   # Check if argument begins with "T", "t", or "1"
+      $save_raw = 1;
+    } else {
+      $save_raw = 0;
+    }
+  }
+
+  ## If "-h" or "--help" are among the options, output usage data and options
+  if (($arg eq "-h") || ($arg eq "--help")) {
+    print "Usage: ITSx -i <input file> -o <output file>\nOptions:$options";
+    print "-----------------------------------------------------------------\n";
+    exit;   # Exit ITSx
+  }
+
+  ## If "--bugs" is among the options, output bugs and features information
+  if ($arg eq "--bugs") {
+    print "$bugs\n";
+    exit;   # Exit ITSx
+  }
+
+  ## If "--license" is among the options, output license information
+  if ($arg eq "--license") {
+    print "$license\n";
+    exit;   # Exit ITSx
+  }
+
+  if ($arg eq "--debug") {   # Run ITSx in debug mode
+    $debug = 1;
+  }
+  if ($arg eq "--pipeline") {   # Run ITSx in pipeline mode
+    $pipeline = 1;
+  }
+}
+
+## Setup some variables dependent on input
+
+if ($multi_thread eq "unset") {   # If the multi-thread option is not set
+  if ($cpu > 1) {   # Then if the number of CPUs used > 1, then multi-thread HMMER searches
+    $multi_thread = 1;
+  } else {   # Else, run HMMER searches sequentially on one CPU
+    $multi_thread = 0;
+  }
+}
+
+if ($hmmscan ne "") {   # If a pre-computed hmmscan output is supplied
+  $output = $hmmscan;   # Then set the base of the output directory name to be the same as that hmmscan output file
+}
+
+## Check for binaries
+
+chomp($path = `which hmmpress`);   # Get the path for hmmpress
+if ($path eq "") {   # If the path is empty, then show an error message and exit ITSx
+  print STDERR "FATAL ERROR :: Could not locate HMMER binaries! It seems that hmmpress is not installed properly.\
+Consult the manual for installation instructions. Note that HMMER3 is required. Previous HMMER-versions will not work.\
+This error is fatal, and ITSx will now abort.\n";
+  print STDERR "-----------------------------------------------------------------\n";
+  exit;
+}
+
+chomp($path = `which hmmscan`);   # Get the path for hmmscan
+if ($path eq "") {   # If the path is empty, then show an error message and exit ITSx
+  print STDERR "FATAL ERROR :: Could not locate HMMER binaries! It seems that hmmscan is not installed properly.\
+Consult the manual for installation instructions. Note that HMMER3 is required. Previous HMMER-versions will not work.\
+This error is fatal, and ITSx will now abort.\n";
+  print STDERR "-----------------------------------------------------------------\n";
+  exit;
+}
+
+
+## Check for database
+chomp($errormsg = `ls $profileDB* 2>&1 1>/dev/null`);   # Get the error msg when looking for the profile database
+if (substr($errormsg,0,4) eq "ls: ") {   # If the error message begins with "ls: ", then show an error message and exit ITSx
+  print STDERR "FATAL ERROR :: The specified profile database could not be found.\
+Consult the manual for installation instructions.\
+This error is fatal, and ITSx will now abort.\n";
+  print STDERR "-----------------------------------------------------------------\n";
+  exit;
+}
+
+if ($pipeline == 0) {   # If ITSx is not run in pipeline mode (i.e. from ITSx)
+  if ($out_date == 1) {   # If a date and time stamp should be supplied
+    ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);   # Get the date and time
+    $year = $year + 1900;   # Format the year
+    $mon = $mon + 1;   # Format the month
+    if ($mon < 10) {   # Add a zero to the month, if needed
+      $mon = "0" . $mon;
+    }
+    if ($mday < 10) {   # Add a zero to the day, if needed
+      $mday = "0" . $mday;
+    }
+    if ($hour < 10) {   # Add a zero to the hour, if needed
+      $hour = "0" . $hour;
+    }
+    if ($min < 10) {   # Add a zero to the minute, if needed
+      $min = "0" . $min;
+    }
+    $outputDate = ".$year\-$mon\-$mday\_$hour.$min";   # Create a date and time stamp
+    $outputDate =~ s./.-.g;   # Remove any potential slashes in the output name (as this will confuse ITSx's file naming)
+    $output = $output . $outputDate;   # Add the date and time stamp top the output base name
+  }
+}
+
+$tempDir = "ITSx_temp_directory__$output";   # Setup a temporary directory variable
+$tempDir =~ s./.-.g;   # Remove any potential slashes in the output name (as this will confuse ITSx's file naming)
+
+if ($pipeline == 0) {   # If not running in pipeline mode
+  ## Create a summary file
+  if ($out_sum == 1) {   # If summary output is on
+    $now = localtime;   # Get the current time
+    open (SUMMARY, ">$output.summary.txt");   # Create the summary file
+    print SUMMARY "ITSx run started at $now.\n";   # Output the starting time for the analysis
+    print SUMMARY "-----------------------------------------------------------------\n";
+    close (SUMMARY);   # Close summary file
+  }
+}
+
+## Create a temporary directory for ITSx
+if ($pipeline == 0) {   # If ITSx is not run in pipeline mode (i.e. from ITSx)
+  `mkdir $tempDir 2> /dev/null`;   # Create a temporary directory
+}
+
+
+## Prepare profile database
+## Get the current time and output info message
+$now = localtime;
+if ($silent == 0) {
+  print STDERR "$now : Preparing HMM database (should be quick)...\n";
+}
+
+## Setup profile index
+%profileIndex = {};
+
+# A = Alveolata
+# B = Bryophyta
+# C = Bacillariophyta
+# D = Amoebozoa
+# E = Euglenozoa
+# F = Fungi
+# G = Chlorophyta (green algae)
+# H = Rhodophyta (red algae)
+# I = Phaeophyceae (brown algae)
+# L = Marchantiophyta (liverworts)
+# M = Metazoa
+# N = Microsporidia
+# O = Oomycota
+# P = Haptophyceae (prymnesiophytes)
+# Q = Raphidophyceae
+# R = Rhizaria
+# S = Synurophyceae
+# T = Tracheophyta (higher plants)
+# U = Eustigmatophyceae
+# X = Apusozoa
+# Y = Parabasalia
+
+$profileIndex{"A"} = "alveolates";
+$profileIndex{"B"} = "bryophyta";
+$profileIndex{"C"} = "bacillariophyta";
+$profileIndex{"D"} = "amoebozoa";
+$profileIndex{"E"} = "euglenozoa";
+$profileIndex{"F"} = "fungi";
+$profileIndex{"G"} = "chlorophyta";
+$profileIndex{"H"} = "rhodophyta";
+$profileIndex{"I"} = "phaeophyceae";
+$profileIndex{"J"} = "undefined";
+$profileIndex{"K"} = "undefined";
+$profileIndex{"L"} = "marchantiophyta";
+$profileIndex{"M"} = "metazoa";
+$profileIndex{"N"} = "microsporidia";
+$profileIndex{"O"} = "oomycota";
+$profileIndex{"P"} = "haptophyceae";
+$profileIndex{"Q"} = "raphidophyceae";
+$profileIndex{"R"} = "rhizaria";
+$profileIndex{"S"} = "synurophyceae";
+$profileIndex{"T"} = "tracheophyta";
+$profileIndex{"U"} = "eustigmatophyceae";
+$profileIndex{"V"} = "undefined";
+$profileIndex{"W"} = "undefined";
+$profileIndex{"X"} = "apusozoa";
+$profileIndex{"Y"} = "parabasalia";
+$profileIndex{"Z"} = "undefined";
+
+
+@profileList = split(',',uc($type));   # Get the list of profile types
+foreach $entry (@profileList) {   # Go through the entered types
+  if (($entry eq "ALL") || ($entry eq ".")) {   # If "all" among the entries
+    push(@profileSet,"A");   # Add the alveolates profiles to the investigation set
+    push(@profileSet,"B");   # Add the bryophytes profiles to the investigation set
+    push(@profileSet,"C");   # Add the bacillariophyta profiles to the investigation set
+    push(@profileSet,"D");   # Add the amoebozoa profiles to the investigation set
+    push(@profileSet,"E");   # Add the euglenozoa profiles to the investigation set
+    push(@profileSet,"F");   # Add the fungi profiles to the investigation set
+    push(@profileSet,"G");   # Add the green algae profiles to the investigation set
+    push(@profileSet,"H");   # Add the red algae profiles to the investigation set
+    push(@profileSet,"I");   # Add the brown algae profiles to the investigation set
+    push(@profileSet,"L");   # Add the liverworts profiles to the investigation set
+    push(@profileSet,"M");   # Add the metazoa profiles to the investigation set
+    #push(@profileSet,"N");   # Add the microsporidia profiles to the investigation set
+    push(@profileSet,"O");   # Add the oomycetes profiles to the investigation set
+    push(@profileSet,"P");   # Add the prymnesiophytes profiles to the investigation set
+    push(@profileSet,"Q");   # Add the raphidophytes profiles to the investigation set
+    push(@profileSet,"R");   # Add the rhizaria profiles to the investigation set
+    push(@profileSet,"S");   # Add the synurophyceae profiles to the investigation set
+    push(@profileSet,"T");   # Add the tracheophyta (higher plants) profiles to the investigation set
+    push(@profileSet,"U");   # Add the eustigmatophytes profiles to the investigation set
+    #push(@profileSet,"X");   # Add the apusozoa profiles to the investigation set
+    #push(@profileSet,"Y");   # Add the parabasalia profiles to the investigation set
+  } else {
+    if (length($entry) == 1) {  # If the name has only one character
+      if ($entry =~ m/[ABCDERFHILMNOPQRSTUXY]/) {  # If the selected set exists
+	push(@profileSet,$entry);   # Add the selected profiles to the investigation set
+      }
+    } else {
+      if ($entry =~ m/ALVEOL/) {
+	push(@profileSet,"A");   # Add the alveolates profiles to the investigation set
+      }
+      if ($entry =~ m/BRYO/) {
+	push(@profileSet,"B");   # Add the bryophytes profiles to the investigation set
+      }
+      if ($entry =~ m/MOSS/) {
+	push(@profileSet,"B");   # Add the bryophytes profiles to the investigation set
+      }
+      if ($entry =~ m/BACILL/) {
+	push(@profileSet,"C");   # Add the bacillariophyta profiles to the investigation set
+      }
+      if ($entry =~ m/DIATOM/) {
+	push(@profileSet,"C");   # Add the bacillariophyta profiles to the investigation set
+      }
+      if ($entry =~ m/AMOEB/) {
+	push(@profileSet,"D");   # Add the amoebozoa profiles to the investigation set
+      }
+      if ($entry =~ m/EUGLE/) {
+	push(@profileSet,"E");   # Add the euglenozoa profiles to the investigation set
+      }
+      if ($entry =~ m/FUNG/) {
+	push(@profileSet,"F");   # Add the fungi profiles to the investigation set
+      }
+      if ($entry =~ m/GREEN/) {
+	push(@profileSet,"G");   # Add the green algae profiles to the investigation set
+      }
+      if ($entry =~ m/CHLORO/) {
+	push(@profileSet,"G");   # Add the green algae profiles to the investigation set
+      }
+      if ($entry =~ m/RED-AL/) {
+	push(@profileSet,"H");   # Add the red algae profiles to the investigation set
+      }
+      if ($entry =~ m/RHODO/) {
+	push(@profileSet,"H");   # Add the red algae profiles to the investigation set
+      }
+      if ($entry =~ m/BROWN/) {
+	push(@profileSet,"I");   # Add the brown algae profiles to the investigation set
+      }
+      if ($entry =~ m/PHAEOP/) {
+	push(@profileSet,"I");   # Add the brown algae profiles to the investigation set
+      }
+      if ($entry =~ m/LIVER/) {
+	push(@profileSet,"L");   # Add the liverworts profiles to the investigation set
+      }
+      if ($entry =~ m/MARCH/) {
+	push(@profileSet,"L");   # Add the liverworts profiles to the investigation set
+      }
+      if ($entry =~ m/METAZ/) {
+	push(@profileSet,"M");   # Add the metazoa profiles to the investigation set
+      }
+      if ($entry =~ m/ANIMAL/) {
+	push(@profileSet,"M");   # Add the metazoa profiles to the investigation set
+      }
+      if ($entry =~ m/MICROSPOR/) {
+	push(@profileSet,"N");   # Add the microsporidia profiles to the investigation set
+      }
+      if ($entry =~ m/OOMYC/) {
+	push(@profileSet,"O");   # Add the oomycetes profiles to the investigation set
+      }
+      if ($entry =~ m/PRYMN/) {
+	push(@profileSet,"P");   # Add the prymnesiophytes profiles to the investigation set
+      }
+      if ($entry =~ m/HAPTO/) {
+	push(@profileSet,"P");   # Add the prymnesiophytes profiles to the investigation set
+      }
+      if ($entry =~ m/RAPHID/) {
+	push(@profileSet,"Q");   # Add the raphidophytes profiles to the investigation set
+      }
+      if ($entry =~ m/RHIZA/) {
+	push(@profileSet,"R");   # Add the rhizaria profiles to the investigation set
+      }
+      if ($entry =~ m/SYNUR/) {
+	push(@profileSet,"S");   # Add the synurophyceae profiles to the investigation set
+      }
+      if ($entry =~ m/TRACHE/) {
+	push(@profileSet,"T");   # Add the tracheophyta profiles to the investigation set
+      }
+      if ($entry =~ m/PLANTS/) {
+	push(@profileSet,"T");   # Add the tracheophyta profiles to the investigation set
+      }
+      if ($entry =~ m/EUSTIG/) {
+	push(@profileSet,"U");   # Add the eustigmatophytes profiles to the investigation set
+      }
+      if ($entry =~ m/APUSO/) {
+	push(@profileSet,"X");   # Add the apusozoa profiles to the investigation set
+      }
+      if ($entry =~ m/PARAB/) {
+	push(@profileSet,"Y");   # Add the parabasalia profiles to the investigation set
+      }
+    }
+  }
+}
+
+foreach $set (@profileSet) {   # For each set of profiles in the the full profile set for investigation
+  $hmmPath = $profileDB . "/" . $set . ".hmm";   # Determine the path to the HMM-file
+  chomp($modelCount = `grep -c "//" $hmmPath`);   # Count the number of models in the HMM-file
+  push(@modelCount,$modelCount);   # Add the number of models in this HMM-file to the list of model counts
+  if ($reset == 1) {
+    `rm -f $hmmPath.h3* 2> /dev/null`;   # Delete old HMM-files
+    `hmmpress $hmmPath 2> /dev/null`;   # Prepare the HMM-file for searching
+    ## Redirecting stderr is a quick and dirty solution to get rid of the messages... Could be made more elegant
+  }
+}
+
+## Clean-up input files and create complementary strand if needed
+## Get the current time and output an info message
+$now = localtime;
+if ($silent == 0) {
+  print STDERR "$now : Checking and handling input sequence data (should not take long)...\n";
+}
+
+## Open the summary file for writing
+if ($out_sum == 1) {   # If summary output is on
+  open (SUMMARY, ">>$output.summary.txt");   # Append to the summary file
+}
+
+if ($input ne "") {   # If an input file is given
+  ## Read from file
+  open (SEQUENCES, $input);   # Open the input file for reading
+  open (MAIN, ">$tempDir/main.fasta");   # Create a temporary file for storing the cleaned sequences representing the main strand
+  open (COMPLEMENT, ">$tempDir/complement.fasta");   # Create a temporary file for storing the cleaned sequences representing the complementary strand
+  $inputSequenceCount = 0;   # Reset input sequence counter
+  while ($sequence = <SEQUENCES>) {   # Repeat for every line in the input file
+    chomp($sequence);   # Truncate any potential line feeds
+    $sequence =~ tr/\r\n//d;  # Remove all carriage return and new line characters
+    if (substr($sequence,0,1) eq ">") {   # If a new FASTA entry is found in the input file
+      $inputSequenceCount++;   # Add one to the input sequence counter
+      print MAIN $mainSeq . "\n";   # Write the previous main DNA sequence to the main sequence file
+      print MAIN $sequence . " main\n";   # Write the definition line of the new sequence to the main sequence file
+      $sequenceDB{"$sequenceID"} = $mainSeq;  # Add sequence to sequence database
+      $headers{"$sequenceID"} = $header;  # Add the header to the header database
+      push(@sequenceOrder,$sequenceID);  # Add this sequence ID to the ordered list of sequences
+      ($sequenceID) = split(" ",substr($sequence,1));
+      $header = $sequence;  # Save the sequence header
+      $mainSeq = "";   # Empty the main sequence entry
+      if ($complement == 1) {   # If the complementary file should be written
+	$complementSeq = reverse($complementSeq);   # Reverse the complementary DNA sequence
+	print COMPLEMENT $complementSeq . "\n";   # Write the previous complementary DNA sequence to the complementary sequence file
+	print COMPLEMENT $sequence . " complement\n";   # Write the definition line of the new sequence to the complement sequence file
+	$complementSeq = "";   # Empty the complementary sequence entry
+      }
+    } else {    # If this line is just a continuation of the current DNA sequence
+      $mseq = $sequence;   # Store this part of the DNA sequence in the intermediate varaible $mseq
+      $mseq =~ s/[ .-]//g;   # Remove any gap characters present in this sequence part (good if input was an alignment)
+      $mseq =~ tr/[a-z]/[A-Z]/;   # Make all letters uppercase
+      $mseq =~ s/[^A-Z]//g;  # Remove all non-alphabetic characters
+      $mseq =~ tr/U/T/;   # Exchanges U:s for T:s (Uracil to Thymine, good if input was RNA sequence)
+      $mainSeq = $mainSeq . $mseq;   # Add the intermediate DNA sequence to the end of the main DNA sequence entry
+      
+      if ($complement == 1) {   # If the complementary file should be written
+	$cseq = $sequence;   # Store this part of the DNA sequence in the intermediate varaible $cseq
+	$cseq =~ s/[ .-]//g;   # Remove any gap characters present in this sequence part (good if input was an alignment)
+	$cseq =~ tr/[a-z]/[A-Z]/;   # Make all letters uppercase
+	$cseq =~ s/[^A-Z]//g;  # Remove all non-alphabetic characters
+	$cseq =~ tr/ACGTURYSWKMBDHVN/TGCAAYRWSMKVHDBN/;   # Replace all characters with its complementary base
+	$complementSeq = $complementSeq . $cseq;   # Add the intermediate DNA sequence to the end of the complementary DNA sequence entry
+      }
+    }
+  }
+  ## When the input file's end is reached
+  print MAIN $mainSeq . "\n";   # Write the last main DNA sequence to the main sequence file
+  if ($complement == 1) {   # If the complementary file should be written
+    $complementSeq = reverse($complementSeq);   # Reverse the complementary DNA sequence
+    print COMPLEMENT $complementSeq . "\n";   # Write the last complementary DNA sequence to the complementary sequence file
+  }
+  $sequenceDB{"$sequenceID"} = $mainSeq;  # Add sequence to sequence database
+  $headers{"$sequenceID"} = $header;  # Add the header to the header database
+  push(@sequenceOrder,$sequenceID);  # Add this sequence ID to the ordered list of sequences
+  $mainSeq = "";   # Empty the main sequence entry
+  $complementSeq = "";   # Empty the complementary sequence entry
+  close (SEQUENCES);   # Close the sequence input file
+  close (COMPLEMENT);   # Close the complementary output file
+  close (MAIN);   # Close the main output file
+} else {   # If no input file is supplied, then read from stdin instead
+  $input = "$tempDir/main.fasta";   # Set up a temporary input file path
+  open (MAIN, ">$tempDir/main.fasta");   # Create a temporary file for storing the cleaned sequences representing the main strand
+  open (COMPLEMENT, ">$tempDir/complement.fasta");   # Create a temporary file for storing the cleaned sequences representing the complementary strand
+  $inputSequenceCount = 0;   # Reset input sequence counter
+  while ($sequence = <STDIN>) {   # Repeat for every line in the standard input
+    chomp($sequence);   # Truncate any potential line feeds
+    if (substr($sequence,0,1) eq ">") {   # If a new FASTA entry is found in the input
+      $inputSequenceCount++;   # Add one to the input sequence counter
+      print MAIN $mainSeq . "\n";   # Write the previous main DNA sequence to the main sequence file
+      print MAIN $sequence . " main\n";   # Write the definition line of the new sequence to the main sequence file
+      $sequenceDB{"$sequenceID"} = $mainSeq;  # Add sequence to sequence database
+      $headers{"$sequenceID"} = $header;  # Add the header to the header database
+      push(@sequenceOrder,$sequenceID);  # Add this sequence ID to the ordered list of sequences
+      $sequenceID = split(" ",substr($sequence,1));
+      $header = $sequence;  # Save the sequence header
+      $mainSeq = "";   # Empty the main sequence entry
+      if ($complement == 1) {   # If the complementary file should be written
+	$complementSeq = reverse($complementSeq);   # Reverse the complementary DNA sequence
+	print COMPLEMENT $complementSeq . "\n";   # Write the previous complementary DNA sequence to the complementary sequence file
+	print COMPLEMENT $sequence . " complement\n";   # Write the definition line of the new sequence to the complement sequence file
+	$complementSeq = "";   # Empty the complementary sequence entry
+      }
+    } else {    # If this line is just a continuation of the current DNA sequence
+      $mseq = $sequence;   # Store this part of the DNA sequence in the intermediate varaible $mseq
+      $mseq =~ s/[ .-]//g;   # Remove any gap characters present in this sequence part (good if input was an alignment)
+      $mseq =~ tr/[a-z]/[A-Z]/;   # Make all letters uppercase
+      $mseq =~ s/[^A-Z]//g;  # Remove all non-alphabetic characters
+      $mseq =~ tr/U/T/;   # Exchanges U:s for T:s (Uracil to Thymine, good if input was RNA sequence)
+      $mainSeq = $mainSeq . $mseq;   # Add the intermediate DNA sequence to the end of the main DNA sequence entry
+      
+      if ($complement == 1) {   # If the complementary file should be written
+	$cseq = $sequence;   # Store this part of the DNA sequence in the intermediate varaible $cseq
+	$cseq =~ s/[ .-]//g;   # Remove any gap characters present in this sequence part (good if input was an alignment)
+	$cseq =~ tr/[a-z]/[A-Z]/;   # Make all letters uppercase
+	$cseq =~ s/[^A-Z]//g;  # Remove all non-alphabetic characters
+	$cseq =~ tr/ACGTURYSWKMBDHVN/TGCAAYRWSMKVHDBN/;   # Replace all characters with its complementary base
+	$complementSeq = $complementSeq . $cseq;   # Add the intermediate DNA sequence to the end of the complementary DNA sequence entry
+      }
+    }
+  }
+  ## When the input file's end is reached
+  print MAIN $mainSeq . "\n";   # Write the last main DNA sequence to the main sequence file
+  if ($complement == 1) {   # If the complementary file should be written
+    $complementSeq = reverse($complementSeq);   # Reverse the complementary DNA sequence
+    print COMPLEMENT $complementSeq . "\n";   # Write the last complementary DNA sequence to the complementary sequence file
+  }
+  $sequenceDB{"$sequenceID"} = $mainSeq;  # Add sequence to sequence database
+  $headers{"$sequenceID"} = $header;  # Add the header to the header database
+  push(@sequenceOrder,$sequenceID);  # Add this sequence ID to the ordered list of sequences
+  $mainSeq = "";   # Empty the main sequence entry
+  $complementSeq = "";   # Empty the complementary sequence entry
+  close (COMPLEMENT);   # Close the complementary output file
+  close (MAIN);   # Close the main output file
+}
+
+if ($out_sum == 1) {   # If summary output should be written
+  print SUMMARY "Number of sequences in input file:       \t$inputSequenceCount\n";   # Write info on the number of input sequences to the summary file
+}
+
+
+## Perform HMM-scan
+if ($hmmscan eq "") {   # If a pre-computed hmmscan output file is not supplied
+  if ($heuristics == 0) {  # If HMMER's heuristic filtering should not be used
+    $heurMax = "--max";  # Set the heurMax to "--max" (indicating that HMMER should turn off filtering)
+  } else {
+    $heurMax = "";  # Set the heurMax to empty (indicating that HMMER should turn on filtering)
+  }
+  if ($multi_thread == 0) {   # If multi-threading is off
+    ## Get the current time and output info message
+    $now = localtime;
+    if ($silent == 0) {
+      print STDERR "$now : Comparing sequences to HMM database (this may take a long while)...\n";
+    }
+    foreach $set (@profileSet) {   # Go sequentially through all profile sets to search for
+      $hmmPath = $profileDB . "/" . $set . ".hmm";   # Set the path to the HMM-file of the current set
+      if ($search_eval ne "") {   # If E-value cutoff is use for the search
+	hmmerSearch("hmmscan --cpu $cpu $heurMax -E $search_eval $hmmPath $tempDir/main.fasta","$tempDir/main.$set.hmmscan","M",$set);   # Call HMMER with E-value cutoff
+      } else {   # If score cutoff is use for the search
+	hmmerSearch("hmmscan --cpu $cpu $heurMax -T $search_score $hmmPath $tempDir/main.fasta","$tempDir/main.$set.hmmscan","M",$set);   # Call HMMER with score cutoff
+      }
+      if ($complement == 1) {   # If the complementary file should be scanned
+	if ($search_eval ne "") {   # If E-value cutoff is use for the search
+	  hmmerSearch("hmmscan --cpu $cpu $heurMax -E $search_eval $hmmPath $tempDir/complement.fasta","$tempDir/complement.$set.hmmscan","C",$set);   # Call HMMER with E-value cutoff
+	} else {   # If score cutoff is use for the search
+	  hmmerSearch("hmmscan --cpu $cpu $heurMax -T $search_score $hmmPath $tempDir/complement.fasta","$tempDir/complement.$set.hmmscan","C",$set);   # Call HMMER with score cutoff
+	}
+      }
+    }
+  } else {   # If multi-threading is on
+    ## Get the current time and output info message
+    $now = localtime;
+    if ($silent == 0) {
+      print STDERR "$now : Doing paralellised comparison to HMM database (this may take a long while)...\n";
+    }
+
+    ## Determining number of cpus per thread
+    if ($complement == 1) {   # If the complementary file should be scanned
+      $hmmcpu = int(0.5 * $cpu / scalar(@profileSet));   # Assign X CPUs to each thread, X = 0.5 * (TOTAL_CPUs_USED) / (TOTAL_NUMBER_OF_PROFILE_SETS)
+    } else {   # If the complementary file should not be scanned
+      $hmmcpu = int($cpu / scalar(@profileSet));   # Assign X CPUs to each thread, X = (TOTAL_CPUs_USED) / (TOTAL_NUMBER_OF_PROFILE_SETS)
+    }
+    if ($hmmcpu < 1) {   # If the number of CPUs per thread is smaller than 1
+      $hmmcpu = 1;   # Give each thread at least one CPU to work on
+    }
+    ## Main strand searches...
+    $cpuCount = 0;
+    foreach $set (@profileSet) {   # Go through each profile set to investigate
+      if ($cpuCount < $cpu) {
+	$cpuCount++;
+	$pid = fork();   # Fork off a copy of this process for this set
+      } else {
+	$deceasedPID = wait();   # Wait until a PID is finished, and gather its number
+	$pid = fork();   # Fork off a copy of this process for this set
+      }
+      if ($pid != 0) {   # If this is the parent process
+	push(@pids,$pid);   # Add the new process ID to the list of active process IDs
+      } else {   # If this is the new child process
+	$hmmPath = $profileDB . "/" . $set . ".hmm";   # Set the path to the HMM-file of the current set
+	if ($search_eval ne "") {   # If E-value cutoff is use for the search
+	  hmmerSearch("hmmscan --cpu $hmmcpu $heurMax -E $search_eval $hmmPath $tempDir/main.fasta","$tempDir/main.$set.hmmscan","M",$set);   # Call HMMER with E-value cutoff
+	} else {   # If score cutoff is use for the search
+	  hmmerSearch("hmmscan --cpu $hmmcpu $heurMax -T $search_score $hmmPath $tempDir/main.fasta","$tempDir/main.$set.hmmscan","M",$set);   # Call HMMER with score cutoff
+	}
+	## Stop child process...
+	exit;   # Exits the child process
+      }
+    }
+    ## Revese strand searches...
+    if ($complement == 1) {   # If the complementary file should be scanned
+      foreach $set (@profileSet) {   # Go through each profile set to investigate
+	if ($cpuCount < $cpu) {
+	  $cpuCount++;
+	  $pid = fork();   # Fork off a copy of this process for this set
+	} else {
+	  $deceasedPID = wait();   # Wait until a PID is finished, and gather its number
+	  $pid = fork();   # Fork off a copy of this process for this set
+	}
+	if ($pid != 0) {   # If this is the parent process
+	  push(@pids,$pid);   # Add the new process ID to the list of active process IDs
+	} else {   # If this is the new child process
+	  $hmmPath = $profileDB . "/" . $set . ".hmm";   # Set the path to the HMM-file of the current set
+	  if ($search_eval ne "") {   # If E-value cutoff is use for the search
+	    hmmerSearch("hmmscan --cpu $hmmcpu $heurMax -E $search_eval $hmmPath $tempDir/complement.fasta","$tempDir/complement.$set.hmmscan","C",$set);   # Call HMMER with E-value cutoff
+	  } else {   # If score cutoff is use for the search
+	    hmmerSearch("hmmscan --cpu $hmmcpu $heurMax -T $search_score $hmmPath $tempDir/complement.fasta","$tempDir/complement.$set.hmmscan","C",$set);   # Call HMMER with score cutoff
+	  }
+	  ## Stop child process...
+	  exit;   # Exits the child process
+	}
+      }
+    }
+    ## Get the current time and output the active process IDs
+    $now = localtime;
+    #print STDERR "    $now : Active PIDs: ";
+    #foreach $p (@pids) {   # Go through the list of PIDs
+    #  print STDERR "$p ";   # Print the PID
+    #}
+    #print STDERR "\n";   # Print a new line
+    do {   # Loop until all child PIDs have finished.
+      $deceasedPID = wait();   # Wait until a PID is finished, and gather its number
+      $now = localtime;   # Get the current time
+      if ($deceasedPID > -1) {   # If the PID that finished wasn't the last active one
+#	print STDERR "    $now : PID $deceasedPID finished.\n";   # Print finished PID
+      } else {   # If PID that finished was the last
+	print STDERR "    $now : All processes finished.\n";   # Print that all PIDs have finished
+      }
+    } until (wait() == -1);   # Do this loop until all PIDs have finished
+    $now = localtime;   # Get current time
+    print STDERR "$now : Parallel HMM-scan finished.\n";   # Print informative finishing message
+  }
+} else {    # If a pre-computed hmmscan file is supplied then
+## Get the current time and output that the hmmscan step is skipped
+  $now = localtime;
+  if ($silent == 0) {
+    print STDERR "$now : Skipping hmmscan! Using $hmmscan as input for the analysis instead.\n";
+  }
+}
+
+## Analyse HMM-scan output
+## Get the current time and output info
+$now = localtime;
+if ($silent == 0) {
+  print STDERR "$now : Analysing results of HMM-scan (this might take quite some time)...\n";
+}
+
+## Set up output files
+if ($out_table == 1) {   # If table output is on
+  open (TABLE, ">$output.hmmer.table");   # Create a table output file
+}
+if ($out_graph == 1) {   # If graphical output is on
+  open (GRAPH, ">$output.graph");   # Create a graph output file
+}
+if ($out_not == 1) {   # If not-found output is on
+  open (NOTFOUND, ">$tempDir/$output\_hmmer_no_detections.txt");   # Create a HMMER not-found output file
+}
+
+$setI = 0;   # Set the profile set indicator to zero
+foreach $set (@profileSet) {   # Go through all the profile sets to be investigated
+  for ($co = 0; $co <= 1; $co++) {   # Do main (and complementary) strand analysis in order
+    if ($co > 0) {   # If main strand analysis is finished
+      if ($complement == 1) {   # If complementary strand should be analysed
+	open (HMMOUTPUT, "$tempDir/complement.$set.hmmscan");   # Open hmmscan output for reading
+	open (SEQUENCES, "$tempDir/complement.fasta");   # Open complementary sequence file for reading
+	if ($out_table == 1) {   # If table output is on, write a header for this set
+	  print TABLE "***********************************************************\n";
+	  print TABLE "$set matches on complementary strand:\n";
+	}
+	if ($out_graph == 1) {   # If graphical output is on, write a header for this set
+	  print GRAPH "***********************************************************\n";
+	  print GRAPH "$set matches on complementary strand:\n";
+	}
+      } else {   # If complementary strand should not be analysed
+	last;   # Exit this loop
+      }
+    }
+    if ($co == 0) {   # If main strand analysis is not finished
+      open (HMMOUTPUT, "$tempDir/main.$set.hmmscan");   # Open hmmscan output for reading
+      open (SEQUENCES, "$tempDir/main.fasta");   # Open main sequence file for reading
+      if ($out_table == 1) {   # If table output is on, write a header for this set
+	print TABLE "***********************************************************\n";
+	print TABLE "$set matches on main strand:\n";
+      }
+      if ($out_graph == 1) {   # If graphical output is on, write a header for this set
+	print GRAPH "***********************************************************\n";
+	print GRAPH "$set matches on main strand:\n";
+      }
+    }
+  ## Read and analyse hmmscan output file
+  while ($line = <HMMOUTPUT>) {   # Read in the hmmscan output file, line by line
+    chomp($line);   # Remove any potential line feeds
+    if (substr($line,0,13) eq "## New query:") {   # If this line begin with "## New query:", then this is a new entry
+      undef %hits;   # Empty the hits hash
+      undef %evals;   # Empty the e-value hash
+      undef %scores;   # Empty the score hash
+      $querytemp = substr($line,14);   # Extract everything from this line, except for the start ("## New query:")
+      ($query,$length) = split('\t',$querytemp);  # Split query name, length and DNA sequence
+      if ($co == 0) {   # If main strand analysis
+	$DNA = $sequenceDB{"$query"};
+      } else {  # If complementary strand
+	$cseq = $sequenceDB{"$query"};   # Store the DNA sequence in the intermediate varaible $cseq
+	$cseq =~ tr/[a-z]/[A-Z]/;   # Make all letters uppercase
+	$cseq =~ s/[^A-Z]//g;  # Remove all non-alphabetic characters
+	$cseq =~ tr/ACGTURYSWKMBDHVN/TGCAAYRWSMKVHDBN/;   # Replace all characters with its complementary base
+	$DNA = reverse($cseq);
+      }
+    } else {
+      
+      if ($line ne "//") {
+	## Find domain annotations...
+	($query,$matchProfile,$length,$domNo,$sign,$score,$bias,$cE,$iE,$hmmFrom,$hmmTo,$hmmends,$queryFrom,$queryTo,$queryends,$envFrom,$envTo,$envends,$acc) = split('\t',$line);   # Split the line into a collection of stat variables
+
+	$useQueryFrom = $queryFrom;
+	if ($hmmFrom > 1) {  # If the HMM-profile is not matched from the beginning
+	  $hmmDiff = $hmmFrom - 1;
+	  if ($useQueryFrom > $hmmDiff) {
+	    $useQueryFrom = $queryFrom - $hmmDiff;
+	  } else {
+	    $useQueryFrom = 1;
+	  }
+	}
+
+	if (uc($anchor) eq "HMM") {
+	  $anchorLen = $hmmTo - $hmmFrom + 1;
+	} else {
+	  $anchorLen = $anchor;
+	}
+	  
+
+	$query_profile_match = $query . ":" . $matchProfile;
+	$profileExists = 0;   # Assume that the newly found match profile ($matchProfile) is not already found for this sequence
+	if (exists($hits{$query_profile_match})) {   # If a profile from the list is the same as the match profile
+	  ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE) = split('\t',$hits{$query_profile_match});   # Split the entry in list into stat variables
+	  if ($iE < $hitE) {   # If the new match profile has a smaller E-value than the one from the list
+	    $hits{$query_profile_match} = "$useQueryFrom\t$envTo\t$matchProfile\t$score\t$iE\t$anchorLen";   # Replace the data in the hit list with the data for the newly found match profile
+	    $evals{$query_profile_match} = $iE;   # Replace the E-value in the hit list with the E-value for the newly found match profile
+	    $scores{$query_profile_match} = $score;   # Replace the score in the hit list with the score for the newly found match profile
+	  }
+	  $profileExists = 1;   # Indicate that this match profile was found in the hit list
+	}
+	if ($profileExists == 0) {   # If the match profile was not found in the hit list
+	  if (($iE <= $E) && ($score >= $S)) {  # If this hits lives up to the minimal score and E-value cutoffs
+	    $hits{$query_profile_match} = "$useQueryFrom\t$envTo\t$matchProfile\t$score\t$iE\t$anchorLen";   # Add the data for the newly found match profile
+	    $evals{$query_profile_match} = $iE;   # Add the E-value for the newly found match profile
+	    $scores{$query_profile_match} = $score;   # Add the score for the newly found match profile
+	  }
+	}
+      } else {   # If the line only contains "//", the end of this sequence's hmmscan entry is reached
+      ## Save analysis results
+	@sortedKeys = sort {$hits{$a} <=> $hits{$b}} keys(%hits);   # Sort the the list of hits numerically ascending (smallest first)
+	undef @sortedHits;
+	undef @scores;
+	undef @evals;
+	foreach $key (@sortedKeys) {
+	  push(@sortedHits, $hits{$key});   # Add the hit to the list of hits numerically ascending (smallest first)
+	  push(@scores, $scores{$key});  # Add the score to the scores array
+	  push(@evals, $evals{$key});  # Add the E-value to the evals array
+	}
+
+	## If the number of hits > N, the min eval < E and the max score > S then include query sequence
+	## OR if a single domain satisfies the thresholds and this is allowed, include it!
+	if ( ((scalar(@sortedHits) >= $N) && (min(@evals) <= $E) && (max(@scores) >= $S)) ||
+	     ((scalar(@sortedHits) > 0) && ($allow_single_E >= 0) && (min(@evals) <= $allow_single_E) && (max(@scores) >= $allow_single_score)) ) {
+	  if ($debug == 1) {   # If debugging mode is on
+	    print STDERR $query . " :\t" . scalar(@sortedHits) . "\t" . min(@evals) . "\t" . max(@scores) . "\n";   # Print some top hit statistics
+	  }
+	  
+	  ## Save some total stats to be able to determine origin of ITS sequence
+	  if (scalar(@evals) > 0) {   # If there are any E-values stored
+	    $averageE = sum(@evals) / scalar(@evals);   # Calculate the average E-value for this profile set
+	    $averageScore = sum(@scores) / scalar(@scores);   # Calculate the average score for this profile set
+	    $numberOfDomains = scalar(@sortedHits);   # Calculate the number of domains matched on this sequence
+	    
+	    #$scoreSum = sum(@scores) / @modelCount[$setI];   # Calculate score sum as: sum / (no. of profiles of this given type)
+	    $scoreSum = sum(@scores) / 4;   # Calculate score sum as: sum / (no. of profiles of this given type)
+	    
+	    $saveThis = "$query\t$set\t$co\t$numberOfDomains\t$averageE\t$averageScore\t$scoreSum\t$DNA\t";   # Collect the variables to save for this sequence and this profile set
+	    foreach $hit (@sortedHits) {   # Go through the list of hits and add specific information to save from each hit
+	      ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE,$hitanchorlen) = split('\t',$hit);   # Extract information from this hit
+	      $saveThis = $saveThis . "$hitFrom;$hitTo;$hitProfile;$hitScore;$hitE;$hitanchorlen\t";   # Add information to the list of variables to save
+	    }
+	    #push(@allHits, $saveThis);   # Add this information to the collection of all hits for this sequence, across all profile sets
+	    if (exists($allHits{$query})) {
+	      $allHits{$query} = $allHits{$query} . "\n" . $saveThis;
+	    } else {
+	      $allHits{$query} = $saveThis;
+	    }
+	  }
+	  
+	  if ($out_table == 1) {   # If table output is on
+	    print TABLE $query . "\t" . $length . "\t";   # Print query and length information to table
+	    foreach $hit (@sortedHits) {   # Go through each hit in the hit list
+	      ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE,$hitanchorlen) = split('\t',$hit);   # Extract data corresponding to this hit
+	      print TABLE "$hitFrom - $hitTo: $hitProfile ($hitScore, $hitE)\t";   # Print hit information to table
+	    }
+	    print TABLE "\n";   # Print new line
+	  }
+	  
+	  if ($out_graph == 1) {   # If graphical output is on
+	    print GRAPH ">> " . $query . "\t" . $length . " bp\n";   # Print a sequence header
+	    $insertPoint = 0;   # Set the domain insert point to beginning of line
+	    $hi = 0;   # Set hit number to zero
+	    foreach $hit (@sortedHits) {   # Go through the hit list
+	      ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE,$anchorLen) = split('\t',$hit);   # Split the hit into stat variables
+	      if ($graph_scale == 0) {   # If the graph scale is scaled individually to 100% for each sequence
+		$pFrom = $hitFrom / $length * 100;   # Set the profile start on graph relative to its position in the sequence
+		$pTo = $hitTo / $length * 100;   # Set the profile end on graph relative to its position in the sequence
+		$pEnd = 100;   # Set the end of the sequence graph to be at 100
+	      } else {   # If the scale is the same for all sequences
+		$pFrom = $hitFrom * $graph_scale;   # Set the profile start on graph scaled to the parameter given
+		$pTo = $hitTo * $graph_scale;   # Set the profile end on graph scaled to the parameter given
+		$pEnd = $length * $graph_scale;   # Set the end of the sequence graph to be at the end of the sequence scaled to the parameter given
+	      }
+	      for ($insertPoint = $insertPoint; $insertPoint <= $pFrom; $insertPoint++) {   # Go forward through the sequence, moving the insert point one step at a time until the beginning of the next profile is reached
+		print GRAPH "-";   # Print a "-"
+	      }
+	      print GRAPH substr($hitProfile,2,3);   # When the profile is reached, print its name
+	      $insertPoint = $insertPoint + 3;   # Move the insert point three steps forward, to account for the inserted name
+	      ($nextHitStart,$nextHitEnd,$nextProfile) = split('\t',@sortedHits[$hi + 1]);   # Check where the next hit in the list is located
+	      if (($nextHitStart <= $hitTo) && ($nextHitStart > 0)) {   # If the next hit in the list overlaps with this profile
+		if ($graph_scale == 0) {   # If the scale is relative
+		  $pTo = $nextHitStart / $length * 100 - 1;   # Change the profile end on the graph to be where this next profile starts
+		} else {   # If the scale is the same for all sequences
+		  $pTo = $nextHitStart * $graph_scale - 1;   # Change the profile end on the graph to be where this next profile starts
+		}
+	      }
+	      for ($insertPoint = $insertPoint; $insertPoint <= $pTo; $insertPoint++) {   # Go forward through the sequence, moving the insert point one step at a time until the end of the current profile is reached
+		print GRAPH "=";   # Print a "="
+	      }
+	      if (($nextHitStart <= $hitTo) && ($nextHitStart > 0)) {   # If the next hit in the list overlaps with this profile
+		print GRAPH ">";   # Print a ">" to indicate the profile overlap
+		$insertPoint++;   # Move the insert point one additional step forward to account for the ">" inserted
+	      }
+	      $hi++;   # Increase the hit number by one
+	    }
+	    for ($insertPoint = $insertPoint; $insertPoint <= $pEnd; $insertPoint++) {   # If there is no more profile matches to sequence, go forward through the sequence, moving the insert point one step at a time until the end of the sequence is reached
+	      print GRAPH "-";   # Print a "-"
+	    }
+	    print GRAPH "\n";   # Print a new line, indicating the end of this sequence entry
+	  }
+	} else {   # If this sequence didn't find any good-enough profile matches
+	  if ($out_not == 1) {   # If not-found output is on
+	    print NOTFOUND $query . "\n";   # Print the name of this query to the not-found list
+	  }
+	}
+      }
+    }
+  }
+  close (SEQUENCES);   # Close the input sequence file
+  close (HMMOUTPUT);   # Close the hmmscan output file
+  }
+  $setI++;   # Add one to the profile set indicator
+}
+
+## Close output files
+if ($out_table == 1) {   # If table output is on, close the table file
+  close (TABLE);
+}
+if ($out_graph == 1) {   # If graphical output is on, close the graph file
+  close (GRAPH);
+}
+if ($out_not == 1) {   # If not-found output is on, close the not-found file
+  close (NOTFOUND);
+  #$profileCount = scalar(@profileSet);   # Count the number of profile sets
+  #if ($complement == 1) {   # If complementary strand was scanned
+  #  $profileCount = $profileCount * 2;   # Double the number of profile sets that was investigated (and thus the number of not-founds that could at max be found)
+  #}
+  #`sort $tempDir/hmmer_no_detections.txt | uniq -c | grep "  *$profileCount " | sed "s/  *$profileCount //" > $output\_no_detections.txt`;   # Sort the not-found list, count the number of profile sets having no matches for each query. Save those that have only non-matches to the hmmer-not-found file
+}
+
+
+## Create total collected output and FASTA output
+
+if ($out_results == 1) {
+  open (RESULTS, ">$output.extraction.results");   # Create a results file
+}
+open (RAWOUT, ">$tempDir/ITSx_output.raw");   # Create a raw output file for ALL data
+open (PROBLEM, ">$output.problematic.txt");   # Create a file for problematic entries
+$foundProblem = 0;
+if ($out_pos == 1) {
+  open (POS, ">$output.positions.txt");   # Create a positions file
+}
+if ($out_fasta == 1) {   # If FASTA output should be written
+  open (FASTA, ">$output.full.fasta");   # Create a FASTA output file for found sequences
+  if ($allow_reorder == 0) {   # If reordering of domains is not allowed
+    open (CHIMERA, ">$output.chimeric.fasta");   # Create a FASTA file for potential chimera sequences with profile matches in the wrong order
+    $foundChimera = 0;
+  }
+  if ($out_partial > 0) {
+    open (FULLPARTIAL, ">$output.full_and_partial.fasta");
+  }
+}
+
+if ($out_joined == 1) {   # If SSU FASTA output should be written
+  open (JOINED, ">$output.joined.fasta");
+}
+if ($out_ssu == 1) {   # If SSU FASTA output should be written
+  open (SSU, ">$output.SSU.fasta");
+}
+if ($out_lsu == 1) {   # If LSU FASTA output should be written
+  open (LSU, ">$output.LSU.fasta");
+}
+if ($out_58S == 1) {   # If 5.8S FASTA output should be written
+  open (MID, ">$output.5_8S.fasta");
+}
+if ($out_its1 == 1) {   # If ITS1 FASTA output should be written
+  open (ITS1, ">$output.ITS1.fasta");
+  if ($out_partial > 0) {
+    open (ITS1PARTIAL, ">$output.ITS1.full_and_partial.fasta");
+  }
+}
+if ($out_its2 == 1) {   # If ITS2 FASTA output should be written
+  open (ITS2, ">$output.ITS2.fasta");
+  if ($out_partial > 0) {
+    open (ITS2PARTIAL, ">$output.ITS2.full_and_partial.fasta");
+  }
+}
+if ($out_concat == 1) {  # If concatenated output should be written
+  open (CONCAT, ">$output.concat.fasta");
+}
+
+undef @sortedHits;   # Empty the array of sorted hits
+
+# @sortedHits = sort @allHits;   # Sort the full list of hits in alphabetical order (to be able to analyse all sequences with same ID at once)
+if ($out_not == 1) {   # If not-found output is on
+  open (NOTFOUND, ">$output\_no_detections.txt");   # Create a not-found output file
+  $noDetect = 0;
+}
+foreach $sequenceID (@sequenceOrder) {   # Sort the full list of hits in their original order (to be able to analyse all sequences with same ID at once)
+  if ($sequenceID ne "") {
+    $countsInList = 0;
+    if (exists($allHits{$sequenceID})) {
+      @allHits = split('\n',$allHits{$sequenceID});
+    } else {
+      undef @allHits;
+    }
+    foreach $line (@allHits) {
+      @item = split('\t',$line);   # Split the line into an array
+      if (@item[0] eq $sequenceID) {  # If this item corresponds to the current sequence ID
+	push(@sortedHits,$line);  # Add it to the sorted list of hits
+	$countsInList++;
+      }
+    }
+    if ($countsInList == 0) {  # If no matches were found
+      if ($out_not == 1) {   # If not-found output is on
+	print NOTFOUND "$sequenceID\n";  # Output the sequence ID
+	$noDetect++;
+      }
+    }
+  }
+}
+if ($out_not == 1) {   # If not-found output is on
+  close (NOTFOUND);
+  if ($noDetect == 0) {
+    `rm $output\_no_detections.txt 2> /dev/null`;
+  }
+}
+
+## Set all counts for different ITS types to zero
+undef @itsCounts;
+$itsChimeric = 0;
+$itsMain = 0;
+$itsCompl = 0;
+
+push(@sortedHits,"--END--");   # Add a last item to the sorted list, so that all items are securely saved
+$lc = 1;   # Set the line count to one
+foreach $line (@sortedHits) {   # Go through the list of found hits
+  print RAWOUT "$line\n";   # Write the raw data associated with this hit to the raw data output file
+  @item = split('\t',$line);   # Split the line into an array
+  ## If this sequence ID is the same as the saved ones, then add it, else empty the array sequence ID and save
+  if ((@seqID[0] ne @item[0]) && (@item[0] ne "") || ($lc > scalar(@sortedHits))) {   # If this sequence ID is not the same as the last one and is non-empty, or if the end of the list has been reached
+    ## Save profile-type which is most likely...
+    if ($priority eq "sum") {   # If the sum-of-scores algorithm should be used to determine the most likely profile-type
+      ## Reset variables to unrealisticly high or low values
+      $best = 0;
+      $bestCount = 0;
+      $bestEval = 1000;
+      $bestScore = -1000;
+      $bestSum = -1000;
+      
+      for ($i = 0; $i < scalar(@seqScoreSum); $i++) {   # Go through all sum-of-scores entries
+	if (@seqScoreSum[$i] > $bestSum) {   # If the current value is larger than the previous top value
+	  $best = $i;   # Set the best value to be the current value
+	  ## Set all other best variables to those corresponding to the current value
+	  $bestCount = @seqDomCounts[$i];
+	  $bestEval = @seqAvgE[$i];
+	  $bestScore = @seqAvgScore[$i];
+	  $bestSum = @seqScoreSum[$i];
+	}
+	if (@seqScoreSum[$i] == $bestSum) {   # If the current value is equal to the previous top value
+	  if (@seqDomCounts[$i] > $bestCount) {   # If the current domain count is larger than the previous top domain count
+	    $best = $i;   # Set the best value to be the current value
+	    ## Set all other best variables to those corresponding to the current value
+	    $bestCount = @seqDomCounts[$i];
+	    $bestEval = @seqAvgE[$i];
+	    $bestScore = @seqAvgScore[$i];
+	    $bestSum = @seqScoreSum[$i];
+	  }
+	  if (@seqDomCounts[$i] == $bestCount) {   # If the current domain count is equal to the previous top domain count
+	    if (@seqAvgE[$i] < $bestEval) {   # If the current E-value is smaller than the previous top E-value
+	      $best = $i;   # Set the best value to be the current value
+	      ## Set all other best variables to those corresponding to the current value
+	      $bestCount = @seqDomCounts[$i];
+	      $bestEval = @seqAvgE[$i];
+	      $bestScore = @seqAvgScore[$i];
+	      $bestSum = @seqScoreSum[$i];
+	    }
+	  }
+	}
+      }
+    }
+    if ($priority eq "domains") {   # If the number of found domains should be used to determine the most likely profile-type
+      ## Reset variables to unrealisticly high or low values
+      $best = 0;
+      $bestCount = 0;
+      $bestEval = 1000;
+      $bestScore = -1000;
+
+      for ($i = 0; $i < scalar(@seqDomCounts); $i++) {   # Go through all domain count entries
+	if (@seqDomCounts[$i] > $bestCount) {   # If the current domain count is larger than the previous top domain count
+	  $best = $i;   # Set the best value to be the current value
+	  ## Set all other best variables to those corresponding to the current value
+	  $bestCount = @seqDomCounts[$i];
+	  $bestEval = @seqAvgE[$i];
+	  $bestScore = @seqAvgScore[$i];
+	}
+	if (@seqDomCounts[$i] == $bestCount) {   # If the current domain count is equal to the previous top domain count
+	  if (@seqAvgE[$i] < $bestEval) {   # If the current E-value is smaller than the previous top E-value
+	    $best = $i;   # Set the best value to be the current value
+	    ## Set all other best variables to those corresponding to the current value
+	    $bestCount = @seqDomCounts[$i];
+	    $bestEval = @seqAvgE[$i];
+	    $bestScore = @seqAvgScore[$i];
+	  }
+	  if (@seqAvgE[$i] == $bestEval) {   # If the current E-value is equal to the previous top E-value
+	    if (@seqAvgScore[$i] > $bestScore) {   # If the current average score is larger than the previous top average score
+	      $best = $i;   # Set the best value to be the current value
+	      ## Set all other best variables to those corresponding to the current value
+	      $bestCount = @seqDomCounts[$i];
+	      $bestEval = @seqAvgE[$i];
+	      $bestScore = @seqAvgScore[$i];
+	    }
+	  }
+	}
+      }
+    }
+    if ($priority eq "eval") {   # If the average E-value should be used to determine the most likely profile-type
+      ## Reset variables to unrealisticly high or low values
+      $best = 0;
+      $bestCount = 0;
+      $bestEval = 1000;
+      $bestScore = -1000;
+
+      for ($i = 0; $i < scalar(@seqDomCounts); $i++) {   # Go through all domain counts entries
+	if (@seqAvgE[$i] < $bestEval) {   # If the current E-value is smaller than the previous top E-value
+	  $best = $i;   # Set the best value to be the current value
+	  ## Set all other best variables to those corresponding to the current value
+	  $bestCount = @seqDomCounts[$i];
+	  $bestEval = @seqAvgE[$i];
+	  $bestScore = @seqAvgScore[$i];
+	}
+	if (@seqAvgE[$i] == $bestEval) {   # If the current E-value is equal to the previous top E-value
+	  if (@seqAvgScore[$i] > $bestScore) {   # If the current average score is larger than the previous top average score
+	    $best = $i;   # Set the best value to be the current value
+	    ## Set all other best variables to those corresponding to the current value
+	    $bestCount = @seqDomCounts[$i];
+	    $bestEval = @seqAvgE[$i];
+	    $bestScore = @seqAvgScore[$i];
+	  }
+	  if (@seqAvgScore[$i] == $bestScore) {   # If the current average score is equal to the previous top average score
+	    if (@seqDomCounts[$i] > $bestCount) {   # If the current number of domains is larger than the previous top number of domains
+	      $best = $i;   # Set the best value to be the current value
+	      ## Set all other best variables to those corresponding to the current value
+	      $bestCount = @seqDomCounts[$i];
+	      $bestEval = @seqAvgE[$i];
+	      $bestScore = @seqAvgScore[$i];
+	    }
+	  }
+	}
+      }
+    }
+    if ($priority eq "score") {   # If the average score should be used to determine the most likely profile-type
+      ## Reset variables to unrealisticly high or low values
+      $best = 0;
+      $bestCount = 0;
+      $bestEval = 1000;
+      $bestScore = -1000;
+
+      for ($i = 0; $i < scalar(@seqDomCounts); $i++) {   # Go through all domain counts entries
+	if (@seqAvgScore[$i] > $bestScore) {   # If the current average score is larger than the previous top average score
+	  $best = $i;   # Set the best value to be the current value
+	  ## Set all other best variables to those corresponding to the current value
+	  $bestCount = @seqDomCounts[$i];
+	  $bestEval = @seqAvgE[$i];
+	  $bestScore = @seqAvgScore[$i];
+	}
+	if (@seqAvgScore[$i] == $bestScore) {   # If the current average score is equal to the previous top average score
+	  if (@seqAvgE[$i] < $bestEval) {   # If the current E-value is smaller than the previous top E-value
+	    $best = $i;   # Set the best value to be the current value
+	    ## Set all other best variables to those corresponding to the current value
+	    $bestCount = @seqDomCounts[$i];
+	    $bestEval = @seqAvgE[$i];
+	    $bestScore = @seqAvgScore[$i];
+	  }
+	  if (@seqAvgE[$i] == $bestEval) {   # If the current E-value is equal to the previous top E-value
+	    if (@seqDomCounts[$i] > $bestCount) {   # If the current number of domains is larger than the previous top number of domains
+	      $best = $i;   # Set the best value to be the current value
+	      ## Set all other best variables to those corresponding to the current value
+	      $bestCount = @seqDomCounts[$i];
+	      $bestEval = @seqAvgE[$i];
+	      $bestScore = @seqAvgScore[$i];
+	    }
+	  }
+	}
+      }
+    }
+    
+    if (@seqID[$best] ne "") {   # If the sequence ID of the most likely profile is not empty
+
+      $allanchorLens = @anchorLens[$best];  # Get the best anchor lengths
+      @allanchorLens = split(',', $allanchorLens);  # Split the anchor lens into an array
+
+      $chimeric = 0;   # Assume the sequence is not chimeric
+      if ($allow_reorder == 0) {   # If re-order of domain is not allowed
+	$domain_order = @allSeqDomains[$best];   # Gather the order the domains are found in
+	@domain_order = split(' ',$domain_order);   # Split the list into an array
+	@sorted_domain_order = sort {$a cmp $b} @domain_order;   # Sort the array alphabetically
+	for ($di = 0; $di <= scalar(@domain_order); $di++) {   # Go through the sorted array
+	  if ((@domain_order[$di] ne @sorted_domain_order[$di]) || (@problemCode[$best] =~ m/C/)) {    # Check if the order of the arrays differ at any poiny
+	    $chimeric = 1;   # If they do differ, mark the sequence as chimeric
+	  }
+	}
+      }
+
+      $seqDNALength = length(@seqDNA[$best]);   # Get the length of the DNA sequence
+
+      ## Print sequence and match data...
+      ## Order of columns in the output file:
+      ## ID Length Type Main/Compl Domains Avg.Eval Avg.Score Start End Start_domain End_domain Chimeric
+      if ($out_results == 1) {
+	print RESULTS @seqID[$best] . "\t" . $seqDNALength . "\t" . @seqITSType[$best] . "\t" . @seqCompl[$best] . "\t" . @seqDomCounts[$best] . "\t" . @seqAvgE[$best] . "\t" . @seqAvgScore[$best] . "\t" . @seqScoreSum[$best] . "\t" . @dnaStart[$best] . "\t" . @dnaEnd[$best] . "\t" . @startDomain[$best] . "\t" . @endDomain[$best] . "\t";   # Print sequence and match data to the results file
+	if ($chimeric == 1) {   # If the sequence was regarded chimeric
+	  print RESULTS "Chimeric\t";   # Add a chimeric tag to the entry
+	} else {   # If not chimeric
+	  print RESULTS "\t";   # Add an empty column
+	}
+	$allDomains = @allSeqDomains[$best];   # Get the domain order of the entry
+	$allDomains =~ tr/ /,/;   # Replace spaces with commas in the domain order string
+	$allDomains = substr($allDomains,0,length($allDomains) - 1);   # Remove the last character (a comma)
+	print RESULTS $allDomains;   # Write the domain order to the results file
+	print RESULTS "\t";   # Write a tab to the results file
+      }
+
+      if ($out_pos == 1) {
+	$out_all_pos = 1;
+	if ($out_all_pos == 1) {  # Output positions of all domains
+	  $seqPartLen = @dnaEnd[$best] - @dnaStart[$best] + 1;
+	  if ($seqPartLen < 0) {
+	    $seqPartLen = $seqPartLen * -1;
+	  }
+	  ## Print the positions of all identified domains to the position file
+	  print POS @seqID[$best] . "\t" . $seqDNALength . " bp." . "\t";
+	  if (@problemCode[$best] !~ m/S/) {
+	    print POS "SSU: " . @ssuStart[$best] . "-" . @ssuEnd[$best] . "\t";
+	  } else {
+	    print POS "SSU: Not found\t";
+	  }
+	  if ((@problemCode[$best] =~ m/X/) || ((@problemCode[$best] =~ m/[15]/) && (@problemCode[$best] =~ m/S/))) {
+	    print POS "ITS1: Not found\t";
+	  } else {
+	    if (@problemCode[$best] =~ m/O/) {
+	      print POS "ITS1: " . (@ssuEnd[$best] + 1) . "-" . (@ssuEnd[$best] + 1) . "\t";
+	    } else {
+	      print POS "ITS1: " . @its1Start[$best] . "-" . @its1End[$best] . "\t";
+	    }
+	  }
+	  if (@problemCode[$best] !~ m/[125OP]/) {
+	    print POS "5.8S: " . @midStart[$best] . "-" . @midEnd[$best] . "\t";
+	  } else {
+	    if (@problemCode[$best] =~ m/5/) {
+	      print POS "5.8S: Not found\t";
+	    } else {
+	      if (@problemCode[$best] =~ m/1/) {
+		print POS "5.8S: No start\t";
+	      } else {
+		if (@problemCode[$best] =~ m/O/) {
+		  print POS "5.8S: Overlap SSU\t";
+		} else {
+		  if (@problemCode[$best] =~ m/P/) {
+		    print POS "5.8S: Overlap LSU\t";
+		  } else {
+		    print POS "5.8S: No end\t";
+		  }
+		}
+	      }
+	    }
+	  }
+	  if ((@problemCode[$best] =~ m/Y/) ||((@problemCode[$best] =~ m/[25]/) && (@problemCode[$best] =~ m/L/))) {
+	    print POS "ITS2: Not found\t";
+	  } else {
+	    if (@problemCode[$best] =~ m/O/) {
+	      print POS "ITS2: " . (@lsuStart[$best] - 1) . "-" . (@lsuStart[$best] - 1) . "\t";
+	    } else {
+	      print POS "ITS2: " . @its2Start[$best] . "-" . @its2End[$best] . "\t";
+	    }
+	  }
+	  if (@problemCode[$best] !~ m/L/) {
+	    print POS "LSU: " . @lsuStart[$best] . "-" . @lsuEnd[$best] . "\t";
+	  } else {
+	    print POS "LSU: Not found" . "\t";
+	  }
+
+	  if (@problemCode[$best] =~ m/5/) {
+	    print POS "Broken or partial sequence, no 5.8S! ";
+	  } else {
+	    if (@problemCode[$best] =~ m/[12]/) {
+	      print POS "Broken or partial sequence, only partial 5.8S! ";
+	    }
+	  }
+	  if (@problemCode[$best] =~ m/B/) {
+	    print POS "ITS region too long! ";
+	  }
+	  if (@problemCode[$best] =~ m/O/) {
+	    print POS "5.8S seem to overlap with SSU! ";
+	  }
+	  if (@problemCode[$best] =~ m/P/) {
+	    print POS "5.8S seem to overlap with LSU! ";
+	  }
+	  if (@problemCode[$best] =~ m/C/) {
+	    print POS "Chimeric! ";
+	  }
+	  print POS "\n";
+
+	} else {  # Output only ITS positions
+	  $seqPartLen = @dnaEnd[$best] - @dnaStart[$best] + 1;
+	  print POS @seqID[$best] . "\t" . $seqDNALength . " bp." . "\t" . "ITS1: " . @its1Start[$best] . "-" . @its1End[$best] . "\t" . "ITS2: " . @its2Start[$best] . "-" . @its2End[$best] . "\n";  # Print the positions of the ITS sequences to the position file
+	}
+      }
+
+      if (@problem[$best] ne "") {
+	$foundProblem++;
+	print PROBLEM @seqID[$best] . "\t" . @problem[$best] . "\n";
+      }
+      
+      ## Set extended type string (the string going into the definition line of the FASTA file)
+      $extendedType = $profileIndex{@seqITSType[$best]} . " ITS sequence";
+      @itsCounts[ord(@seqITSType[$best])]++;   # Add one ITS to the appropriate counter
+
+      if (@seqCompl[$best] == 1) {   # If domains were found on complementary strand
+	$extendedStrand = "complementary strand";   # Set the strand string to complementary
+	$itsCompl++;   # Add one to the complementary strand counter
+      } else {   # If domains were found on the main strand
+	$extendedStrand = "main strand";   # Set the strand string to main
+	$itsMain++;   # Add one to the main strand counter
+      }
+      
+      ## Print (extracted) ITS sequence...
+      if ($out_fasta == 1) {   # If FASTA-output is on
+	if ($truncate == 0) {   # If the whole sequence should be kept in output file
+	  if ($chimeric == 0) {   # If the sequence is not chimeric
+	    if (@problemCode[$best] !~ m/[SL]/) {
+	      if ($out_preserve == 1) {  # If sequence headers should be preserved
+		print FASTA $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	      } else {
+		print FASTA ">" . @seqID[$best] . "|" . @seqITSType[$best] ." " . $extendedType . " (" . $seqDNALength . " bp)  on " . $extendedStrand . "\n";   # Write FASTA definition line
+	      }
+	      print FASTA  @seqDNA[$best] . "\n";   # Write DNA sequence
+	    }
+	    if ($out_partial > 0) {
+	      if (@problemCode[$best] !~ m/[SL125]/) {
+	      if ($out_preserve == 1) {  # If sequence headers should be preserved
+		print FULLPARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	      } else {
+		print FULLPARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] ." " . $extendedType . " (" . $seqDNALength . " bp) Full ITS region on " . $extendedStrand . "\n";   # Write FASTA defline
+	      }
+		print FULLPARTIAL  @seqDNA[$best] . "\n";   # Write DNA sequence
+	      } else {
+		$its1PartLen = @its1End[$best] - @its1Start[$best] + 1;
+		$its2PartLen = @its2End[$best] - @its2Start[$best] + 1;
+		if (($out_partial < $its1PartLen) && ($out_partial < $its2PartLen)) {
+		  if ($out_preserve == 1) {  # If sequence headers should be preserved
+		    print FULLPARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+		  } else {
+		    print FULLPARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] ." " . $extendedType . " (" . $seqDNALength . " bp) Partial ITS region on " . $extendedStrand . "\n";   # Write FASTA defline
+		  }
+		  print FULLPARTIAL  @seqDNA[$best] . "\n";   # Write DNA sequence
+		}
+	      }
+	    }
+	  } else {   # If sequence is regarded chimeric
+	    $itsChimeric++;   # Add one to the chimeric counter
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print CHIMERA $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print CHIMERA ">" . @seqID[$best] . "|" . @seqITSType[$best] ." Chimeric " . $extendedType . " (" . length(@seqDNA[$best]) . " bp)  on " . $extendedStrand . "\n";   # Write FASTA definition line
+	    }
+	    print CHIMERA  @seqDNA[$best] . "\n";   # Write DNA sequence
+	    $foundChimera++;
+	  }
+	} else {    # If only the ITS part of the sequence should be saved to output file
+	  $fastaStartPoint = @dnaStart[$best] - 1;   # Start extraction at the start of the first domain
+	  $fastaEndPoint = @dnaEnd[$best]+10;   # End extraction 10 bp after the last domain
+
+	  if (@seqDomCounts[$best] > 1) {   # If more than one domain was found
+	    if (substr(@startDomain[$best],0,5) eq "1_SSU")  {   # If the first domain was SSU
+	      $fastaStartPoint = @ssuEnd[$best];   # Set the start point of the extraction to the end of the SSU domain
+	    }
+	    if (substr(@startDomain[$best],0,5) eq "4_LSU")  {   # If the first domain was LSU
+	      $fastaStartPoint = @lsuEnd[$best];   # Set the start point of the extraction to the end of the LSU domain
+	    }
+	    if (substr(@endDomain[$best],0,5) eq "1_SSU")  {   # If the last domain was SSU
+	      $fastaEndPoint = @ssuStart[$best] - 1;   # Set the end point of the extraction to the start of the SSU domain
+	    }
+	    if (substr(@endDomain[$best],0,5) eq "4_LSU")  {   # If the last domain was LSU
+	      $fastaEndPoint = @lsuStart[$best] - 1;   # Set the end point of the extraction to the start of the LSU domain
+	    }
+	  }
+	  
+	  if ($fastaStartPoint < 0) {   # If the start point is smaller than zero, set the start point to zero
+	    $fastaStartPoint = 0;
+	  }
+	  if ($fastaEndPoint > length(@seqDNA[$best])) {   # If the end point is larger than the length of the sequence, set the end point to the sequence end
+	    $fastaEndPoint = length(@seqDNA[$best]);
+	  }
+
+	  $fastaLength = $fastaEndPoint - $fastaStartPoint + 1;
+
+	  if ($chimeric == 0) {   # If the sequence is not chimeric
+	    if (@problemCode[$best] !~ m/[SL]/) {
+	      if ($out_preserve == 1) {  # If sequence headers should be preserved
+		print FASTA $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	      } else {
+		print FASTA ">" . @seqID[$best] . "|" . @seqITSType[$best] . " " . $extendedType . " (" . $fastaLength . " bp) on " . $extendedStrand . "\n";   # Write FASTA definition line
+	      }
+	      if ($anchorLen > 0) {
+		if ($fastaStartPoint - @allanchorLens[0] > 0) {
+		  print FASTA substr(@seqDNA[$best],$fastaStartPoint - @allanchorLens[0],$fastaLength + @allanchorLens[0] + @allanchorLens[3]) . "\n";   # Write DNA sequence
+		} else {
+		  print FASTA substr(@seqDNA[$best],0,$fastaLength + $fastaStartPoint + @allanchorLens[3]) . "\n";   # Write DNA sequence
+		}
+	      } else {
+		print FASTA substr(@seqDNA[$best],$fastaStartPoint,$fastaLength) . "\n";   # Write DNA sequence
+	      }
+	    }
+	    if ($out_partial > 0) {
+	      if (@problemCode[$best] !~ m/[SL125]/) {
+		if ($out_preserve == 1) {  # If sequence headers should be preserved
+		  print FULLPARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+		} else {
+		  print FULLPARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] ." " . $extendedType . " (" . $seqDNALength . " bp) Full ITS region on " . $extendedStrand . "\n";   # Write FASTA defline
+		}
+		print FULLPARTIAL substr(@seqDNA[$best],$fastaStartPoint,$fastaLength) . "\n";   # Write DNA sequence
+	      } else {
+		$its1PartLen = @its1End[$best] - @its1Start[$best] + 1;
+		$its2PartLen = @its2End[$best] - @its2Start[$best] + 1;
+		if (($out_partial < $its1PartLen) && ($out_partial < $its2PartLen)) {
+		  if ($out_preserve == 1) {  # If sequence headers should be preserved
+		    print FULLPARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+		  } else {
+		    print FULLPARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] ." " . $extendedType . " (" . $seqDNALength . " bp) Partial ITS region on " . $extendedStrand . "\n";   # Write FASTA defline
+		  }
+		  print FULLPARTIAL substr(@seqDNA[$best],$fastaStartPoint,$fastaLength) . "\n";   # Write DNA sequence
+		}
+	      }
+	    }
+	  } else {   # If sequence is regarded chimeric
+	    $itsChimeric++;   # Add one to the chimeric counter
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print CHIMERA $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print CHIMERA ">" . @seqID[$best] . "|" . @seqITSType[$best] . " Chimeric " . $extendedType . " (" . $fastaLength . " bp) on " . $extendedStrand . "\n";   # Write FASTA definition line
+	    }
+	    print CHIMERA @seqDNA[$best] . "\n";   # Write DNA sequence
+	    $foundChimera++;
+	  }
+	}
+      }
+
+      if ($out_joined == 1) {
+	if ($chimeric == 0) {   # If the sequence is not chimeric
+	  if ($out_preserve == 1) {  # If sequence headers should be preserved
+	    print JOINED $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	  } else {
+	    print JOINED ">" . @seqID[$best] . "|" . @seqITSType[$best] . " " . $extendedType . " (" . $fastaLength . " bp) From domain " . @startDomain[$best] . " to " . @endDomain[$best] . " on " . $extendedStrand . " Found domains: ";   # Write FASTA definition line
+	    print JOINED substr(@allSeqDomains[$best],0,length(@allSeqDomains[$best]) - 1) . "\n";   # Write domain order
+	  }
+	  print JOINED substr(@seqDNA[$best],$fastaStartPoint,$fastaLength) . "\n";   # Write DNA sequence
+	} else {   # If sequence is regarded chimeric
+	  if ($out_preserve == 1) {  # If sequence headers should be preserved
+	    print JOINED $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	  } else {
+	    print JOINED ">" . @seqID[$best] . "|" . @seqITSType[$best] . " Chimeric " . $extendedType . " (" . $fastaLength . " bp) From domain " . @startDomain[$best] . " to " . @endDomain[$best] . " on " . $extendedStrand . " Found domains: ";   # Write FASTA definition line
+	    print JOINED substr(@allSeqDomains[$best],0,length(@allSeqDomains[$best]) - 1) . "\n";   # Write domain order
+	  }
+	  print JOINED substr(@seqDNA[$best],$fastaStartPoint,$fastaLength) . "\n";   # Write DNA sequence
+	}
+      }
+
+      ## Write SSU sequence to file
+      if ($out_ssu == 1) {  # If SSU output is on
+	if (@problemCode[$best] !~ m/S/) {
+	  if ($only_full == 0) {
+	    $seqPartLen =  @ssuEnd[$best] - @ssuStart[$best] + 1;
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print SSU $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print SSU ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|SSU " . "Extracted SSU sequence (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	    }
+	    if (uc($anchor) ne "HMM") {
+	      if (@ssuStart[$best] - 1 - $anchorLen > 0) {
+		print SSU substr(@seqDNA[$best], @ssuStart[$best] - 1 - $anchorLen, $anchorLen);
+	      } else {
+		print SSU substr(@seqDNA[$best], 0, @ssuStart[$best] - 1);
+	      }
+	      print SSU substr(@seqDNA[$best], @ssuStart[$best] - 1, @ssuEnd[$best] - @ssuStart[$best] + 1);
+	      if (@ssuEnd[$best] - @ssuStart[$best] + 1 - $anchorLen > 0) {
+		print SSU substr(@seqDNA[$best], @ssuEnd[$best], $anchorLen);
+	      } else {
+		print SSU substr(@seqDNA[$best], @ssuEnd[$best]);
+	      }
+	    } else {
+	      print SSU substr(@seqDNA[$best], @ssuStart[$best] - 1, @ssuEnd[$best] - @ssuStart[$best] + 1);
+	    }
+	    print SSU "\n";   # Write DNA sequence
+	  }
+	}
+      }
+
+      ## Write LSU sequence to file
+      if ($out_lsu == 1) {  # If LSU output is on
+	if ($only_full == 0) {
+	  if (@problemCode[$best] !~ m/L/) {
+	    $seqPartLen = @lsuEnd[$best] - @lsuStart[$best] + 1;
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print LSU $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print LSU ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|LSU " . "Extracted LSU sequence (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	    }
+	    if (uc($anchor) ne "HMM") {
+	      if (@lsuStart[$best] - 1 - $anchorLen > 0) {
+		print LSU substr(@seqDNA[$best], @lsuStart[$best] - 1 - $anchorLen, $anchorLen);
+	      } else {
+		print LSU substr(@seqDNA[$best], 0, @lsuStart[$best] - 1);
+	      }
+	      print LSU substr(@seqDNA[$best], @lsuStart[$best] - 1, @lsuEnd[$best] - @lsuStart[$best] + 1);
+	      #if (@lsuEnd[$best] - @lsuStart[$best] + 1 - $anchorLen > 0) {
+	      #  print LSU substr(@seqDNA[$best], @lsuEnd[$best] - @lsuStart[$best] + 1, $anchorLen);
+	      #} else {
+	      #  print LSU substr(@seqDNA[$best], @lsuEnd[$best] - @lsuStart[$best] + 1);
+	      #}
+	    } else {
+	      print LSU substr(@seqDNA[$best], @lsuStart[$best] - 1, @lsuEnd[$best] - @lsuStart[$best] + 1);
+	    }
+	    print LSU "\n";   # Write DNA sequence
+	  }
+	}
+      }
+
+      ## Write 5.8S sequence to file
+      if ($out_58S == 1) {  # If 5.8S output is on
+	if (@problemCode[$best] !~ m/[125]/) {
+	  $seqPartLen = @midEnd[$best] - @midStart[$best] + 1;
+	  if ($out_preserve == 1) {  # If sequence headers should be preserved
+	    print MID $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	  } else {
+	    print MID ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|5.8S " . "Extracted 5.8S sequence (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	  }
+	  if (uc($anchor) ne "HMM") {
+	    if (@midStart[$best] - 1 - $anchorLen > 0) {
+	      print MID substr(@seqDNA[$best], @midStart[$best] - 1 - $anchorLen, $anchorLen);
+	    } else {
+	      print MID substr(@seqDNA[$best], 0, @midStart[$best] - 1);
+	    }
+	    print MID substr(@seqDNA[$best], @midStart[$best] - 1, @midEnd[$best] - @midStart[$best] + 1);
+	    if (@midEnd[$best] - @midStart[$best] + 1 - $anchorLen > 0) {
+	      print MID substr(@seqDNA[$best], @midEnd[$best], $anchorLen);
+	    } else {
+	      print MID substr(@seqDNA[$best], @midEnd[$best]);
+	    }
+	  } else {
+	    print MID substr(@seqDNA[$best], @midStart[$best] - 1, @midEnd[$best] - @midStart[$best] + 1);
+	  }
+	  print MID "\n";   # Write DNA sequence
+	}
+      }
+
+      ## Write ITS1 sequence to file
+      if ($out_its1 == 1) {  # If ITS1 output is on
+	$seqPartLen = @its1End[$best] - @its1Start[$best] + 1;
+	if ($seqPartLen > 1) {
+	  if ( (($only_full == 0) && (@problemCode[$best] !~ m/[15]/)) || (($only_full == 1) && (@problemCode[$best] !~ m/[S15]/))) {
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print ITS1 $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print ITS1 ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS1 " . "Extracted ITS1 sequence " . @its1Start[$best] . "-" . @its1End[$best] . " (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	    }
+	    if (uc($anchor) ne "HMM") {
+	      if (@its1Start[$best] - 1 - $anchorLen > 0) {
+		print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1 - $anchorLen, $anchorLen);
+	      } else {
+		print ITS1 substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
+	      }
+	      print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
+	      if (length(@seqDNA[$best]) - @its1End[$best] - $anchorLen > 0) {
+		print ITS1 substr(@seqDNA[$best], @its1End[$best], $anchorLen);
+	      } else {
+		print ITS1 substr(@seqDNA[$best], @its1End[$best]);
+	      }
+	    } else {
+	      if (@its1Start[$best] - 1 - @allanchorLens[0] > 0) {
+		print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1 - @allanchorLens[0], @allanchorLens[0]);
+	      } else {
+		print ITS1 substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
+	      }
+	      print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
+	      if (length(@seqDNA[$best]) - @its1End[$best] - @allanchorLens[1] > 0) {
+		print ITS1 substr(@seqDNA[$best], @its1End[$best], @allanchorLens[1]);
+	      } else {
+		print ITS1 substr(@seqDNA[$best], @its1End[$best]);
+	      }
+	      
+	    }
+	    print ITS1 "\n";   # Write DNA sequence
+	  }
+	}
+	if ($out_partial > 0) {
+	  if (@problemCode[$best] !~ m/[S15]/) {
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print ITS1PARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print ITS1PARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS1 " . "Extracted Full ITS1 sequence " . @its1Start[$best] . "-" . @its1End[$best] . " (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	    }
+	    if (uc($anchor) ne "HMM") {
+	      if (@its1Start[$best] - 1 - $anchorLen > 0) {
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - $anchorLen, $anchorLen);
+	      } else {
+		print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
+	      }
+	      print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
+	      if (length(@seqDNA[$best]) - @its1End[$best] - $anchorLen > 0) {
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], $anchorLen);
+	      } else {
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
+	      }
+	    } else {
+	      if (@its1Start[$best] - 1 - @allanchorLens[0] > 0) {
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - @allanchorLens[0], @allanchorLens[0]);
+	      } else {
+		print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
+	      }
+	      print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
+	      if (length(@seqDNA[$best]) - @its1End[$best] - @allanchorLens[1] > 0) {
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], @allanchorLens[1]);
+	      } else {
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
+	      }
+	      
+	    }
+	    print ITS1PARTIAL "\n";   # Write DNA sequence
+	  } else {
+	    if ($out_partial < $seqPartLen) {
+	      if ($out_preserve == 1) {  # If sequence headers should be preserved
+		print ITS1PARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	      } else {
+		print ITS1PARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS1 " . "Extracted Partial ITS1 sequence " . @its1Start[$best] . "-" . @its1End[$best] . " (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	      }
+	      if (uc($anchor) ne "HMM") {
+		if (@its1Start[$best] - 1 - $anchorLen > 0) {
+		  print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - $anchorLen, $anchorLen);
+		} else {
+		  print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
+		}
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
+		if (length(@seqDNA[$best]) - @its1End[$best] - $anchorLen > 0) {
+		  print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], $anchorLen);
+		} else {
+		  print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
+		}
+	      } else {
+		if (@its1Start[$best] - 1 - @allanchorLens[0] > 0) {
+		  print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - @allanchorLens[0], @allanchorLens[0]);
+		} else {
+		  print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
+		}
+		print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
+		if (length(@seqDNA[$best]) - @its1End[$best] - @allanchorLens[1] > 0) {
+		  print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], @allanchorLens[1]);
+		} else {
+		  print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
+		}
+	      }
+	      print ITS1PARTIAL "\n";   # Write DNA sequence
+	    }
+	  }
+	}
+      }
+      
+      ## Write ITS2 sequence to file
+      if ($out_its2 == 1) {  # If ITS2 output is on
+	$seqPartLen = @its2End[$best] - @its2Start[$best] + 1;
+	if ($seqPartLen > 1) {
+	  if ( (($only_full == 0) && (@problemCode[$best] !~ m/[25]/)) || (($only_full == 1) && (@problemCode[$best] !~ m/[L25]/))) {
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print ITS2 $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print ITS2 ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS2 " . "Extracted ITS2 sequence " . @its2Start[$best] . "-" . @its2End[$best] . " (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	    }
+	    if (uc($anchor) ne "HMM") {
+	      if (@its2Start[$best] - 1 - $anchorLen > 0) {
+		print ITS2 substr(@seqDNA[$best], @its2Start[$best] - 1 - $anchorLen, $anchorLen);
+	      } else {
+		print ITS2 substr(@seqDNA[$best], 0, @its2Start[$best] - 1);
+	      }
+	      print ITS2 substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);
+	      if (length(@seqDNA[$best]) - @its2End[$best] - $anchorLen > 0) {
+		print ITS2 substr(@seqDNA[$best], @its2End[$best], $anchorLen);
+	      } else {
+		print ITS2 substr(@seqDNA[$best], @its2End[$best]);
+	      }
+	    } else {
+	      if (@its2Start[$best] - 1 - @allanchorLens[2] > 0) {
+		print ITS2 substr(@seqDNA[$best], @its2Start[$best] - 1 - @allanchorLens[2], @allanchorLens[2]);
+	      } else {
+		print ITS2 substr(@seqDNA[$best], 0, @its2Start[$best] - 1);
+	      }
+	      print ITS2 substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);
+	      if (length(@seqDNA[$best]) - @its2End[$best] - @allanchorLens[3] > 0) {
+		print ITS2 substr(@seqDNA[$best], @its2End[$best], @allanchorLens[3]);
+	      } else {
+		print ITS2 substr(@seqDNA[$best], @its2End[$best]);
+	      }
+	    }
+	    print ITS2 "\n";   # Write DNA sequence
+	  }
+	  if ($out_partial > 0) {
+	    if (@problemCode[$best] !~ m/[L25]/) {
+	      if ($out_preserve == 1) {  # If sequence headers should be preserved
+		print ITS2PARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	      } else {
+		print ITS2PARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS2 " . "Extracted Full ITS2 sequence " . @its2Start[$best] . "-" . @its2End[$best] . " (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	      }
+	      if (uc($anchor) ne "HMM") {
+		if (@its2Start[$best] - 1 - $anchorLen > 0) {
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1 - $anchorLen, $anchorLen);
+		} else {
+		  print ITS2PARTIAL substr(@seqDNA[$best], 0, @its2Start[$best] - 1);
+		}
+		print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);
+		if (length(@seqDNA[$best]) - @its2End[$best] - $anchorLen > 0) {
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best], $anchorLen);
+		} else {
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best]);
+		}
+	      } else {
+		if (@its2Start[$best] - 1 - @allanchorLens[2] > 0) {
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1 - @allanchorLens[2], @allanchorLens[2]);
+		} else {
+		  print ITS2PARTIAL substr(@seqDNA[$best], 0, @its2Start[$best] - 1);
+		}
+		print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);
+		if (length(@seqDNA[$best]) - @its2End[$best] - @allanchorLens[3] > 0) {
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best], @allanchorLens[3]);
+		} else {
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best]);
+		}
+	      }
+	      print ITS2PARTIAL "\n";   # Write DNA sequence
+	    } else {
+	      if ($out_partial < $seqPartLen) {
+		if ($out_preserve == 1) {  # If sequence headers should be preserved
+		  print ITS2PARTIAL $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+		} else {
+		  print ITS2PARTIAL ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS2 " . "Extracted Partial ITS2 sequence " . @its2Start[$best] . "-" . @its2End[$best] . " (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+		}
+		if (uc($anchor) ne "HMM") {
+		  if (@its2Start[$best] - 1 - $anchorLen > 0) {
+		    print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1 - $anchorLen, $anchorLen);
+		  } else {
+		    print ITS2PARTIAL substr(@seqDNA[$best], 0, @its2Start[$best] - 1);
+		  }
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);
+		  if (length(@seqDNA[$best]) - @its2End[$best] - $anchorLen > 0) {
+		    print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best], $anchorLen);
+		  } else {
+		    print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best]);
+		  }
+		} else {
+		  if (@its2Start[$best] - 1 - @allanchorLens[2] > 0) {
+		    print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1 - @allanchorLens[2], @allanchorLens[2]);
+		  } else {
+		    print ITS2PARTIAL substr(@seqDNA[$best], 0, @its2Start[$best] - 1);
+		  }
+		  print ITS2PARTIAL substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);
+		  if (length(@seqDNA[$best]) - @its2End[$best] - @allanchorLens[3] > 0) {
+		    print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best], @allanchorLens[3]);
+		  } else {
+		    print ITS2PARTIAL substr(@seqDNA[$best], @its2End[$best]);
+		  }
+		}
+		print ITS2PARTIAL "\n";   # Write DNA sequence
+	      }
+	    }
+	  }
+	}
+      }
+
+      ## Output concatenated ITS1 + ITS2 sequences
+      if ($out_concat == 1) {
+	$seqPartLen1 = @its1End[$best] - @its1Start[$best] + 1;
+	$seqPartLen2 = @its2End[$best] - @its2Start[$best] + 1;
+	if (($seqPartLen1 >= $concat_minlen) && ($seqPartLen2 >= $concat_minlen)) {
+	  if ($out_preserve == 1) {  # If sequence headers should be preserved
+	    print CONCAT $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	  } else {
+	    print CONCAT ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS1+2 " . "Concatenated ITS1 and ITS2 sequences (" . ($seqPartLen1 + $seqPartLen2) . " bp)\n";   # Write FASTA definition line
+	  }
+	  print CONCAT substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);   # Write ITS1 DNA sequence
+	  print CONCAT "-----";  # Write spacer
+	  print CONCAT substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);   # Write ITS2 DNA sequence
+	  print CONCAT "\n";  # Write newline
+	} else {
+	  if ($seqPartLen1 >= $concat_minlen) {
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print CONCAT $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print CONCAT ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS1 " . "ITS1 sequence (ITS2 too short) (" . $seqPartLen1 . " bp)\n";   # Write FASTA definition line
+	    }
+	    print CONCAT substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);   # Write ITS1 DNA sequence
+	    print CONCAT "-----";  # Write spacer
+	    print CONCAT "\n";  # Write newline
+	  }
+	  if ($seqPartLen2 >= $concat_minlen) {
+	    if ($out_preserve == 1) {  # If sequence headers should be preserved
+	      print CONCAT $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	    } else {
+	      print CONCAT ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS2 " . "ITS2 sequence (ITS1 too short) (" . $seqPartLen2 . " bp)\n";   # Write FASTA definition line
+	    }
+	    print CONCAT "-----";  # Write spacer
+	    print CONCAT substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1);   # Write ITS2 DNA sequence
+	    print CONCAT "\n";  # Write newline
+	  }
+	}
+      }
+
+      ## Print all sequences to the joined file for debugging
+      if ($out_joined == 1) {
+	$seqPartLen =  @ssuEnd[$best] - @ssuStart[$best] + 1;
+	if ($out_preserve == 1) {  # If sequence headers should be preserved
+	  print JOINED $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	} else {
+	  print JOINED ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|SSU " . "Extracted SSU sequence (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	}
+	print JOINED substr(@seqDNA[$best], @ssuStart[$best] - 1, @ssuEnd[$best] - @ssuStart[$best] + 1) . "\n";   # Write DNA sequence
+	
+	$seqPartLen = @its1End[$best] - @its1Start[$best] + 1;
+	if ($out_preserve == 1) {  # If sequence headers should be preserved
+	  print JOINED $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	} else {
+	  print JOINED ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS1 " . "Extracted ITS1 sequence " . @its1Start[$best] . "-" . @its1End[$best] . " (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	}
+	print JOINED substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1) . "\n";   # Write DNA sequence
+	
+	$seqPartLen = @midEnd[$best] - @midStart[$best] + 1;
+	if ($out_preserve == 1) {  # If sequence headers should be preserved
+	  print JOINED $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	} else {
+	  print JOINED ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|5.8S " . "Extracted 5.8S sequence (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	}
+	print JOINED substr(@seqDNA[$best], @midStart[$best] - 1, @midEnd[$best] - @midStart[$best] + 1) . "\n";   # Write DNA sequence
+	
+	$seqPartLen = @its2End[$best] - @its2Start[$best] + 1;
+	if ($out_preserve == 1) {  # If sequence headers should be preserved
+	  print JOINED $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	} else {
+	  print JOINED ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|ITS2 " . "Extracted ITS2 sequence (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	}
+	print JOINED substr(@seqDNA[$best], @its2Start[$best] - 1, @its2End[$best] - @its2Start[$best] + 1) . "\n";   # Write DNA sequence
+	
+	$seqPartLen = @lsuEnd[$best] - @lsuStart[$best] + 1;
+	if ($out_preserve == 1) {  # If sequence headers should be preserved
+	  print JOINED $headers{@seqID[$best]} . "\n";   # Write FASTA definition line
+	} else {
+	  print JOINED ">" . @seqID[$best] . "|" . @seqITSType[$best] . "|LSU " . "Extracted LSU sequence (" . $seqPartLen . " bp)\n";   # Write FASTA definition line
+	}
+	print JOINED substr(@seqDNA[$best], @lsuStart[$best] - 1, @lsuEnd[$best] - @lsuStart[$best] + 1) . "\n";   # Write DNA sequence
+      }
+      
+      
+      if ($out_results == 1) {
+	## Print info on all matches, also not top ones to the results file...
+	for ($i = 0; $i < scalar(@seqITSType); $i++) {   # Go through all the possible ITS types...
+	  print RESULTS @seqITSType[$i] . ": " . @seqDomCounts[$i] . " " . @seqAvgE[$i] . " " . @seqAvgScore[$i];   # Write some info on this type (Type, Domain count, Average E-value, Average score)
+	  if ($i < scalar(@seqITSType) - 1) {   # If this is no the last domain type
+	    print RESULTS ", ";   # Write a comma
+	  }
+	}
+	print RESULTS "\n";   # Write end of line
+      }
+    }
+    
+    ## Undefine all used arrays for the next round...
+    undef @seqID;
+    undef @seqITSType;
+    undef @seqCompl;
+    undef @seqDomCounts;
+    undef @seqAvgE;
+    undef @seqAvgScore;
+    undef @seqScoreSum;
+    undef @seqDNA;
+    undef @allSeqDomains;
+    undef @dnaStart;
+    undef @dnaEnd;
+    undef @ssuStart;
+    undef @ssuEnd;
+    undef @lsuStart;
+    undef @lsuEnd;
+    undef @midStart;
+    undef @midEnd;
+    undef @its1Start;
+    undef @its1End;
+    undef @its2Start;
+    undef @its2End;
+    undef @startDomain;
+    undef @endDomain;
+    undef @domain_order;
+    undef @sorted_domain_order;
+    undef @problem;
+    undef @problemCode;
+    undef @anchorLens;
+  }
+  if (@item[0] ne "") {   # Add this entry to the set (regardless if the entry has the same ID as entries already in the set), as long as it is non-empty
+    push(@seqID, @item[0]);   # Add sequence ID
+    push(@seqITSType, @item[1]);   # Add ITS type
+    push(@seqCompl, @item[2]);   # Add main/complementary strand info
+    push(@seqDomCounts, @item[3]);   # Add domain count
+    push(@seqAvgE, @item[4]);   # Add average E-value
+    push(@seqAvgScore, @item[5]);   # Add average score
+    push(@seqScoreSum, @item[6]);   # Add sum-of-scores
+    push(@seqDNA, @item[7]);   # Add DNA sequence
+    
+    ## Determine first and last domains, and their positions
+    ## Set variables to unrealistic values
+    $dnaEnd = length(@item[7]);
+    $startDomain = "***";
+    $endDomain = "***";
+    $dnaStart = 1;
+    $allDomains = "";
+    $ssuStart = 1;
+    $ssuEnd = 0;
+    $midStart = 1;
+    $midEnd = 0;
+    $lsuStart = 1;
+    $lsuEnd = 0;
+    $its1Start = 1;
+    $its1End = length(@item[7]);
+    $its2Start = 1;
+    $its2End = length(@item[7]);
+    $problem = "";
+    $problemCode = "";
+
+    $ssuFound = 0;
+    $lsuFound = 0;
+    $midSFound = 0;
+    $midEFound = 0;
+    $order = "";
+    undef @hitanchorlens;
+    
+    for ($i = 8; $i < scalar(@item); $i++) {   # Go through the list of found domains in this sequence
+      ($hitStart,$hitEnd,$hitProfile,$hitScore,$hitEval,$hitanchorlen) = split(';',@item[$i]);   # Separate the hit stats into variables
+      $allDomains = $allDomains . $hitProfile . " ";   # Add found domain to the list of all domains
+
+#      if ($hitStart < $dnaStart) {   # If this domain is the first one so far
+#	$dnaStart = $hitStart;   # Set the start of the ITS sequence to this domain's start
+#	$startDomain = $hitProfile;   # Set this domain as the starting domain
+#      }
+#      if ($hitEnd > $dnaEnd) {   # If this domain is the last one so far
+#	$dnaEnd = $hitEnd;   # Set the end of the ITS sequence to this domain's end
+#	$endDomain = $hitProfile;   # Set this domain as the ending domain
+#      }
+
+      if (substr($hitProfile,0,5) eq "1_SSU" ) {   # If this domain is the SSU's end
+	$dnaStart = $hitStart;   # Set the start of the ITS sequence to this domain's start
+	$startDomain = $hitProfile;   # Set this domain as the starting domain
+	$ssuStart = 1;   # Set the start of the SSU sequence to the start of the sequence
+	$ssuEnd = $hitEnd;   # Set the end of the SSU sequence to this domain's end
+	$its1Start = $hitEnd + 1;   # Set the start of the ITS1 sequence to right after this domain's end
+	if ($midEFound == 0) {
+	  $its2Start = $hitEnd + 1;   # Set the end of the ITS2 sequence to right after this domain's end
+	}
+	$ssuFound = 1;
+	$order = $order . "1";
+	@hitanchorlens[1] = $hitanchorlen;
+      }
+
+      if (substr($hitProfile,0,5) eq "2_5.8" ) {   # If this domain is the 5.8S's start
+	if ($startDomain eq "***") {
+	  $dnaStart = 1;   # Set the start of the ITS sequence to the start of the sequence
+	  $startDomain = $hitProfile;   # Set this domain as the starting domain
+	}
+	$its1End = $hitStart - 1;   # Set the end of the ITS1 sequence to right before this domain's start
+	if ($midEFound == 0) {
+	  $its2Start = $hitEnd + 1;   # Set the start of the ITS2 sequence to right after this domain's end
+	}
+	$midStart = $hitStart;   # Set the start of the 5.8S sequence to this domain's start
+	$midSFound = 1;
+	$order = $order . "2";
+	@hitanchorlens[2] = $hitanchorlen;
+      }
+
+      if (substr($hitProfile,0,5) eq "3_End" ) {   # If this domain is the 5.8S's end
+	if ($startDomain eq "***") {
+	  $dnaStart = 1;   # Set the start of the ITS sequence to the start of the sequence
+	  $startDomain = $hitProfile;   # Set this domain as the starting domain
+	}
+	if ($midSFound == 0) {
+	  $its1End = $hitStart - 1;   # Set the end of the ITS1 sequence to right before this domain's start
+	}
+	$its2Start = $hitEnd + 1;   # Set the end of the ITS2 sequence to right after this domain's end
+	$midEnd = $hitEnd;   # Set the end of the 5.8S sequence to this domain's end
+	$midEFound = 1;
+	$order = $order . "3";
+	@hitanchorlens[3] = $hitanchorlen;
+      }
+
+      if (substr($hitProfile,0,5) eq "4_LSU" ) {   # If this domain is the LSU's start
+	if ($startDomain eq "***") {
+	  $dnaStart = 1;   # Set the start of the ITS sequence to the start of the sequence
+	  $startDomain = $hitProfile;   # Set this domain as the starting domain
+	}
+	$dnaEnd = $hitEnd;   # Set the end of the ITS sequence to this domain's end
+	$endDomain = $hitProfile;   # Set this domain as the ending domain
+	$lsuStart = $hitStart;   # Set the start of the LSU sequence to the start of the this domain
+	$lsuEnd = length(@item[7]);   # Set the end of the LSU sequence to the end of the sequence
+	$its2End = $hitStart - 1;   # Set the end of the ITS2 sequence to right before this domain's start
+	if ($midSFound == 0) {
+	  $its1End = $hitStart - 1;   # Set the end of the ITS1 sequence to right before this domain's start
+	}
+	$lsuFound = 1;
+	$order = $order . "4";
+	@hitanchorlens[4] = $hitanchorlen;
+      }
+    }
+
+    if ($ssuFound == 0) {
+      if ($problem ne "") {
+	$problem = $problem . "; ";
+      }
+      $problem = $problem . "End of SSU sequence not found";
+      $problemCode = $problemCode . "S";
+      $order = "1" .  $order;
+    }
+
+    if ($lsuFound == 0) {
+      if ($problem ne "") {
+	$problem = $problem . "; ";
+      }
+      $problem = $problem . "Start of LSU sequence not found";
+      $problemCode = $problemCode . "L";
+      $order = $order . "4";
+    }
+
+    if (($midSFound == 0) && ($midEFound == 0)) {
+      if ($problem ne "") {
+	$problem = $problem . "; ";
+      }
+      $problem = $problem . "The 5.8S sequence was not found at all";
+      $problemCode = $problemCode . "5";
+    } else {
+      if ($midSFound == 0) {
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "Start of 5.8S sequence not found";
+	$problemCode = $problemCode . "1";
+      }
+      if ($midEFound == 0) {
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "End of 5.8S sequence not found";
+	$problemCode = $problemCode . "2";
+      }
+    }
+
+    if ($dnaEnd - $dnaStart > 1500) {
+      if ($problem ne "") {
+	$problem = $problem . "; ";
+      }
+      $problem = $problem . "ITS region is suspiciously long (> 1500 bp)";
+      $problemCode = $problemCode . "B";
+    }
+
+    if (length($order) == 4) {
+      if ($order ne "1234") {
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "Domains found in wrong order, sequence may be chimeric";
+	$problemCode = $problemCode . "C";
+      }
+    } else {
+      if ((substr($order,0,1) ne "1") || (substr($order,-1) ne "4")) {
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "Domains found in wrong order, sequence may be chimeric";
+	$problemCode = $problemCode . "C";
+      } else {
+	if (($midSFound == 1) && ($midEFound == 1)) {
+	  if ($order !~ m/23/) {
+	    if ($problem ne "") {
+	      $problem = $problem . "; ";
+	    }
+	    $problem = $problem . "Domains found in wrong order, sequence may be chimeric";
+	    $problemCode = $problemCode . "C";
+	  }
+	}
+      }
+    }
+
+    if ($its1End - $its1Start < 0) {  # 5.8S overlaps SSU
+      if (($midSFound == 1) && ($ssuFound == 1)) {
+	$problemCode = $problemCode . "OC";
+	$its1Start = 0;
+	$its1End = 0;
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "SSU seems to overlap 5.8S, sequence may be chimeric";
+      } else {
+	$problemCode = $problemCode . "X";
+	$its1Start = 0;
+	$its1End = 0;
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "No ITS1 sequence";
+      }
+    }
+    if ($its2End - $its2Start < 0) {  # 5.8S overlaps LSU
+      if (($midEFound == 1) && ($lsuFound == 1)) {
+	$problemCode = $problemCode . "PC";
+	$its2Start = 0;
+	$its2End = 0;
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "LSU seems to overlap 5.8S, sequence may be chimeric";
+       } else {
+	$problemCode = $problemCode . "Y";
+	$its2Start = 0;
+	$its2End = 0;
+	if ($problem ne "") {
+	  $problem = $problem . "; ";
+	}
+	$problem = $problem . "No ITS2 sequence";
+      }
+    }
+
+
+    if (($problemCode =~ m/[15]/) && ($problemCode =~ m/[S]/)) {  # Sequence lack all indications of ITS1
+      $its1Start = 0;
+      $its1End = 0;
+    }
+    if (($problemCode =~ m/[25]/) && ($problemCode =~ m/[L]/)) {  # Sequence lack all indications of ITS2
+      $its2Start = 0;
+      $its2End = 0;
+    }
+
+
+    $allhitanchorlens = @hitanchorlens[1] . "," . @hitanchorlens[2] . "," . @hitanchorlens[3] . "," . @hitanchorlens[4];
+
+    push(@allSeqDomains,$allDomains);   # Add list of all domains
+    push(@dnaStart, $dnaStart);   # Add start of the ITS sequence
+    push(@startDomain, $startDomain);   # Add start domain
+    push(@dnaEnd, $dnaEnd);   # Add end of ITS sequence
+    push(@endDomain, $endDomain);   # Add end domain
+    push(@ssuStart, $ssuStart);   # Add start of the SSU sequence
+    push(@ssuEnd, $ssuEnd);   # Add end of the SSU sequence
+    push(@midStart, $midStart);   # Add start of the 5.8S sequence
+    push(@midEnd, $midEnd);   # Add end of the 5.8S sequence
+    push(@lsuStart, $lsuStart);   # Add start of the LSU sequence
+    push(@lsuEnd, $lsuEnd);   # Add end of the LSU sequence
+    push(@its1Start, $its1Start);   # Add start of the ITS1 sequence
+    push(@its1End, $its1End);   # Add end of the ITS1 sequence
+    push(@its2Start, $its2Start);   # Add start of the ITS2 sequence
+    push(@its2End, $its2End);   # Add end of the ITS2 sequence
+    push(@problem, $problem);  # Add potential problem info
+    push(@problemCode, $problemCode);  # Add potential problem info in code form
+    push(@anchorLens, $allhitanchorlens);  # Add the anchor lengths
+  }
+  $lc++;   # Increase the line count by one
+}
+
+## Save results to the summary file
+if ($out_sum == 1) {   # If summary file should be written
+  $itsTotal = 0;   # Reset the total ITS sum
+  foreach $typeCount (@itsCounts) {   # Add ITSs from all different origins
+    $itsTotal += $typeCount;   # Add the number of ITSs for this origin
+  }
+  ## Write info on the found ITS sequences to the summary file
+  print SUMMARY "Sequences detected as ITS by ITSx:\t$itsTotal\n";
+  print SUMMARY "  On main strand:         \t$itsMain\n";
+  print SUMMARY "  On complementary strand:\t$itsCompl\n";
+  if ($allow_reorder == 0) {   # If re-ordering of domains is not allowed
+    print SUMMARY "Sequences detected as chimeric by ITSx:\t$itsChimeric\n";   # Write the number of reported chimeric sequences to the summary file
+  }
+  ## Write info on the found ITS sequence types to the summary file
+  print SUMMARY "ITS sequences by preliminary origin:\n";
+  print SUMMARY "  Alveolates:      \t" . int(@itsCounts[ord("A")]) . "\n";
+  print SUMMARY "  Amoebozoa:       \t" . int(@itsCounts[ord("D")]) . "\n";
+  print SUMMARY "  Bacillariophyta: \t" . int(@itsCounts[ord("C")]) . "\n";
+  print SUMMARY "  Brown algae:     \t" . int(@itsCounts[ord("I")]) . "\n";
+  print SUMMARY "  Bryophytes:      \t" . int(@itsCounts[ord("B")]) . "\n";
+  print SUMMARY "  Euglenozoa:      \t" . int(@itsCounts[ord("E")]) . "\n";
+  print SUMMARY "  Eustigmatophytes:\t" . int(@itsCounts[ord("U")]) . "\n";
+  print SUMMARY "  Fungi:           \t" . int(@itsCounts[ord("F")]) . "\n";
+  print SUMMARY "  Green algae:     \t" . int(@itsCounts[ord("G")]) . "\n";
+  print SUMMARY "  Liverworts:      \t" . int(@itsCounts[ord("L")]) . "\n";
+  print SUMMARY "  Metazoa:         \t" . int(@itsCounts[ord("M")]) . "\n";
+  print SUMMARY "  Microsporidia:   \t" . int(@itsCounts[ord("N")]) . "\n";
+  print SUMMARY "  Oomycetes:       \t" . int(@itsCounts[ord("O")]) . "\n";
+  print SUMMARY "  Prymnesiophytes: \t" . int(@itsCounts[ord("P")]) . "\n";
+  print SUMMARY "  Raphidophytes:   \t" . int(@itsCounts[ord("Q")]) . "\n";
+  print SUMMARY "  Red algae:       \t" . int(@itsCounts[ord("H")]) . "\n";
+  print SUMMARY "  Rhizaria:        \t" . int(@itsCounts[ord("R")]) . "\n";
+  print SUMMARY "  Synurophyceae:   \t" . int(@itsCounts[ord("S")]) . "\n";
+  print SUMMARY "  Tracheophyta:    \t" . int(@itsCounts[ord("T")]) . "\n";
+  print SUMMARY "-----------------------------------------------------------------\n";
+  close (SUMMARY);   # Close the summary file
+}
+
+if ($out_results == 1) {
+  close (RESULTS);   # Close the results file
+}
+close (RAWOUT);   # Close the raw output file
+close (PROBLEM);   # Close the file for problematic entries
+if ($foundProblem == 0) {
+  `rm $output.problematic.txt 2> /dev/null`;
+}
+if ($out_pos == 1) {
+  close (POS);   # Close the positions file
+}
+if ($out_fasta == 1) {   # If FASTA output is on
+  close (FASTA);   # Close the FASTA output file
+  if ($allow_reorder == 0) {   # If re-ordering of domains is not allowed
+    close (CHIMERA);   # Close the chimera file
+    if ($foundChimera == 0) {
+      `rm $output.chimeric.fasta 2> /dev/null`;
+    }
+  }
+  if ($out_partial > 0) {
+    close FULLPARTIAL;
+  }
+}
+if ($out_ssu == 1) {
+  close (SSU);   # Close the SSU file
+}
+if ($out_lsu == 1) {
+  close (LSU);   # Close the LSU file
+}
+if ($out_58S == 1) {
+  close (MID);   # Close the 58S file
+}
+if ($out_its1 == 1) {
+  close (ITS1);   # Close the ITS1 file
+  if ($out_partial > 0) {
+    close ITS1PARTIAL;
+  }
+}
+if ($out_its2 == 1) {
+  close (ITS2);   # Close the ITS2 file
+  if ($out_partial > 0) {
+    close ITS2PARTIAL;
+  }
+}
+if ($out_concat == 1) {
+  close CONCAT;
+}
+
+if ($out_not == 1) {   # If not-found output is on
+  open (NOTFOUND, "$output\_no_detections.txt");   # Open the not-found output file
+  while ($line = <NOTFOUND>) {  # Read all entries from file
+    chomp($line);  # Remove newline char
+    push(@nodetectionlist,$line);  # Add to non-detection list
+  }
+  close NOTFOUND;
+
+  open (NOTFOUND, ">$output\_no_detections.fasta");   # Create a not-found FASTA output file
+  foreach $seqID (@nodetectionlist) {  # For all non-detections
+    $seq = $sequenceDB{"$seqID"};  # Get sequence from sequence database
+    print NOTFOUND ">$seqID\n";  # Print not found sequence ID
+    print NOTFOUND $seq . "\n";  # Print not found sequence
+  }
+  close NOTFOUND;
+}
+
+## Clean up and finish
+
+if ($pipeline == 0) {   # If ITSx is not called from the pipeline mode (i.e. from ITSx)
+  if ($save_raw == 1) {   # If raw data should be saved
+    `mv $tempDir $output\_ITSx_raw_output`;   # Change the name of the temporary directory to ..._ITSx_raw_output
+  } else {   # Else, discard the raw data
+    `rm -rf $tempDir`;   # Remove the temporary directory
+  }
+}
+
+## Get the current time and output a finished message
+$now = localtime;
+if ($silent == 0) {
+  print STDERR "$now : Extraction finished!\n";
+  print STDERR "-----------------------------------------------------------------\n";
+  print STDERR "Thank you for using ITSx!\n";
+  print STDERR "Please report bugs or unsupported lineages to itsx\@microbiology.se\n";
+  print STDERR "\n";
+}
+
+## Write end time a summary file
+if ($pipeline == 0) {    # If not running in pipeline mode
+  if ($out_sum == 1) {   # If summary output is on
+    open (SUMMARY, ">>$output.summary.txt");   # Append to the summary file
+    print SUMMARY "ITSx run finished at $now.\n";   # Write ending time for the analysis
+    close (SUMMARY);   # Close summary file
+  }
+}
+
+sub hmmerSearch {
+  $hmmerCommand = $_[0];
+  $outputFile = $_[1];
+  $strand = $_[2];
+  $profileSet = $_[3];
+  open (HMMEROUTPUT, ">$outputFile");
+  open (HMMER, "$hmmerCommand |");
+  $totalHitCount = 0;
+  $hitCount = 0;
+  $SSUCount = 0;
+  $LSUCount = 0;
+  $startCount = 0;
+  $endCount = 0;
+  $maxCount = 1;
+  while (chomp($line = <HMMER>)) {
+    if (substr($line,0,6) eq "Query:") {
+      $hitCount = 0;
+      $SSUCount = 0;
+      $LSUCount = 0;
+      $startCount = 0;
+      $endCount = 0;
+      undef @bestScore;
+      undef @bestEntry;
+      
+      $query = substr($line,7);
+      $queryLength = $query;
+      $queryLength =~ s/.*  *//;
+      $queryLength =~ s/[^0-9]//g;
+      $query =~ s/  *//;
+      $query =~ s/  *.*//;
+      
+      print HMMEROUTPUT "## New query:\t$query\t$queryLength\n";
+
+    }
+    if (substr($line,0,12) eq "Description:") {
+      $desc = $line;
+    }
+    if (substr($line,0,3) eq ">> ") {
+      ($tempshit,$hmmerSubjectName) = split(' ',$line);
+    }
+    if ($line =~ m/[0-9] ! /) {
+      $stats = $line;
+      $stats =~ s/  */\t/g;
+      ($empty,$no,$excl,$score) = split('\t',$stats);
+      $hitCount++;
+      $totalHitCount++;
+      if ($maxCount == 0) {
+	print HMMEROUTPUT "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+      } else {
+	if (substr($hmmerSubjectName,0,5) eq "1_SSU") {
+	  $SSUCount++;
+	  if ($SSUCount <= $maxCount) {
+	    @bestScore[1] = $score;
+	    @bestEntry[1] = "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+	  } else {
+	    if ($score > @bestScore[1]) {
+	      @bestScore[1] = $score;
+	      @bestEntry[1] = "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+	    }
+	  }
+	}
+	if (substr($hmmerSubjectName,0,5) eq "4_LSU") {
+	  $LSUCount++;
+	  if ($LSUCount <= $maxCount) {
+	    @bestScore[4] = $score;
+	    @bestEntry[4] = "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+	  } else { 
+	    if ($score > @bestScore[4]) {
+	      @bestScore[4] = $score;
+	      @bestEntry[4] = "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+	    }
+	  }
+	}
+	if (substr($hmmerSubjectName,0,5) eq "2_5.8") {
+	  $startCount++;
+	  if ($startCount <= $maxCount) {
+	    @bestScore[2] = $score;
+	    @bestEntry[2] = "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+	  } else {
+	    if ($score > @bestScore[2]) {
+	      @bestScore[2] = $score;
+	      @bestEntry[2] = "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+	    }
+	  }
+	}
+	if (substr($hmmerSubjectName,0,5) eq "3_End") {
+	  $endCount++;
+	  if ($endCount <= $maxCount) {
+	    @bestScore[3] = $score;
+	    @bestEntry[3] = "$query\t$hmmerSubjectName\t$queryLength$stats\n";
+	  } else {
+	    if ($score > @bestScore[3]) {
+	      @bestScore[3] = $score;
+	      @bestEntry[3] = "$query\t$hmmerSubjectName\t$queryLength$stats\n"
+	    }
+	  }
+	}
+      }
+    }
+    if (substr($line,0,2) eq "//") {
+      if ($maxCount > 0) {
+	if ($SSUCount > 0) {
+	  print HMMEROUTPUT @bestEntry[1];
+	}
+	if ($startCount > 0) {
+	  print HMMEROUTPUT @bestEntry[2];
+	}
+	if ($endCount > 0) {
+	  print HMMEROUTPUT @bestEntry[3];
+	}
+	if ($LSUCount > 0) {
+	  print HMMEROUTPUT @bestEntry[4];
+	}
+      }
+      print HMMEROUTPUT "//\n";
+    }
+  }
+  close(HMMEROUTPUT);
+  $now = localtime;
+  if ($silent == 0) {
+    if ($strand eq "M") {
+      print STDERR "    $now : " . ucfirst($profileIndex{$profileSet}) . " analysis of main strand finished.\n";   # Print finished type
+    } else {
+      print STDERR "    $now : " . ucfirst($profileIndex{$profileSet}) . " analysis of complementary strand finished.\n";   # Print finished type
+    }
+  }
+}
+
+
+## Please send beers, pizzas, cakes, fruit pies, job positions and other types of feedback to:
+## johan.bengtsson [at] microbiology.se
+## Looking forward to hearing from you.... visit my website: www.microbiology.se for info on my research
+## //Johan Bengtsson, 2012-2014
author	okorol
date	Tue, 24 Mar 2015 12:02:48 -0400
parents
children