% PhD thesis record for Herbst (2019).
% - `school` is the field classic BibTeX requires for a PhD thesis entry
%   (biblatex accepts it as an alias of `institution`).
% - NOTE(review): the value holds only the department name as given in the
%   original record; the awarding university is not recorded here - TODO confirm.
% - NOTE(review): `pages` presumably holds the total page count rather than a
%   page range - TODO confirm (biblatex would use `pagetotal`).
% - `subject` and `language` are non-standard fields kept as annotations;
%   standard styles ignore them.
@phdthesis{Herbst2019,
  author   = {Herbst, Lina},
  title    = {Ancestral sequence reconstruction with {Maximum} {Parsimony} and its accuracy in mathematical phylogenetics},
  school   = {Institut f{\"u}r Mathematik und Informatik},
  pages    = {225},
  year     = {2019},
  abstract = {In phylogenetics, evolutionary relationships of different species are represented by phylogenetic trees. In this thesis, we are mainly concerned with the reconstruction of ancestral sequences and the accuracy of this reconstruction given a rooted binary phylogenetic tree. For example, we wish to estimate the DNA sequences of the ancestors given the observed DNA sequences of species living today. In particular, we are interested in reconstructing the DNA sequence of the last common ancestor of all species under consideration. Note that this last common ancestor corresponds to the root of the tree. There exist various methods for the reconstruction of ancestral sequences. A widely used principle for ancestral sequence reconstruction is the principle of parsimony (Maximum Parsimony). This principle means that the simplest explanation is the best. Applied to the reconstruction of ancestral sequences this means that a sequence which requires the fewest evolutionary changes along the tree is reconstructed. Thus, the number of changes is minimized, which explains the name of Maximum Parsimony. Instead of estimating a whole DNA sequence, Maximum Parsimony considers each position in the sequence separately. Thus in the following, each sequence position is regarded separately, and we call a single position in a sequence a state. It can happen that the state of the last common ancestor is reconstructed unambiguously, for example as A. On the other hand, Maximum Parsimony might be indecisive between two DNA nucleotides, say for example A and C. In this case, the last common ancestor will be reconstructed as {A,C}. Therefore we consider, after an introduction and some preliminary definitions, the following question in Section 3: how many present-day species need to be in a certain state, for example A, such that the Maximum Parsimony estimate of the last common ancestor is also {A}?
The answer to this question depends on the tree topology as well as on the number of different states. In Section 4, we provide a sufficient condition for Maximum Parsimony to recover the ancestral state at the root correctly from the observed states at the leaves. The so-called reconstruction accuracy for the reconstruction of ancestral states is introduced in Section 5. The reconstruction accuracy is the probability that the true root state is indeed reconstructed and always takes two processes into account: on the one hand the approach to reconstruct ancestral states, and on the other hand the way the states evolve along the edges of the tree. The latter is given by an evolutionary model. In the present thesis, we focus on a simple symmetric model, the Neyman model. The symmetry of the model means for example that a change from A to C is as likely as a change from C to A. Intuitively, one could expect that the reconstruction accuracy is the highest when all present-day species are taken into account. However, it has long been known that the reconstruction accuracy improves when some taxa are disregarded for the estimation. This is bad news for Maximum Parsimony as a criterion for ancestral state reconstruction, and therefore the question arises whether there exists at least a lower bound for the reconstruction accuracy, i.e. whether it is better to consider all species living today instead of just one for the reconstruction. In Section 5, we start with considering ultrametric trees, which are trees where the expected number of substitutions from the root to each leaf is the same. For such trees, we investigate a lower bound for the reconstruction accuracy, when the number of different states at the leaves of the tree is 3 or 4.
Subsequently in Section 6, in order to generalize this result, we introduce a new method for ancestral state reconstruction: the coin-toss method. We obtain new results for the reconstruction accuracy of Maximum Parsimony by relating Maximum Parsimony to the coin-toss method. Some of these results do not require the underlying tree to be ultrametric. Then, in Section 7 we investigate the influence of specific tree topologies on the reconstruction accuracy of Maximum Parsimony. In particular, we consider balanced and imbalanced trees as the balance of a tree may have an influence on the reconstruction accuracy. We end by introducing the Colless index in Section 8, an index which measures the degree of balance a rooted binary tree can have, and analyze its extremal properties.},
  subject  = {Mathematische Phylogenetik},
  language = {en},
}