% Deadman, Higham and Ralha: conference paper in the PARA 2012 proceedings,
% published as volume 7782 of Lecture Notes in Computer Science.
%
% Cleaned-up repository export. Changes relative to the raw export:
%   - booktitle now holds the proceedings title; the export had repeated the
%     series name followed by a pipe-separated abbreviation.
%   - address is the publisher's city; the export had a country.
%   - publisher matches the 2013 copyright line quoted in the abstract.
%   - all authors use the unambiguous "Last, First" form.
%   - the proper noun Schur is brace-protected against sentence-casing styles.
%   - the export's note field ("11th International Conference on Applied
%     Parallel and Scientific Computing, PARA 2012 ; Conference date:
%     01-07-2013") was dropped: the event is now named in booktitle, and the
%     exported 2013 conference date contradicts PARA 2012.
% NOTE(review): the proceedings title, the conference place and dates, and the
% publisher city should be confirmed against the front matter of the volume.
@inproceedings{402fad9d79b9453b92fdb31b528becaa,
  author    = {Deadman, Edvin and Higham, Nicholas J. and Ralha, Rui},
  title     = {Blocked {Schur} algorithms for computing the matrix square root},
  booktitle = {Applied Parallel and Scientific Computing: 11th International Conference, {PARA} 2012, Helsinki, Finland, June 10--13, 2012, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {7782},
  pages     = {171--182},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2013},
  isbn      = {978-3-642-36802-8},
  doi       = {10.1007/978-3-642-36803-5_12},
  language  = {English},
  abstract  = {The Schur method for computing a matrix square root reduces the matrix to the Schur triangular form and then computes a square root of the triangular matrix. We show that by using either standard blocking or recursive blocking the computation of the square root of the triangular matrix can be made rich in matrix multiplication. Numerical experiments making appropriate use of level 3 BLAS show significant speedups over the point algorithm, both in the square root phase and in the algorithm as a whole. In parallel implementations, recursive blocking is found to provide better performance than standard blocking when the parallelism comes only from threaded BLAS, but the reverse is true when parallelism is explicitly expressed using OpenMP. The excellent numerical stability of the point algorithm is shown to be preserved by blocking. These results are extended to the real Schur method. Blocking is also shown to be effective for multiplying triangular matrices. {\textcopyright} 2013 Springer-Verlag.},
}