@comment{SC Workshops 2024 paper. Given names are stored as plain text so styles can abbreviate them; proper nouns and acronyms in the title are brace-protected against sentence-casing.}
@inproceedings{f6ed2c6403574acf8192f33bf96fb33d,
  author    = {Simakov, Nikolay A. and White, Joseph P. and Jones, Matthew D. and Siegmann, Eva and Wood, Daniel G. and Coskun, Firat and Harrison, Robert J.},
  title     = {Benchmarking and Continuous Performance Monitoring of {Ookami}, an {ARM} {Fujitsu} {A64FX} Testbed Cluster},
  booktitle = {Proceedings of {SC} 2024-{W}: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  pages     = {588--594},
  year      = {2024},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/SCW63240.2024.00081},
  language  = {English},
  keywords  = {Benchmarks, High-Performance Computing, Performance Monitoring, Regression Tests},
  abstract  = {Continuous performance monitoring is critical for maintaining optimal performance of High-Performance Computing resources. This is especially important for technological test bed systems, in which software updates occur often, and performance degradation in one place can be masked by performance improvement in other places. This paper reports on our experience running continuous performance monitoring on Ookami, an ARM Fujitsu A64FX machine (the first ARM CPU with SVE-512 support) using XDMoD. After over three years of monitoring, we found that the applications and numerical library performance improved the most on the initial release with new technology support, followed by a series of smaller performance gains. Another interesting observation about numeric libraries is that the most invested vendors produce optimized code faster than community codes.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE. 2024 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC Workshops 2024. Conference date: 17-11-2024 through 22-11-2024.},
}