{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T07:01:22Z","timestamp":1772866882553,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T00:00:00Z","timestamp":1740700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"University of Electronic Science and Technology of China","doi-asserted-by":"publisher","award":["A1098531023601465"],"award-info":[{"award-number":["A1098531023601465"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"China Computer Federation","doi-asserted-by":"publisher","award":["CCF-GH OF 2024002"],"award-info":[{"award-number":["CCF-GH OF 2024002"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,28]]},"DOI":"10.1145\/3710848.3710894","type":"proceedings-article","created":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T06:20:57Z","timestamp":1740723657000},"page":"469-480","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Improving Tridiagonalization Performance on GPU Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0035-2323","authenticated-orcid":false,"given":"Hansheng","family":"Wang","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0283-8419","authenticated-orcid":false,"given":"Zhekai","family":"Duan","sequence":"additional","affiliation":[{"name":"University of Edinburgh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8605-0938","authenticated-orcid":false,"given":"Zitian","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4459-4387","authenticated-orcid":false,"given":"Siqi","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4569-1552","authenticated-orcid":false,"given":"Saiqi","family":"Zheng","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong-Liverpool University"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4579-4268","authenticated-orcid":false,"given":"Qiao","family":"Li","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2675-2895","authenticated-orcid":false,"given":"Xu","family":"Jiang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9525-1659","authenticated-orcid":false,"given":"Shaoshuai","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]}],"member":"320","published-online":{"date-parts":[[2025,2,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Principal component analysis","author":"Abdi Herv\u00e9","year":"2010","unstructured":"Herv\u00e9 Abdi and Lynne J Williams. 2010. Principal component analysis. Wiley interdisciplinary reviews: computational statistics 2, 4 (2010), 433--459."},{"key":"e_1_3_2_1_2_1","volume-title":"Anne Greenbaum, Sven Hammarling, Alan McKenney, et al.","author":"Anderson Edward","year":"1999","unstructured":"Edward Anderson, Zhaojun Bai, Christian Bischof, L Susan Blackford, James Demmel, Jack Dongarra, Jeremy Du Croz, Anne Greenbaum, Sven Hammarling, Alan McKenney, et al. 1999. LAPACK Users' guide. SIAM."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1137\/0908009"},{"key":"e_1_3_2_1_4_1","volume-title":"Density functional theory: a powerful tool for theoretical studies in coordination chemistry. Coordination chemistry reviews 178","author":"Chermette H","year":"1998","unstructured":"H Chermette. 1998. Density functional theory: a powerful tool for theoretical studies in coordination chemistry. Coordination chemistry reviews 178 (1998), 699--721."},{"key":"e_1_3_2_1_5_1","volume-title":"Nvidia hopper h100 gpu: Scaling performance","author":"Choquette Jack","year":"2023","unstructured":"Jack Choquette. 2023. Nvidia hopper h100 gpu: Scaling performance. IEEE Micro (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Quantum chemical studies of light harvesting. Chemical reviews 117, 2","author":"Curutchet Carles","year":"2017","unstructured":"Carles Curutchet and Benedetta Mennucci. 2017. Quantum chemical studies of light harvesting. Chemical reviews 117, 2 (2017), 294--343."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3264491"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/0377-0427(89)90367-1"},{"key":"e_1_3_2_1_9_1","unstructured":"Sebastian Gant. [n.d.]. Chasing the Bulge. ([n.d.])."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2017.10.004"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-0427(00)00413-1"},{"key":"e_1_3_2_1_12_1","volume-title":"Matrix computations","author":"Golub Gene H","unstructured":"Gene H Golub and Charles F Van Loan. 2013. Matrix computations. JHU press."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479892241287"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063394"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevB.101.205140"},{"key":"e_1_3_2_1_16_1","volume-title":"Applications, Tools and Techniques on the Road to Exascale Computing","author":"Ltaief Hatem","unstructured":"Hatem Ltaief, Piotr Luszczek, Azzam Haidar, and Jack Dongarra. 2012. Solving the generalized symmetric eigenvalue problem using tile algorithms on multicore architectures. In Applications, Tools and Techniques on the Road to Exascale Computing. IOS Press, 397--404."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.91"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1088\/0953-8984\/26\/21\/213201"},{"key":"e_1_3_2_1_19_1","volume-title":"DGEMM on integer matrix multiplication unit. The International Journal of High Performance Computing Applications","author":"Ootomo Hiroyuki","year":"2024","unstructured":"Hiroyuki Ootomo, Katsuhisa Ozaki, and Rio Yokota. 2024. DGEMM on integer matrix multiplication unit. The International Journal of High Performance Computing Applications (2024), 10943420241239588."},{"key":"e_1_3_2_1_20_1","unstructured":"Jeffery D Rutter. 1991. A Serial Implementation of Cuppen's Divide and Conquer Algorithm."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/64884.64889"},{"key":"e_1_3_2_1_22_1","series-title":"November 2009","volume-title":"UTK","author":"Tomov Stanimire","year":"2011","unstructured":"Stanimire Tomov, Rajib Nath, Peng Du, and Jack Dongarra. 2011. MAGMA Users' Guide. ICL, UTK (November 2009) (2011)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2925426.2926256"},{"key":"e_1_3_2_1_24_1","series-title":"SIAM review 24, 4","volume-title":"Understanding the QR algorithm","author":"Watkins David S","year":"1982","unstructured":"David S Watkins. 1982. Understanding the QR algorithm. SIAM review 24, 4 (1982), 427--440."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2020.107808"},{"key":"e_1_3_2_1_27_1","volume-title":"Basic Linear Algebra Operations on TensorCore GPU. In 2020 IEEE\/ACM 11th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA). IEEE, 44--52","author":"Zhang Shaoshuai","year":"2020","unstructured":"Shaoshuai Zhang, Vivek Karihaloo, and Panruo Wu. 2020. Basic Linear Algebra Operations on TensorCore GPU. In 2020 IEEE\/ACM 11th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA). IEEE, 44--52."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577516"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392770"}],"event":{"name":"PPoPP '25: The 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","location":"Las Vegas NV USA","acronym":"PPoPP '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710894","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3710848.3710894","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:13:54Z","timestamp":1755875634000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710894"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,28]]},"references-count":29,"alternative-id":["10.1145\/3710848.3710894","10.1145\/3710848"],"URL":"https:\/\/doi.org\/10.1145\/3710848.3710894","relation":{},"subject":[],"published":{"date-parts":[[2025,2,28]]},"assertion":[{"value":"2025-02-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}