{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T13:56:37Z","timestamp":1769349397768,"version":"3.49.0"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,10,19]],"date-time":"2022-10-19T00:00:00Z","timestamp":1666137600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,19]],"date-time":"2022-10-19T00:00:00Z","timestamp":1666137600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"the National Key R &D Program","award":["2020YFB0204601"],"award-info":[{"award-number":["2020YFB0204601"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s42514-022-00126-8","type":"journal-article","created":{"date-parts":[[2022,10,19]],"date-time":"2022-10-19T14:03:48Z","timestamp":1666188228000},"page":"56-71","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["xMath2.0: a high-performance extended math library for SW26010-Pro many-core processor"],"prefix":"10.1007","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7344-7493","authenticated-orcid":false,"given":"Fangfang","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenjing","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuwen","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daokun","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qinglin","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"WanWang","family":"Yin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinhui","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lijuan","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongsen","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,19]]},"reference":[{"key":"126_CR2","doi-asserted-by":"crossref","unstructured":"Ali, A., Johnsson, L., Subhlok, J.:. Scheduling FFT computation on SMP and multicore systems. In Proceedings of the 21st annual international conference on Supercomputing pp. 293-301. (2007}","DOI":"10.1145\/1274971.1275011"},{"key":"126_CR3","unstructured":"Demmel, J., et al.: Communication-avoiding parallel and sequential QR factorizations. (2008)"},{"key":"126_CR4","unstructured":"Demmel, J., Grigori, L., Hoe mm en, M. et al.: Communication-optimal parallel and sequential QR and LU factorizations: theory and practice (2008)"},{"issue":"1","key":"126_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/77626.79170","volume":"16","author":"J Dongarra","year":"1990","unstructured":"Dongarra, J., Du Croz, J., Hammarling, S., Duff, I.S.: A set of level 3 basic linear algebra subprograms. ACM Transactions on Mathematical Software (TOMS) 16(1), 1\u201317 (1990)","journal-title":"ACM Transactions on Mathematical Software (TOMS)"},{"key":"126_CR6","doi-asserted-by":"crossref","unstructured":"Georgios, Karakasis, Vasileios, et al.: An Extended Compression Format for the Optimization of Sparse Matrix-Vector Multiplication. IEEE Transactions on Parallel and Distributed Systems: A Publication of the IEEE Computer Society (2013)","DOI":"10.1109\/TPDS.2012.290"},{"issue":"3","key":"126_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1356052.1356053","volume":"34","author":"K Goto","year":"2008","unstructured":"Goto, K., Geijn, R.A.V.D.: Anatomy of high-performance matrix multiplication. ACM Transactions on Mathematical Software (TOMS) 34(3), 1\u201325 (2008)","journal-title":"ACM Transactions on Mathematical Software (TOMS)"},{"key":"126_CR19","unstructured":"Hu, Y., Chen, D.K., Yang, C., Liu, F.F., Ma, W.J., Yin, W.W., Yuan, X.H., LIN, R.F.: Many-core Optimization of Level 1 and Level 2 BLAS Routines on the New Domestic SW26010-Pro Processor. Ruan Jian Xue Bao\/J. Software (2021) (in Chinese)"},{"key":"126_CR20","doi-asserted-by":"crossref","unstructured":"Jack, D., et al.: HPC Programming on Intel Many-Integrated-Core Hardware with MAGMA Port to Xeon Phi. Scient. Program. (2015)","DOI":"10.1155\/2015\/502593"},{"key":"126_CR21","unstructured":"Jack, D., Gates, M., Haidar, A., et al.: Accelerating Numerical Dense Linear Algebra Calculations with GPUs. Springer International Publishing (2014)"},{"key":"126_CR22","doi-asserted-by":"crossref","unstructured":"Jiang, L., Yang, C., Ao, Y., et al.: Towards Highly Efficient DGEMM on the Emerging SW26010 Many-Core Processor. Int. Confer. Parallel Proc. IEEE (2017)","DOI":"10.1109\/ICPP.2017.51"},{"key":"126_CR23","unstructured":"Liang, G., Li, X., Siegel, J.: An empirically tuned 2D and 3D FFT library on CUDA GPU, International Conference on Supercomputing DBLP (2010)"},{"key":"126_CR24","doi-asserted-by":"crossref","unstructured":"Liu, X., Smelyanskiy, M., Chow, E., et al.: Efficient sparse matrix-vector multiplication on x86-based many-core processors, Proceedings of the 27th International ACM Conference on International Conference on Supercomputing. ACM (2013)","DOI":"10.1145\/2464996.2465013"},{"key":"126_CR25","doi-asserted-by":"crossref","unstructured":"Liu, W., Vinter, B.: CSR5: An Efficient Storage Format for Cross-Platform Sparse Matrix-Vector Multiplication, The 29th ACM Int. Confer. Supercomput. (ICS \u201915). ACM, (2015)","DOI":"10.1145\/2751205.2751209"},{"issue":"6","key":"126_CR26","doi-asserted-by":"publisher","first-page":"989","DOI":"10.1007\/s11390-014-1484-z","volume":"29","author":"Y Liu","year":"2014","unstructured":"Liu, Y., et al.: Memory Efficient Two-Pass 3D FFT Algorithm for Intel Xeon Phi TM Coprocessor. J. Comput. Sci. Technol. 29(6), 989\u20131002 (2014)","journal-title":"J. Comput. Sci. Technol."},{"issue":"12","key":"126_CR27","first-page":"3921","volume":"29","author":"F Liu","year":"2018","unstructured":"Liu, F., Yang, C., Yuan, X., Wu, C., Ao, Y.: A General SpMV Implementation in Many-Core Domestic Sunway 26010 Processor. J. Software 29(12), 3921\u20133932 (2018)","journal-title":"J. Software"},{"issue":"1","key":"126_CR28","first-page":"34","volume":"40","author":"F Liu","year":"2019","unstructured":"Liu, F., Chen, D., Yang, C., Zhao, Y.: Research on heterogeneous many-core fully-implicit solver for MHD dynamical equations. J. Numer. Methods Comput. Appl. 40(1), 34\u201350 (2019)","journal-title":"J. Numer. Methods Comput. Appl."},{"issue":"2","key":"126_CR29","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1109\/JPROC.2004.840306","volume":"93","author":"P Markus","year":"2005","unstructured":"Markus, P., et al.: SPIRAL: Code Generation for DSP Transforms. Proc. IEEE 93(2), 232\u2013275 (2005)","journal-title":"Proc. IEEE"},{"issue":"2","key":"126_CR30","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1109\/JPROC.2004.840301","volume":"93","author":"F Matteo","year":"2005","unstructured":"Matteo, F., Johnson, S.G.: The Design and Implementation of FFTW3. Proc. IEEE 93(2), 216\u2013231 (2005)","journal-title":"Proc. IEEE"},{"key":"126_CR1","doi-asserted-by":"crossref","unstructured":"Monakov, A., Lokhmotov, A., Avetisyan, A.: Automatically tuning sparse matrix-vector multiplication for GPU architectures. In International Conference on High-Performance Embedded Architectures and Compilers pp. 111-125. Springer, Berlin, Heidelberg (2010)","DOI":"10.1007\/978-3-642-11515-8_10"},{"key":"126_CR31","unstructured":"Nathan, B., Garland, M.: Implementing sparse matrix-vector multiplication on throughput-oriented processors. Confer. High Perform. Comput. Networking ACM (2009)"},{"key":"126_CR32","unstructured":"Rajib, N., Stanimire, T., Jack, D.: An improved magma gemm for fermi graphics processing units. Int. J. High Perform. Comput. Appl. (2010)"},{"key":"126_CR33","doi-asserted-by":"crossref","unstructured":"Tomas, A., Bai, Z., Hern\u00e1ndez, V.: Parallelization of the QR Decomposition with Column Pivoting Using Column Cyclic Distribution on Multicore and GPU Processors, International Conference on High Performance Computing for Computational Science. Springer, Berlin, Heidelberg, (2012)","DOI":"10.1007\/978-3-642-38718-0_8"},{"key":"126_CR34","doi-asserted-by":"crossref","unstructured":"Wang, Q., et al.: AUGEM: Automatically generate high performance Dense Linear Algebra kernels on x86 CPUs. Storage & Analysis IEEE. High Perform. Comput. Netw. (2013)","DOI":"10.1145\/2503210.2503219"},{"key":"126_CR35","unstructured":"Wang, J., Jaja, J.: High Performance FFT Based Poisson Solver on a CPU-GPU Heterogeneous Platform.\u201dIEEE Int. Parallel Distrib. Proc. Sympos"},{"key":"126_CR36","doi-asserted-by":"crossref","unstructured":"Williams, S., Vuduc, R., liker, L., et al. Optimizing sparse matrix-vector multiply on emerging multicore platforms. Parallel Computing, 35(3) (2009) 178-194","DOI":"10.1016\/j.parco.2008.12.006"},{"key":"126_CR900","doi-asserted-by":"publisher","unstructured":"Wu, J., Jaja, J.: High performance FFT based poisson solver on a CPU-GPU heterogeneous platform. In: Proceedings of the 2013 IEEE 27th International Symposium on Parallel and Distributed Processing, ser. IPDPS \u201913, pp. 115\u2013125. IEEE Computer Society, Washington, DC, USA (2013). https:\/\/doi.org\/10.1109\/IPDPS.2013.18","DOI":"10.1109\/IPDPS.2013.18"},{"key":"126_CR37","doi-asserted-by":"crossref","unstructured":"Yan, S., Li, C., et al.: YaSpMV: Yet another SpMV framework on GPUs, ACM SIGPLAN Notices (2014)","DOI":"10.1145\/2555243.2555255"},{"issue":"10","key":"126_CR38","first-page":"3184","volume":"31","author":"Y Zhao","year":"2020","unstructured":"Zhao, Y., Ao, Y., Yang, C., Yin, W., Lin, R.: A general implementation of 1-d fft on the sunway 26010 processor. J. Software 31(10), 3184\u20133196 (2020)","journal-title":"J. Software"}],"updated-by":[{"DOI":"10.1007\/s42514-022-00130-y","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2022,12,7]],"date-time":"2022-12-07T00:00:00Z","timestamp":1670371200000}}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-022-00126-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-022-00126-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-022-00126-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,26]],"date-time":"2023-03-26T21:40:07Z","timestamp":1679866807000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-022-00126-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,19]]},"references-count":28,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["126"],"URL":"https:\/\/doi.org\/10.1007\/s42514-022-00126-8","relation":{"correction":[{"id-type":"doi","id":"10.1007\/s42514-022-00130-y","asserted-by":"object"}]},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"value":"2524-4922","type":"print"},{"value":"2524-4930","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,19]]},"assertion":[{"value":"30 October 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 October 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 December 2022","order":4,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Correction","order":5,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A Correction to this paper has been published:","order":6,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"https:\/\/doi.org\/10.1007\/s42514-022-00130-y","URL":"https:\/\/doi.org\/10.1007\/s42514-022-00130-y","order":7,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}