{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:36:00Z","timestamp":1742970960522,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031226762"},{"type":"electronic","value":"9783031226779"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-22677-9_34","type":"book-chapter","created":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T09:04:32Z","timestamp":1673341472000},"page":"642-659","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Multiple-Precision and\u00a0Mixed-Precision Floating-Point Fused Multiply-Accumulate Unit for\u00a0HPC and\u00a0AI Applications"],"prefix":"10.1007","author":[{"given":"Hongbing","family":"Tan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Run","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ling","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Libo","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liquan","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qianming","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,1,11]]},"reference":[{"key":"34_CR1","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1016\/j.micpro.2017.12.009","volume":"57","author":"V Arunachalam","year":"2018","unstructured":"Arunachalam, V., Raj, A.N.J., Hampannavar, N., Bidul, C.: Efficient dual-precision floating-point fused-multiply-add architecture. Microprocess. Microsyst. 57, 23\u201331 (2018)","journal-title":"Microprocess. Microsyst."},{"issue":"12","key":"34_CR2","doi-asserted-by":"publisher","first-page":"2526","DOI":"10.1016\/j.cpc.2008.11.005","volume":"180","author":"M Baboulin","year":"2009","unstructured":"Baboulin, M., et al.: Accelerating scientific computations with mixed precision algorithms. Comput. Phys. Commun. 180(12), 2526\u20132533 (2009)","journal-title":"Comput. Phys. Commun."},{"issue":"2","key":"34_CR3","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1093\/qjmam\/4.2.236","volume":"4","author":"AD Booth","year":"1951","unstructured":"Booth, A.D.: A signed binary multiplication technique. Q. J. Mech. Appl. Math. 4(2), 236\u2013240 (1951)","journal-title":"Q. J. Mech. Appl. Math."},{"doi-asserted-by":"crossref","unstructured":"Bruguera, J.D., Lang, T.: Floating-point fused multiply-add: reduced latency for floating-point addition. In: 17th IEEE Symposium on Computer Arithmetic (ARITH 2005), pp. 42\u201351. IEEE (2005)","key":"34_CR4","DOI":"10.1109\/ARITH.2005.22"},{"doi-asserted-by":"crossref","unstructured":"Chowdhary, K.: Natural language processing. In: Fundamentals of Artificial Intelligence, pp. 603\u2013649 (2020)","key":"34_CR5","DOI":"10.1007\/978-81-322-3972-7_19"},{"unstructured":"Dan, Z., et al.: IEEE standard for floating-point arithmetic. IEEE Std 754-2008, pp. 1\u201370 (2008)","key":"34_CR6"},{"key":"34_CR7","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.330","volume":"7","author":"M Fasi","year":"2021","unstructured":"Fasi, M., Higham, N.J., Mikaitis, M., Pranesh, S.: Numerical behavior of NVIDIA tensor cores. PeerJ Comput. Sci. 7, e330 (2021)","journal-title":"PeerJ Comput. Sci."},{"doi-asserted-by":"crossref","unstructured":"Haidar, A., Tomov, S., Dongarra, J., Higham, N.J.: Harnessing GPU tensor cores for fast FP16 arithmetic to speed up mixed-precision iterative refinement solvers. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 603\u2013613. IEEE (2018)","key":"34_CR8","DOI":"10.1109\/SC.2018.00050"},{"unstructured":"Hauser, J.: Berkeley testfloat, June 2018. http:\/\/www.jhauser.us\/arithmetic\/TestFloat.html","key":"34_CR9"},{"doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","key":"34_CR10","DOI":"10.1109\/CVPR.2016.90"},{"issue":"5","key":"34_CR11","doi-asserted-by":"publisher","first-page":"745","DOI":"10.1109\/TC.2011.77","volume":"61","author":"L Huang","year":"2011","unstructured":"Huang, L., Ma, S., Shen, L., Wang, Z., Xiao, N.: Low-cost binary128 floating-point FMA unit design with SIMD support. IEEE Trans. Comput. 61(5), 745\u2013751 (2011)","journal-title":"IEEE Trans. Comput."},{"doi-asserted-by":"crossref","unstructured":"Huang, L., Shen, L., Dai, K., Wang, Z.: A new architecture for multiple-precision floating-point multiply-add fused unit design. In: 18th IEEE Symposium on Computer Arithmetic (ARITH 2007), pp. 69\u201376. IEEE (2007)","key":"34_CR12","DOI":"10.1109\/ARITH.2007.5"},{"unstructured":"Kalamkar, D., et al.: A study of BFLOAT16 for deep learning training. arXiv preprint arXiv:1905.12322 (2019)","key":"34_CR13"},{"unstructured":"Karatsuba, A.A., Ofman, Y.P.: Multiplication of many-digital numbers by automatic computers. In: Doklady Akademii Nauk, vol. 145, pp. 293\u2013294. Russian Academy of Sciences (1962)","key":"34_CR14"},{"unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, vol. 25 (2012)","key":"34_CR15"},{"doi-asserted-by":"crossref","unstructured":"Kurth, T., et al.: Exascale deep learning for climate analytics. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 649\u2013660. IEEE (2018)","key":"34_CR16","DOI":"10.1109\/SC.2018.00054"},{"issue":"8","key":"34_CR17","doi-asserted-by":"publisher","first-page":"988","DOI":"10.1109\/TC.2004.44","volume":"53","author":"T Lang","year":"2004","unstructured":"Lang, T., Bruguera, J.D.: Floating-point multiply-add-fused with reduced latency. IEEE Trans. Comput. 53(8), 988\u20131003 (2004)","journal-title":"IEEE Trans. Comput."},{"doi-asserted-by":"crossref","unstructured":"Langou, J., Langou, J., Luszczek, P., Kurzak, J., Buttari, A., Dongarra, J.: Exploiting the performance of 32 bit floating point arithmetic in obtaining 64 bit accuracy (revisiting iterative refinement for linear systems). In: SC 2006: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, pp. 50\u201350. IEEE (2006)","key":"34_CR18","DOI":"10.1109\/SC.2006.30"},{"doi-asserted-by":"crossref","unstructured":"Manolopoulos, K., Reisis, D., Chouliaras, V.A.: An efficient dual-mode floating-point multiply-add fused unit. In: 2010 17th IEEE International Conference on Electronics, Circuits and Systems, pp. 5\u20138. IEEE (2010)","key":"34_CR19","DOI":"10.1109\/ICECS.2010.5724440"},{"key":"34_CR20","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.mejo.2015.10.012","volume":"49","author":"K Manolopoulos","year":"2016","unstructured":"Manolopoulos, K., Reisis, D., Chouliaras, V.A.: An efficient multiple precision floating-point multiply-add fused unit. Microelectron. J. 49, 10\u201318 (2016)","journal-title":"Microelectron. J."},{"doi-asserted-by":"crossref","unstructured":"Mathuriya, A., et al.: CosmoFlow: using deep learning to learn the universe at scale. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 819\u2013829. IEEE (2018)","key":"34_CR21","DOI":"10.1109\/SC.2018.00068"},{"doi-asserted-by":"crossref","unstructured":"Quinnell, E., Swartzlander, E.E., Lemonds, C.: Bridge floating-point fused multiply-add design. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 16(12), 1727\u20131731 (2008)","key":"34_CR22","DOI":"10.1109\/TVLSI.2008.2001944"},{"issue":"5","key":"34_CR23","doi-asserted-by":"publisher","first-page":"1878","DOI":"10.1093\/bib\/bby061","volume":"20","author":"AS Rifaioglu","year":"2019","unstructured":"Rifaioglu, A.S., Atas, H., Martin, M.J., Cetin-Atalay, R., Atalay, V., Do\u011fan, T.: Recent applications of deep learning and machine intelligence on in silico drug discovery: methods, tools and databases. Brief. Bioinform. 20(5), 1878\u20131912 (2019)","journal-title":"Brief. Bioinform."},{"doi-asserted-by":"crossref","unstructured":"Schmookler, M.S., Nowka, K.J.: Leading zero anticipation and detection-a comparison of methods. In: Proceedings 15th IEEE Symposium on Computer Arithmetic, ARITH-15 2001, pp. 7\u201312. IEEE (2001)","key":"34_CR24","DOI":"10.1109\/ARITH.2001.930098"},{"key":"34_CR25","series-title":"Signals and Communication Technology","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5779-3","volume-title":"Automatic Speech Recognition","author":"D Yu","year":"2015","unstructured":"Yu, D., Deng, L.: Automatic Speech Recognition. SCT, Springer, London (2015). https:\/\/doi.org\/10.1007\/978-1-4471-5779-3"},{"issue":"7","key":"34_CR26","doi-asserted-by":"publisher","first-page":"1035","DOI":"10.1109\/TC.2019.2895031","volume":"68","author":"H Zhang","year":"2019","unstructured":"Zhang, H., Chen, D., Ko, S.B.: Efficient multiple-precision floating-point fused multiply-add with mixed-precision support. IEEE Trans. Comput. 68(7), 1035\u20131048 (2019)","journal-title":"IEEE Trans. Comput."},{"issue":"1","key":"34_CR27","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/TC.2019.2936192","volume":"69","author":"H Zhang","year":"2019","unstructured":"Zhang, H., Chen, D., Ko, S.B.: New flexible multiple-precision multiply-accumulate unit for deep neural network training and inference. IEEE Trans. Comput. 69(1), 26\u201338 (2019)","journal-title":"IEEE Trans. Comput."},{"doi-asserted-by":"crossref","unstructured":"Zhang, H., Lee, H.J., Ko, S.B.: Efficient fixed\/floating-point merged mixed-precision multiply-accumulate unit for deep learning processors. In: 2018 IEEE International Symposium on Circuits and Systems (ISCAS), pp. 1\u20135. IEEE (2018)","key":"34_CR28","DOI":"10.1109\/ISCAS.2018.8351354"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-22677-9_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T22:41:44Z","timestamp":1728686504000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-22677-9_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031226762","9783031226779"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-22677-9_34","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"11 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Copenhagen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Denmark","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"91","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}