{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:23:50Z","timestamp":1750220630709,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,6,29]],"date-time":"2020-06-29T00:00:00Z","timestamp":1593388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science","doi-asserted-by":"publisher","award":["18H05239, 18K18873"],"award-info":[{"award-number":["18H05239, 18K18873"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,6,29]]},"DOI":"10.1145\/3394277.3401860","type":"proceedings-article","created":{"date-parts":[[2020,6,18]],"date-time":"2020-06-18T23:03:38Z","timestamp":1592521418000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Low-Order Finite Element Solver with Small Matrix-Matrix Multiplication Accelerated by AI-Specific Hardware for Crustal Deformation Computation"],"prefix":"10.1145","author":[{"given":"Takuma","family":"Yamaguchi","sequence":"first","affiliation":[{"name":"The University of Tokyo, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kohei","family":"Fujita","sequence":"additional","affiliation":[{"name":"The University of Tokyo, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tsuyoshi","family":"Ichimura","sequence":"additional","affiliation":[{"name":"The University of Tokyo, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Akira","family":"Naruse","sequence":"additional","affiliation":[{"name":"NVIDIA corporation, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jack C.","family":"Wells","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Knoxville, Tennessee"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christopher J.","family":"Zimmer","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Knoxville, Tennessee"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tjerk P.","family":"Straatsma","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Knoxville, Tennessee"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Muneo","family":"Hori","sequence":"additional","affiliation":[{"name":"Japan Agency for Marine-Earth Science and Technology, Kanagawa, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lalith","family":"Maddegedara","sequence":"additional","affiliation":[{"name":"The University of Tokyo, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Naonori","family":"Ueda","sequence":"additional","affiliation":[{"name":"RIKEN, Kobe, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,6,29]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Giga-voxel computational morphogenesis for structural design. Nature 550, 7674","author":"Aage Niels","year":"2017","unstructured":"Niels Aage , Erik Andreassen , Boyan S Lazarov , and Ole Sigmund . 2017. Giga-voxel computational morphogenesis for structural design. Nature 550, 7674 ( 2017 ), 84. Niels Aage, Erik Andreassen, Boyan S Lazarov, and Ole Sigmund. 2017. Giga-voxel computational morphogenesis for structural design. Nature 550, 7674 (2017), 84."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.05.302"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00022"},{"key":"e_1_3_2_1_4_1","unstructured":"Analog Computation in Flash Memory for Datacenter-scale AI Inference in a Small Chip [Online]. 2018. https:\/\/www.hotchips.org\/hc30\/2conf2.05_Mythic_Mythic_Hot_Chips_2018_V5.pdf.  Analog Computation in Flash Memory for Datacenter-scale AI Inference in a Small Chip [Online]. 2018. https:\/\/www.hotchips.org\/hc30\/2conf2.05_Mythic_Mythic_Hot_Chips_2018_V5.pdf."},{"key":"e_1_3_2_1_5_1","unstructured":"Arm's First-Generation Machine Learning Processor [Online]. 2018. https:\/\/www.hotchips.org\/hc30\/2conf2.07_ARM_ML_Processor_HC30_ARM_2018_08_17.pdf.  Arm's First-Generation Machine Learning Processor [Online]. 2018. https:\/\/www.hotchips.org\/hc30\/2conf2.07_ARM_ML_Processor_HC30_ARM_2018_08_17.pdf."},{"key":"e_1_3_2_1_6_1","unstructured":"Japan Hydrographic Association. 2013. JTOPO30 (30-second grid water depth data in Japan's coastal waters) [Online]. http:\/\/www.mirc.jha.jp\/products\/finished\/JTOPO30\/.  Japan Hydrographic Association. 2013. JTOPO30 (30-second grid water depth data in Japan's coastal waters) [Online]. http:\/\/www.mirc.jha.jp\/products\/finished\/JTOPO30\/."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1137\/17M1140819"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2010.05.002"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3093172.3093236"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1365-246X.2005.02594.x"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1137\/S1064827597323415"},{"key":"e_1_3_2_1_12_1","unstructured":"Google Announces Cloud TPU v2 Beta Availability for Google Cloud Platform [Online]. 2018. https:\/\/www.anandtech.com\/show\/12429\/google-cloud-announces-cloud-tpu-beta-availability.  Google Announces Cloud TPU v2 Beta Availability for Google Cloud Platform [Online]. 2018. https:\/\/www.anandtech.com\/show\/12429\/google-cloud-announces-cloud-tpu-beta-availability."},{"key":"e_1_3_2_1_13_1","unstructured":"GPUDirect [Online]. 2019. https:\/\/developer.nvidia.com\/gpudirect.  GPUDirect [Online]. 2019. https:\/\/developer.nvidia.com\/gpudirect."},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Machine Learning. 1737--1746","author":"Gupta Suyog","year":"2015","unstructured":"Suyog Gupta , Ankur Agrawal , Kailash Gopalakrishnan , and Pritish Narayanan . 2015 . Deep learning with limited numerical precision . In International Conference on Machine Learning. 1737--1746 . Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, and Pritish Narayanan. 2015. Deep learning with limited numerical precision. In International Conference on Machine Learning. 1737--1746."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93698-7_45"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00050"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00024-004-2548-8"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.epsl.2010.02.043"},{"volume-title":"The finite element method: linear static and dynamic finite element analysis","author":"Hughes Thomas JR","key":"e_1_3_2_1_19_1","unstructured":"Thomas JR Hughes . 2012. The finite element method: linear static and dynamic finite element analysis . Courier Corporation . Thomas JR Hughes. 2012. The finite element method: linear static and dynamic finite element analysis. Courier Corporation."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807674"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.7"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00052"},{"key":"e_1_3_2_1_23_1","volume-title":"Research Poster for SC19: International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Ichimura Tsuyoshi","year":"2019","unstructured":"Tsuyoshi Ichimura , Kohei Fujita , Takuma Yamaguchi , Akira Naruse , Jack C. Wells , Christopher J. Zimmer , Tjerk P. Straatsma , Takane Hori , Simone Puel , Thorsten W. Becker , Muneo Hori , and Naonori Ueda . 2019 . 416-PFLOPS fast scalable implicit solver on low-ordered unstructured finite elements accelerated by 1.10-ExaFLOPS kernel with reformulated AI-like algorithm: For equation-based earthquake modeling . Research Poster for SC19: International Conference for High Performance Computing, Networking, Storage and Analysis (2019). Tsuyoshi Ichimura, Kohei Fujita, Takuma Yamaguchi, Akira Naruse, Jack C. Wells, Christopher J. Zimmer, Tjerk P. Straatsma, Takane Hori, Simone Puel, Thorsten W. Becker, Muneo Hori, and Naonori Ueda. 2019. 416-PFLOPS fast scalable implicit solver on low-ordered unstructured finite elements accelerated by 1.10-ExaFLOPS kernel with reformulated AI-like algorithm: For equation-based earthquake modeling. Research Poster for SC19: International Conference for High Performance Computing, Networking, Storage and Analysis (2019)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1785\/0120060175"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2014.09.003"},{"key":"e_1_3_2_1_26_1","volume-title":"Dissecting the NVIDIA Volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826","author":"Jia Zhe","year":"2018","unstructured":"Zhe Jia , Marco Maggioni , Benjamin Staiger , and Daniele P Scarpazza . 2018. Dissecting the NVIDIA Volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826 ( 2018 ). Zhe Jia, Marco Maggioni, Benjamin Staiger, and Daniele P Scarpazza. 2018. Dissecting the NVIDIA Volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826 (2018)."},{"key":"e_1_3_2_1_27_1","first-page":"94720","article-title":"IEEE standard 754 for binary floating-point arithmetic","volume":"754","author":"Kahan William","year":"1996","unstructured":"William Kahan . 1996 . IEEE standard 754 for binary floating-point arithmetic . Lecture Notes on the Status of IEEE 754 , 94720 -- 91776 (1996), 11. William Kahan. 1996. IEEE standard 754 for binary floating-point arithmetic. Lecture Notes on the Status of IEEE 754, 94720--1776 (1996), 11.","journal-title":"Lecture Notes on the Status of IEEE"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1002\/jgrb.50265"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00091"},{"key":"e_1_3_2_1_30_1","article-title":"Finite element model predictions of static deformation from dislocation sources in a subduction zone: Sensitivities to homogeneous, isotropic, Poisson-solid, and half-space assumptions","volume":"108","author":"Masterlark Timothy","year":"2003","unstructured":"Timothy Masterlark . 2003 . Finite element model predictions of static deformation from dislocation sources in a subduction zone: Sensitivities to homogeneous, isotropic, Poisson-solid, and half-space assumptions . Journal of Geophysical Research: Solid Earth 108 , B11 (2003). Timothy Masterlark. 2003. Finite element model predictions of static deformation from dislocation sources in a subduction zone: Sensitivities to homogeneous, isotropic, Poisson-solid, and half-space assumptions. Journal of Geophysical Research: Solid Earth 108, B11 (2003).","journal-title":"Journal of Geophysical Research: Solid Earth"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1513895.1513905"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1093\/gji\/ggt180"},{"key":"e_1_3_2_1_33_1","first-page":"31","article-title":"cuBLAS library. NVIDIA Corporation, Santa Clara","volume":"15","author":"NVIDIA.","year":"2008","unstructured":"NVIDIA. 2008 . cuBLAS library. NVIDIA Corporation, Santa Clara , California 15 , 27 (2008), 31 . NVIDIA. 2008. cuBLAS library. NVIDIA Corporation, Santa Clara, California 15, 27 (2008), 31.","journal-title":"California"},{"key":"e_1_3_2_1_34_1","unstructured":"NVIDIA. 2017. NVIDIA Tesla V100 GPU Architecture [Online]. http:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf.  NVIDIA. 2017. NVIDIA Tesla V100 GPU Architecture [Online]. http:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf."},{"key":"e_1_3_2_1_35_1","unstructured":"Geospatial Information Authority of Japan. 2010. GNSS earth observation network system [Online]. http:\/\/terras.gsi.go.jp\/geo_info\/geonet_top.html.  Geospatial Information Authority of Japan. 2010. GNSS earth observation network system [Online]. http:\/\/terras.gsi.go.jp\/geo_info\/geonet_top.html."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Yoshimitsu Okada. 1985. Surface deformation due to shear and tensile faults in a half-space. Bulletin of the seismological society of America 75 4(1985) 1135--1154.  Yoshimitsu Okada. 1985. Surface deformation due to shear and tensile faults in a half-space. Bulletin of the seismological society of America 75 4(1985) 1135--1154.","DOI":"10.1785\/BSSA0750041135"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00024-008-0325-9"},{"key":"e_1_3_2_1_38_1","volume-title":"Modeling Deep Learning Accelerator Enabled GPUs. In 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 79--92","author":"Raihan Md Aamir","year":"2019","unstructured":"Md Aamir Raihan , Negar Goli , and Tor M Aamodt . 2019 . Modeling Deep Learning Accelerator Enabled GPUs. In 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 79--92 . Md Aamir Raihan, Negar Goli, and Tor M Aamodt. 2019. Modeling Deep Learning Accelerator Enabled GPUs. In 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 79--92."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807675"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1137\/0914028"},{"volume-title":"Iterative methods for sparse linear systems","author":"Saad Yousef","key":"e_1_3_2_1_41_1","unstructured":"Yousef Saad . 2003. Iterative methods for sparse linear systems . Vol. 82 . siam. Yousef Saad. 2003. Iterative methods for sparse linear systems. Vol. 82. siam."},{"key":"e_1_3_2_1_42_1","volume-title":"Deep convolutional neural networks for computer-aided detection: CNN architectures, dataset characteristics and transfer learning","author":"Shin Hoo-Chang","year":"2016","unstructured":"Hoo-Chang Shin , Holger R Roth , Mingchen Gao , Le Lu , Ziyue Xu , Isabella Nogues , Jianhua Yao , Daniel Mollura , and Ronald M Summers . 2016. Deep convolutional neural networks for computer-aided detection: CNN architectures, dataset characteristics and transfer learning . IEEE transactions on medical imaging 35, 5 ( 2016 ), 1285--1298. Hoo-Chang Shin, Holger R Roth, Mingchen Gao, Le Lu, Ziyue Xu, Isabella Nogues, Jianhua Yao, Daniel Mollura, and Ronald M Summers. 2016. Deep convolutional neural networks for computer-aided detection: CNN architectures, dataset characteristics and transfer learning. IEEE transactions on medical imaging 35, 5 (2016), 1285--1298."},{"key":"e_1_3_2_1_43_1","unstructured":"Japan Seismic Hazard Information Station. 2010. National Research Institute for Earth Science and Disaster Resilience [Online]. https:\/\/www.j-shis.bosai.go.jp\/download.  Japan Seismic Hazard Information Station. 2010. National Research Institute for Earth Science and Disaster Resilience [Online]. https:\/\/www.j-shis.bosai.go.jp\/download."},{"key":"e_1_3_2_1_44_1","unstructured":"Summit [Online]. 2018. https:\/\/www.olcf.ornl.gov\/olcf-resources\/compute-systems\/summit\/.  Summit [Online]. 2018. https:\/\/www.olcf.ornl.gov\/olcf-resources\/compute-systems\/summit\/."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2009.12.005"},{"key":"e_1_3_2_1_46_1","unstructured":"Using bfloat16 with tensorflow models [Online]. 2019. https:\/\/cloud.google.com\/ttpu\/docs\/bfloat16.  Using bfloat16 with tensorflow models [Online]. 2019. https:\/\/cloud.google.com\/ttpu\/docs\/bfloat16."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1002\/2017JB014620"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/0045-7825(85)90015-5"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065870"}],"event":{"name":"PASC '20: Platform for Advanced Scientific Computing Conference","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","CSCS Swiss National Supercomputing Centre","ETH Zurich Federal Institute of Technology - University of Zurich"],"location":"Geneva Switzerland","acronym":"PASC '20"},"container-title":["Proceedings of the Platform for Advanced Scientific Computing Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394277.3401860","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394277.3401860","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:01:41Z","timestamp":1750197701000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394277.3401860"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,29]]},"references-count":49,"alternative-id":["10.1145\/3394277.3401860","10.1145\/3394277"],"URL":"https:\/\/doi.org\/10.1145\/3394277.3401860","relation":{},"subject":[],"published":{"date-parts":[[2020,6,29]]},"assertion":[{"value":"2020-06-29","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}