{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T16:00:34Z","timestamp":1779292834421,"version":"3.51.4"},"reference-count":41,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2017,7,1]],"date-time":"2017-07-01T00:00:00Z","timestamp":1498867200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001700","name":"MEXT","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001700","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Development of Innovative Clean Energy","award":["22686086"],"award-info":[{"award-number":["22686086"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2017,7,1]]},"DOI":"10.1109\/tpds.2016.2633349","type":"journal-article","created":{"date-parts":[[2016,11,29]],"date-time":"2016-11-29T14:09:57Z","timestamp":1480428597000},"page":"1974-1988","source":"Crossref","is-referenced-by-count":12,"title":["Optimization of Fusion Kernels on Accelerators with Indirect or Strided Memory Access Patterns"],"prefix":"10.1109","volume":"28","author":[{"given":"Yuuichi","family":"Asahi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guillaume","family":"Latu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Takuya","family":"Ina","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yasuhiro","family":"Idomura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Virginie","family":"Grandgirard","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xavier","family":"Garbet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5222004"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2014.142"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/HPCSim.2016.7568318"},{"key":"ref32","article-title":"An empirical study of Intel Xeon Phi","author":"fang","year":"0","journal-title":"CoRR"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-803819-2.00001-X"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref37","article-title":"The chronicles of Phi-part 5-plesiochronous phasing\n barrier-tiled_HT3","author":"dempsey","year":"2014"},{"key":"ref36","first-page":"87","article-title":"Plesiochronous phasing barriers","author":"dempsey","year":"2014","journal-title":"High Performance Parallelism Pearls Multicore and Many-Core Programming Approaches"},{"key":"ref35","first-page":"4","article-title":"A comparison of compiler tiling algorithms","author":"rivera","year":"2000","journal-title":"Proc ACM\/IEEE Conf Supercomputing"},{"key":"ref34","article-title":"Multicore-optimized wavefront diamond blocking for optimizing stencil updates","author":"malas","year":"2014"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.compfluid.2012.09.013"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304619"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.82"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-803819-2.00024-0"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-803819-2.00022-7"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.111"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.cnsns.2007.05.016"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2008.04.005"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1103\/RevModPhys.79.421"},{"key":"ref18","article-title":"Intel&#x00AE;Xeon&#x00AE; processor E5-2680 (20M Cache, 2.70 GHz, 8.00 GT\/s Intel QPI)","year":"0"},{"key":"ref19","article-title":"FUJITSU supercomputer PRIMEHPC FX100","year":"0"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/0705041"},{"key":"ref4","first-page":"40","article-title":"Lattice QCD on Intel Xeon Phi coprocessors","author":"jo","year":"2013","journal-title":"Proc of the 28th Intl Supercomputing Conf"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/0021-9991(76)90053-X"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5160980"},{"key":"ref6","article-title":"GPU accelerated applications","year":"0"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1006\/jcph.1998.5962"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNB.2015.2403776"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2010.10.031"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.20829"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/1094342013490973"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1115\/1.4001192"},{"key":"ref1","first-page":"117","article-title":"Scaling GYSELA code beyond 32k-cores on Blue Gene\/Q","author":"bigot","year":"2012","journal-title":"Proc Luminy"},{"key":"ref20","article-title":"Intel&#x00AE; Xeon Phi&#x2122; coprocessor 5110P (8GB, 1.053 GHz, 60 core)","year":"0"},{"key":"ref22","year":"2015"},{"key":"ref21","article-title":"TESLA K20X GPU Accelerator","year":"0"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2016.05.007"},{"key":"ref41","article-title":"Communication-avoiding Krylov subspace methods","author":"hoemmen","year":"2010"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1088\/0029-5515\/50\/4\/043002"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1051\/proc\/201653013"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1122501.1122503"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/7946232\/07762180.pdf?arnumber=7762180","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T11:40:56Z","timestamp":1641987656000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7762180\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7,1]]},"references-count":41,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2016.2633349","relation":{},"ISSN":["1045-9219"],"issn-type":[{"value":"1045-9219","type":"print"}],"subject":[],"published":{"date-parts":[[2017,7,1]]}}}