{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T07:43:21Z","timestamp":1769499801081,"version":"3.49.0"},"publisher-location":"Berlin, Heidelberg","reference-count":25,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642387494","type":"print"},{"value":"9783642387500","type":"electronic"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-38750-0_4","type":"book-chapter","created":{"date-parts":[[2013,6,10]],"date-time":"2013-06-10T01:26:27Z","timestamp":1370827587000},"page":"40-54","source":"Crossref","is-referenced-by-count":27,"title":["Lattice QCD on Intel\u00ae Xeon PhiTM Coprocessors"],"prefix":"10.1007","author":[{"given":"B\u00e1lint","family":"Jo\u00f3","sequence":"first","affiliation":[]},{"given":"Dhiraj D.","family":"Kalamkar","sequence":"additional","affiliation":[]},{"given":"Karthikeyan","family":"Vaidyanathan","sequence":"additional","affiliation":[]},{"given":"Mikhail","family":"Smelyanskiy","sequence":"additional","affiliation":[]},{"given":"Kiran","family":"Pamnany","sequence":"additional","affiliation":[]},{"given":"Victor W.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Pradeep","family":"Dubey","sequence":"additional","affiliation":[]},{"suffix":"III","given":"William","family":"Watson","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"6","key":"4_CR1","doi-asserted-by":"publisher","first-page":"409","DOI":"10.6028\/jres.049.044","volume":"49","author":"M.R. Hestenes","year":"1952","unstructured":"Hestenes, M.R., Stiefel, E.: Methods of Conjugate Gradients for Solving Linear Systems. Journal of Research of the National Bureau of Standards\u00a049(6), 409\u2013436 (1952)","journal-title":"Journal of Research of the National Bureau of Standards"},{"key":"4_CR2","unstructured":"Creutz, M.: Quarks, Gluons and Lattices. Cambridge Monographs on Mathematical Physics, 169 p. Univ. Pr., Cambridge (1983)"},{"key":"4_CR3","first-page":"69","volume-title":"New Phenomena in Subnuclear Physics","author":"K.G. Wilson","year":"1975","unstructured":"Wilson, K.G.: Quarks and Strings on a Lattice. In: Zichichi, A. (ed.) New Phenomena in Subnuclear Physics, p. 69. Plenum Press, New York (1975)"},{"issue":"2","key":"4_CR4","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1137\/0913035","volume":"13","author":"H.A. van der Vorst","year":"1992","unstructured":"van der Vorst, H.A.: Bi-CGSTAB: A Fast and Smoothly Converging Variant of Bi-CG for the Solution of Nonsymmetric Linear Systems. SIAM Journal on Scientific and Statistical Computing\u00a013(2), 631\u2013644 (1992)","journal-title":"SIAM Journal on Scientific and Statistical Computing"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Smelyanskiy, M., Vaidyanathan, K., Choi, J., Jo\u00f3, B., Chhugani, J., Clark, M.A., Dubey, P.: High-performance lattice QCD for multi-core based parallel systems using a cache-friendly hybrid threaded-MPI approach. In: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2011, pp. 69:1\u201369:11 (2011)","DOI":"10.1145\/2063384.2063477"},{"key":"4_CR6","doi-asserted-by":"publisher","first-page":"1517","DOI":"10.1016\/j.cpc.2010.05.002","volume":"181","author":"M.A. Clark","year":"2010","unstructured":"Clark, M.A., Babich, R., Barros, K., Brower, R.C., Rebbi, C.: Solving Lattice QCD systems of equations using mixed precision solvers on GPUs. Comput. Phys. Commun.\u00a0181, 1517\u20131528 (2010)","journal-title":"Comput. Phys. Commun."},{"key":"4_CR7","unstructured":"OpenMP Architecture Review Board: OpenMP Application Program Interface (2011)"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Nguyen, A.D., Satish, N., Chhugani, J., Kim, C., Dubey, P.: 3.5-D blocking optimization for stencil computations on modern CPUs and GPUs. In: SC, pp. 1\u201313 (2010)","DOI":"10.1109\/SC.2010.2"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Babich, R., Clark, M.A., Jo\u00f3, B.: Parallelizing the QUDA Library for Multi-GPU Calculations in Lattice Quantum Chromodynamics. In: Proceedings of the 2010 ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2010, pp. 1\u201311 (2010)","DOI":"10.1109\/SC.2010.40"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Boyle, P.A.: The BlueGene\/Q supercomputer. PoS LATTICE 2012, 020 (2012)","DOI":"10.22323\/1.164.0020"},{"key":"4_CR11","unstructured":"MPI: A Message-Passing Interface Standard (March 1994)"},{"issue":"1","key":"4_CR12","first-page":"12034","volume":"78","author":"B. Jo\u00f3","year":"2007","unstructured":"Jo\u00f3, B.: SciDAC-2 software infrastructure for lattice QCD. Journal of Physics: Conference Series\u00a078(1), 012034 (2007)","journal-title":"Journal of Physics: Conference Series"},{"issue":"5","key":"4_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1147\/JRD.2009.5429074","volume":"53","author":"S. Pakin","year":"2009","unstructured":"Pakin, S., Lang, M., Kerbyson, D.J.: The reverse-acceleration model for programming petascale hybrid systems. IBM Journal of Research and Development\u00a053(5), 8:1\u20138:15 (2009)","journal-title":"IBM Journal of Research and Development"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Heinecke, A., et al.: Design and Implementation of the Linpack Benchmark for Single and Multi-Node Systems Based on Intel(R) Xeon Phi(TM) Coprocessor. In: Proceedings of IPDPS Conference (2013)","DOI":"10.1109\/IPDPS.2013.113"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Strzodka, R., G\u00f6ddeke, D.: Pipelined mixed precision algorithms on FPGAs for fast and accurate PDE solvers from low precision components. In: IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM 2006), pp. 259\u2013268 (April 2006)","DOI":"10.1109\/FCCM.2006.57"},{"key":"4_CR16","first-page":"1","volume-title":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2012","author":"J. Doi","year":"2012","unstructured":"Doi, J.: Peta-scale lattice quantum chromodynamics on a blue gene\/Q supercomputer. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2012, pp. 1\u201345. IEEE Computer Society Press, Los Alamitos (2012)"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Alexandru, A., Lujan, M., Pelissier, C., Gamari, B., Lee, F.X.: Efficient implementation of the overlap operator on multi-GPUs (2011)","DOI":"10.1109\/SAAHPC.2011.13"},{"key":"4_CR18","unstructured":"Kowalski, A., Shen, X.: Implementing the Dslash Operator in OpenCL. College of William and Mary Technical Report (2010)"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Bach, M., Lindenstruth, V., Philipsen, O., Pinke, C.: Lattice QCD based on OpenCL (2012)","DOI":"10.1016\/j.cpc.2013.03.020"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Clark, M.A., Babich, R.: High-efficiency lattice QCD computations on the fermi architecture. In: Innovative Parallel Computing (InPar), pp. 1\u20139 (May 2012)","DOI":"10.1109\/InPar.2012.6339591"},{"key":"4_CR21","first-page":"1","volume-title":"Proceedings of the 1998 ACM\/IEEE Conference on Supercomputing (CDROM), Supercomputing 1998","author":"D. Chen","year":"1998","unstructured":"Chen, D., et al.: QCDSP machines: design, performance and cost. In: Proceedings of the 1998 ACM\/IEEE Conference on Supercomputing (CDROM), Supercomputing 1998, pp. 1\u20136. IEEE Computer Society, Washington, DC (1998)"},{"key":"4_CR22","volume-title":"Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006","author":"P. Vranas","year":"2006","unstructured":"Vranas, P., et al.: The BlueGene\/L supercomputer and quantum ChromoDynamics. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006. ACM, New York (2006)"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Boyle, P.A.: The BAGEL assembler generation library. Computer Physics Communications\u00a0180(12), 2739\u20132748 (2009) 40 YEARS OF CPC: A celebratory issue focused on quality software for high performance, grid and novel computing architectures","DOI":"10.1016\/j.cpc.2009.08.010"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Pochinsky, A.: Writing efficient QCD code made simpler: QA(0). PoS LATTICE 2008, 040 (2008)","DOI":"10.22323\/1.066.0040"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Chen, J., Watson, W., Mao, W.: GMH: A Message Passing Toolkit for GPU Clusters. In: 2010 IEEE 16th International Conference on Parallel and Distributed Systems (ICPADS), pp. 35\u201342 (December 2010)","DOI":"10.1109\/ICPADS.2010.35"}],"container-title":["Lecture Notes in Computer Science","Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-38750-0_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,17]],"date-time":"2021-12-17T09:11:20Z","timestamp":1639732280000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-38750-0_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642387494","9783642387500"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-38750-0_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013]]}}}