{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T20:19:16Z","timestamp":1725913156419},"publisher-location":"Singapore","reference-count":21,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811064418"},{"type":"electronic","value":"9789811064425"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-981-10-6442-5_57","type":"book-chapter","created":{"date-parts":[[2017,10,5]],"date-time":"2017-10-05T05:54:47Z","timestamp":1507182887000},"page":"610-625","source":"Crossref","is-referenced-by-count":0,"title":["Experiences of Performance Optimization for Large Eddy Simulation on Intel MIC Platforms"],"prefix":"10.1007","author":[{"given":"Zhengxiong","family":"Hou","sequence":"first","affiliation":[]},{"given":"Chengwen","family":"Zhong","sequence":"additional","affiliation":[]},{"given":"Christian","family":"Perez","sequence":"additional","affiliation":[]},{"given":"Qing","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yunlan","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,6]]},"reference":[{"issue":"6","key":"57_CR1","doi-asserted-by":"crossref","first-page":"479","DOI":"10.1007\/s00607-013-0356-7","volume":"96","author":"QJ Li","year":"2014","unstructured":"Li, Q.J., Zhong, C.W., et al.: A parallel lattice Boltzmann method for large eddy simulation on multiple GPUs. Computing 96(6), 479\u2013501 (2014)","journal-title":"Computing"},{"key":"57_CR2","unstructured":"Top500 supercomputers. http:\/\/www.top500.org . Accessed 10 June 2017"},{"issue":"2","key":"57_CR3","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1109\/MM.2016.25","volume":"36","author":"A Sodani","year":"2016","unstructured":"Sodani, A., Gramunt, R., et al.: Knights landing: second-generation Intel Xeon Phi product. IEEE Micro 36(2), 34\u201346 (2016)","journal-title":"IEEE Micro"},{"key":"57_CR4","doi-asserted-by":"crossref","unstructured":"Dongarra, J.: The LINPACK benchmark: an explanation. In: Proceedings of 1st International Conference on Supercomputing, pp. 456\u2013474 (1989)","DOI":"10.1007\/3-540-18991-2_27"},{"key":"57_CR5","unstructured":"Gupta, V., Kim, H., Schwan, K.: Evaluating scalability of multi-threaded applications on a many-core platform. Technical report, Georgia Institute of Technology (2012)"},{"key":"57_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Evaluating scalability of emerging multithreaded applications on commodity multicore server. In: International Conference on Information Technology, Computer Engineering and Management Sciences (ICM), pp. 332\u2013335 (2011)","DOI":"10.1109\/ICM.2011.373"},{"key":"57_CR7","doi-asserted-by":"crossref","unstructured":"Heirman, W., Carlson, T.E., Craeynest, K.V., Hur, I., Jaleel, A., Eeckhout, L.: Undersubscribed threading on clustered cache architectures. In: 20th IEEE International Symposium on High-Performance Computer Architecture, HPCA 2014, pp. 678\u2013689 (2014)","DOI":"10.1109\/HPCA.2014.6835975"},{"issue":"1","key":"57_CR8","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1109\/MCSE.2012.37","volume":"15","author":"R Deleon","year":"2013","unstructured":"Deleon, R., Jacobsen, D., Senocak, I.: Large-eddy simulations of turbulent incompressible flows on GPU clusters. Comput. Sci. Eng. 15(1), 26\u201333 (2013)","journal-title":"Comput. Sci. Eng."},{"key":"57_CR9","first-page":"1","volume":"1","author":"W Wang","year":"2014","unstructured":"Wang, W., Shangguan, Y.Q., et al.: Direct numerical simulation and large eddy simulation on a turbulent wall-bounded flow using lattice Boltzmann method and multiple GPUs. Math. Probl. Eng. 1, 1\u201310 (2014)","journal-title":"Math. Probl. Eng."},{"issue":"5","key":"57_CR10","doi-asserted-by":"crossref","first-page":"715","DOI":"10.1175\/BAMS-D-14-00114.1","volume":"96","author":"J Schalkwijk","year":"2015","unstructured":"Schalkwijk, J., et al.: Weather forecasting using GPU-based large-eddy simulations. Bull. Am. Meteor. Soc. 96(5), 715\u2013724 (2015)","journal-title":"Bull. Am. Meteor. Soc."},{"key":"57_CR11","doi-asserted-by":"crossref","unstructured":"Lopez-Morales, M.R., et al.: Verification and validation of HiFiLES: a high-order LES unstructured solver on multi-GPU platforms. In: 32nd AIAA Applied Aero dynamics Conference (2014)","DOI":"10.2514\/6.2014-3168"},{"issue":"1","key":"57_CR12","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1007\/s11227-014-1245-3","volume":"70","author":"YG Che","year":"2014","unstructured":"Che, Y.G., et al.: Micro architectural performance comparison of Intel Knights Corner and Intel Sandy Bridge with CFD applications. J. Supercomput. 70(1), 321\u2013348 (2014)","journal-title":"J. Supercomput."},{"key":"57_CR13","doi-asserted-by":"crossref","unstructured":"Natarajan, C., Beckmann, C., et al.: Simulating stencil-based application on future Xeon Phi processor. In: Proceedings of 6th International Workshop in Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems, PMBS 2015, SC 2015 (2015)","DOI":"10.1145\/2832087.2832096"},{"key":"57_CR14","unstructured":"McCalpin, J.D.: Memory bandwidth and machine balance in current high performance computers. In: IEEE Computer Society Technical Committee on Computer Architecture (TCCA) Newsletter (1995)"},{"key":"57_CR15","volume-title":"Intel Xeon Phi Co-processor High-Performance Programming","author":"J Jeffers","year":"2013","unstructured":"Jeffers, J., Reinders, J.: Intel Xeon Phi Co-processor High-Performance Programming. Morgan Kaufmann, Burlington (2013)"},{"key":"57_CR16","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4302-5927-5","volume-title":"Intel Xeon Phi Co-processor Architecture and Tools - The Guide for Application Developers","author":"R Rahman","year":"2013","unstructured":"Rahman, R.: Intel Xeon Phi Co-processor Architecture and Tools - The Guide for Application Developers. Apress Open, New York (2013)"},{"issue":"3","key":"57_CR17","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1177\/1094342014524807","volume":"28","author":"Y Yang","year":"2014","unstructured":"Yang, Y., et al.: Evaluating multi-core and many-core architectures through accelerating the three dimensional Lax-Wendroff correction stencil. Int. J. High Perform. Comput. Appl. 28(3), 301\u2013318 (2014)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"57_CR18","volume-title":"Using MPI: Portable Parallel Programming with the Message-Passing Interface","author":"W Gropp","year":"1999","unstructured":"Gropp, W., Lusk, E., Skjellum, A.: Using MPI: Portable Parallel Programming with the Message-Passing Interface. MIT Press, Cambridge (1999)"},{"key":"57_CR19","volume-title":"Using OpenMP: Portable Shared Memory Parallel Programming (Scientific and Engineering Computation)","author":"C Barbara","year":"2007","unstructured":"Barbara, C., Gabriele, J., van der Ruud, P.: Using OpenMP: Portable Shared Memory Parallel Programming (Scientific and Engineering Computation). MIT Press, Cambridge (2007)"},{"key":"57_CR20","doi-asserted-by":"crossref","DOI":"10.1201\/EBK1439811924","volume-title":"Introduction to High-Performance Computing for Scientists and Engineers","author":"G Hager","year":"2010","unstructured":"Hager, G., Wellein, G.: Introduction to High-Performance Computing for Scientists and Engineers. CRC Press, Boca Raton (2010)"},{"key":"57_CR21","doi-asserted-by":"crossref","unstructured":"Hou, Z.X., Perez, C.: Performance evaluation and tuning of 2D Jacobi iteration on many-core machines. In: 2013 IEEE International Conference on High Performance Computing and Communications, pp. 603\u2013610, IEEE, Zhangjiajie (2013)","DOI":"10.1109\/HPCC.and.EUC.2013.91"}],"container-title":["Communications in Computer and Information Science","Parallel Architecture, Algorithm and Programming"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-10-6442-5_57","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,4]],"date-time":"2019-10-04T08:10:16Z","timestamp":1570176616000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-10-6442-5_57"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9789811064418","9789811064425"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-981-10-6442-5_57","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2017]]}}}