{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T14:37:33Z","timestamp":1726065453758},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030389901"},{"type":"electronic","value":"9783030389918"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-38991-8_8","type":"book-chapter","created":{"date-parts":[[2020,1,22]],"date-time":"2020-01-22T01:34:32Z","timestamp":1579656872000},"page":"107-121","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Accelerating Lattice Boltzmann Method by Fully Exposing Vectorizable Loops"],"prefix":"10.1007","author":[{"given":"Bin","family":"Qu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Song","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hailong","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajun","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qian","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weiguo","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,1,22]]},"reference":[{"key":"8_CR1","unstructured":"AOS and soa. \nhttps:\/\/en.wikipedia.org\/wiki\/AOS_and_SOA\n\n. Accessed 1 Apr 2019"},{"key":"8_CR2","unstructured":"Intel$$\\textregistered $$ c++ compiler 19.0 developer guide and reference. \nhttps:\/\/software.intel.com\/en-us\/cpp-compiler-developer-guide-and-reference-vectorization-and-loops\n\n. Accessed 6 June 2019"},{"key":"8_CR3","unstructured":"openlbmflow. \nhttps:\/\/sourceforge.net\/projects\/lbmflow\n\n. Accessed 15 June 2019"},{"key":"8_CR4","unstructured":"Pluto - an automatic parallelizer and locality optimizer for affine loop nests. \nhttp:\/\/pluto-compiler.sourceforge.net\n\n. Accessed 7 June 2019"},{"key":"8_CR5","unstructured":"Acharya, A., Bondhugula, U.: PLUTO+: near-complete modeling of affine transformations for parallelism and locality. In: Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2015, San Francisco, CA, USA, 7\u201311 February, 2015, pp. 54\u201364 (2015)"},{"key":"8_CR6","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1109\/PGEC.1966.264565","volume":"5","author":"AJ Bernstein","year":"1966","unstructured":"Bernstein, A.J.: Analysis of programs for parallel processing. IEEE Trans. Electron. Comput. 5, 757\u2013763 (1966)","journal-title":"IEEE Trans. Electron. Comput."},{"key":"8_CR7","unstructured":"Bondhugula, U., Hartono, A., Ramanujam, J., Sadayappan, P.: A practical automatic polyhedral program optimization system. In: ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI), June 2008"},{"issue":"1","key":"8_CR8","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1146\/annurev.fluid.30.1.329","volume":"30","author":"S Chen","year":"1998","unstructured":"Chen, S., Doolen, G.D.: Lattice boltzmann method for fluid flows. Ann. Rev. Fluid Mechan. 30(1), 329\u2013364 (1998)","journal-title":"Ann. Rev. Fluid Mechan."},{"issue":"3","key":"8_CR9","first-page":"4038","volume":"5","author":"PS Devan","year":"2014","unstructured":"Devan, P.S., Kamat, R.: A review-loop dependence analysis for parallelizing compiler. Int. J. Comput. Sci. Inf. Technol. 5(3), 4038\u20134046 (2014)","journal-title":"Int. J. Comput. Sci. Inf. Technol."},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Di, P., Ye, D., Su, Y., Sui, Y., Xue, J.: Automatic parallelization of tiled loop nests with enhanced fine-grained parallelism on gpus. In: 2012 41st International Conference on Parallel Processing, pp. 350\u2013359. IEEE (2012)","DOI":"10.1109\/ICPP.2012.19"},{"key":"8_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1007\/978-3-030-05051-1_9","volume-title":"Algorithms and Architectures for Parallel Processing","author":"X Du","year":"2018","unstructured":"Du, X., et al.: Comparative study of distributed deep learning tools on supercomputers. In: Vaidya, J., Li, J. (eds.) ICA3PP 2018. LNCS, vol. 11334, pp. 122\u2013137. Springer, Cham (2018). \nhttps:\/\/doi.org\/10.1007\/978-3-030-05051-1_9"},{"issue":"5","key":"8_CR12","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/BF01407835","volume":"21","author":"P Feautrier","year":"1992","unstructured":"Feautrier, P.: Some efficient solutions to the affine scheduling problem. i. one-dimensional time. Int. J. Parallel Program. 21(5), 313\u2013347 (1992)","journal-title":"Int. J. Parallel Program."},{"issue":"6","key":"8_CR13","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1007\/BF01379404","volume":"21","author":"P Feautrier","year":"1992","unstructured":"Feautrier, P.: Some efficient solutions to the affine scheduling problem. part ii. multidimensional time. Int. J. Parallel Program. 21(6), 389\u2013420 (1992)","journal-title":"Int. J. Parallel Program."},{"key":"8_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1007\/978-3-030-05051-1_29","volume-title":"Algorithms and Architectures for Parallel Processing","author":"Y Feng","year":"2018","unstructured":"Feng, Y., Tang, J., Wang, C., Xie, J.: CuAPSS: a hybrid CUDA solution for all pairs similarity search. In: Vaidya, J., Li, J. (eds.) ICA3PP 2018. LNCS, vol. 11334, pp. 421\u2013436. Springer, Cham (2018). \nhttps:\/\/doi.org\/10.1007\/978-3-030-05051-1_29"},{"key":"8_CR15","unstructured":"Kong, M., Veras, R., Stock, K., Franchetti, F., Pouchet, L., Sadayappan, P.: When polyhedral transformations meet SIMD code generation. In: ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2013, Seattle, WA, USA, 16\u201319 June, 2013, pp. 127\u2013138 (2013)"},{"issue":"01n02","key":"8_CR16","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1142\/S0217979203017059","volume":"17","author":"M Krafczyk","year":"2003","unstructured":"Krafczyk, M., T\u00f6lke, J., Luo, L.S.: Large-eddy simulations with a multiple-relaxation-time lbe model. Int. J. Modern Phys. B 17(01n02), 33\u201339 (2003)","journal-title":"Int. J. Modern Phys. B"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Lim, A.W., Lam, M.S.: Maximizing parallelism and minimizing synchronization with affine transforms. In: Proceedings of the 24th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, pp. 201\u2013214. ACM (1997)","DOI":"10.1145\/263699.263719"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Liu, S., Zou, N., Cui, Y., Wu, W.: Accelerating the parallelization of lattice boltzmann method by exploiting the temporal locality. In: 2017 IEEE International Symposium on Parallel and Distributed Processing with Applications and 2017 IEEE International Conference on Ubiquitous Computing and Communications (ISPA\/IUCC), pp. 1186\u20131193. IEEE (2017)","DOI":"10.1109\/ISPA\/IUCC.2017.00178"},{"key":"8_CR19","unstructured":"Pouchet, L.N.: Interative optimization in the polyhedral model. Ph.D. thesis, University of Paris-Sud 11, Orsay, France, January 2010"},{"issue":"6","key":"8_CR20","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1209\/0295-5075\/17\/6\/001","volume":"17","author":"Y Qian","year":"1992","unstructured":"Qian, Y., d\u2019Humi\u00e8res, D., Lallemand, P.: Lattice BGK models for navier-stokes equation. EPL (Europhys. Lett.) 17(6), 479 (1992)","journal-title":"EPL (Europhys. Lett.)"},{"key":"8_CR21","volume-title":"Pentium Pro and Pentium II System Architecture","author":"T Shanley","year":"1998","unstructured":"Shanley, T.: Pentium Pro and Pentium II System Architecture. Addison-Wesley Professional, Boston (1998)"},{"key":"8_CR22","doi-asserted-by":"crossref","unstructured":"Tran, N.P., Lee, M., Choi, D.H.: Memory-efficient parallelization of 3D lattice boltzmann flow solver on a gpu. In: 2015 IEEE 22nd International Conference on High Performance Computing (HiPC), pp. 315\u2013324. IEEE (2015)","DOI":"10.1109\/HiPC.2015.49"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Trifunovic, K., Nuzman, D., Cohen, A., Zaks, A., Rosen, I.: Polyhedral-model guided loop-nest auto-vectorization. In: 2009 18th International Conference on Parallel Architectures and Compilation Techniques, pp. 327\u2013337. IEEE (2009)","DOI":"10.1109\/PACT.2009.18"},{"key":"8_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-4337-4","volume-title":"Loop Tiling for Parallelism","author":"J Xue","year":"2012","unstructured":"Xue, J.: Loop Tiling for Parallelism, vol. 575. Springer Science & Business Media, New York (2012). \nhttps:\/\/doi.org\/10.1007\/978-1-4615-4337-4"},{"key":"8_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/978-3-030-05051-1_24","volume-title":"Algorithms and Architectures for Parallel Processing","author":"W Zhang","year":"2018","unstructured":"Zhang, W., Zhang, L., Chen, Y.: Asynchronous parallel Dijkstra\u2019s algorithm on intel xeon phi processor. In: Vaidya, J., Li, J. (eds.) ICA3PP 2018. LNCS, vol. 11334, pp. 337\u2013357. Springer, Cham (2018). \nhttps:\/\/doi.org\/10.1007\/978-3-030-05051-1_24"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-38991-8_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,1,22]],"date-time":"2020-01-22T01:46:36Z","timestamp":1579657596000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-38991-8_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030389901","9783030389918"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-38991-8_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"22 January 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Melbourne, VIC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 December 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 December 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/nsclab.org\/ica3pp2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"251","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"73","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.8","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}