{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T20:21:22Z","timestamp":1743020482825,"version":"3.40.3"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030870485"},{"type":"electronic","value":"9783030870492"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-87049-2_4","type":"book-chapter","created":{"date-parts":[[2022,3,3]],"date-time":"2022-03-03T05:04:20Z","timestamp":1646283860000},"page":"93-120","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Parallel Implementation of Cellular Automata and Stencil Computations in Current Processors"],"prefix":"10.1007","author":[{"given":"Fernando","family":"Diaz-del-Rio","sequence":"first","affiliation":[]},{"given":"Daniel","family":"Cagigas-Mu\u00f1iz","sequence":"additional","affiliation":[]},{"given":"Jose Luis","family":"Guisado-Lizar","sequence":"additional","affiliation":[]},{"given":"Jose Luis","family":"Sevillano-Ramos","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,3,3]]},"reference":[{"volume-title":"Simulating Complex Systems by Cellular Automata","year":"2010","key":"4_CR1","unstructured":"Hoekstra, A.G., Kroc, J., Sloot, P. (eds.): Simulating Complex Systems by Cellular Automata. Springer, Berlin, Heidelberg (2010)"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Bajz\u00e1t, T., Hajnal, E.: Cell automaton modelling algorithms: implementation and testing in GPU systems. In: INES 2011, 15th International Conference on Intelligent Engineering Systems (2011)","DOI":"10.1109\/INES.2011.5954741"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Balasalle, J., Lopez, M., Rutherford, M.: Optimizing Memory Access Patterns for Cellular Automata on GPUs, pp. 67\u201375. Elsevier\u2013Morgan Kaufmann\u2013NVIDIA (2011)","DOI":"10.1016\/B978-0-12-385963-1.00006-X"},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Bandman, O.: Using multi core computers for implementing cellular automata systems. Lect.ure Notes Comput. Sci. 6873(1), 140\u2013151 (2011)","DOI":"10.1007\/978-3-642-23178-0_12"},{"key":"4_CR5","doi-asserted-by":"publisher","first-page":"189","DOI":"10.3390\/electronics9010189","volume":"9","author":"D Cagigas-Mu\u00f1iz","year":"2020","unstructured":"Cagigas-Mu\u00f1iz, D., Diaz-del Rio, F., L\u00f3pez-Torres, M., Jim\u00e9nez-Morales, F., Guisado, J.L.: Developing efficient discrete simulations on multicore and GPU architectures. Electronics 9, 189 (2020). https:\/\/doi.org\/10.3390\/electronics9010189","journal-title":"Electronics"},{"issue":"4","key":"4_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2842615","volume":"12","author":"R Cattaneo","year":"2015","unstructured":"Cattaneo, R., Natale, G., Sicignano, C., Sciuto, D., Santambrogio, M.D.: On how to accelerate iterative stencil loops: a scalable streaming-based approach. ACM Trans. Archit. Code Optim. 12(4), 1\u201326 (2015)","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"4_CR7","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511549755","volume-title":"Cellular Automata Modeling of Physical Systems","author":"B Chopard","year":"1998","unstructured":"Chopard, B., Droz, M.: Cellular Automata Modeling of Physical Systems. Cambridge University Press, Cambridge, MA, USA (1998)"},{"key":"4_CR8","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1007\/3-540-55984-1_18","volume-title":"Compiler Construction","author":"E Duesterwald","year":"1992","unstructured":"Duesterwald, E., Gupta, R., Soffa, M.L.: Register pipelining: an integrated approach to register allocation for scalar and subscripted variables. In: Kastens, U., Pfahler, P. (eds.) Compiler Construction, pp. 192\u2013206. Springer, Berlin, Heidelberg (1992)"},{"key":"4_CR9","volume-title":"Winning Ways for your Mathematical Plays","author":"ER Berlekamp","year":"2001","unstructured":"Berlekamp, E.R., Conway, J.H., Guy, R.K.: Winning Ways for your Mathematical Plays, 2nd edn. A K Peters\/CRC Press, New York, USA (2001)","edition":"2"},{"issue":"4","key":"4_CR10","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1038\/scientificamerican1070-120","volume":"223","author":"M Gardner","year":"1970","unstructured":"Gardner, M.: Mathematical games: the fantastic combinations of John Conway\u2019s new solitaire game & \u201cLife\u2019\u2019. Sci. Am. 223(4), 120\u2013123 (1970)","journal-title":"Sci. Am."},{"key":"4_CR11","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.jpdc.2014.10.011","volume":"77","author":"MJ Gibson","year":"2015","unstructured":"Gibson, M.J., Keedwell, E.C., Savi\u0107, D.A.: An investigation of the efficient implementation of cellular automata on multi-core CPU and GPU hardware. J. Parallel Distrib. Comput. 77, 11\u201325 (2015)","journal-title":"J. Parallel Distrib. Comput."},{"issue":"Suppl. 1","key":"4_CR12","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1142\/S0219525907001100","volume":"10","author":"J Guisado","year":"2007","unstructured":"Guisado, J., Jim\u00e9nez-Morales, F., Fern\u00e1ndez-de Vega, F.: Cellular automata and cluster computing: an application to the simulation of laser dynamics. Adv. Complex Syst. 10(Suppl. 1), 167\u2013190 (2007)","journal-title":"Adv. Complex Syst."},{"key":"4_CR13","volume-title":"Computer Architecture, Sixth Edition: A Quantitative Approach","author":"JL Hennessy","year":"2017","unstructured":"Hennessy, J.L., Patterson, D.A.: Computer Architecture, Sixth Edition: A Quantitative Approach, 6th edn. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA (2017)","edition":"6"},{"key":"4_CR14","unstructured":"Hwu, W.m.: GPU Computing Gems Jade Edition, 1st edn. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA (2011)"},{"key":"4_CR15","doi-asserted-by":"publisher","DOI":"10.1142\/4702","volume-title":"Cellular Automata: A Discrete Universe","author":"A Ilachinski","year":"2001","unstructured":"Ilachinski, A.: Cellular Automata: A Discrete Universe. World Scientific, Singapore (2001)"},{"issue":"1","key":"4_CR16","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/L-CA.2013.6","volume":"13","author":"A Ilic","year":"2014","unstructured":"Ilic, A., Pratas, F., Sousa, L.: Cache-aware roofline model: upgrading the loft. IEEE Comput. Archit. Lett. 13(1), 21\u201324 (2014). https:\/\/doi.org\/10.1109\/L-CA.2013.6","journal-title":"IEEE Comput. Archit. Lett."},{"key":"4_CR17","unstructured":"Intel: Intel intrinsics guide. https:\/\/software.intel.com\/sites\/landingpage\/IntrinsicsGuide\/"},{"issue":"1\u20133","key":"4_CR18","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.tcs.2004.11.021","volume":"334","author":"J Kari","year":"2005","unstructured":"Kari, J.: Theory of cellular automata: a survey. Theor. Comput. Sci. 334(1\u20133), 3\u201333 (2005)","journal-title":"Theor. Comput. Sci."},{"key":"4_CR19","unstructured":"Kirk, D.B., Hwu, W.m.W.: Programming Massively Parallel Processors: A Hands-on Approach. Morgan Kaufmann Publishers, Burlington, MA (2010)"},{"key":"4_CR20","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1007\/978-3-319-92040-5_12","volume-title":"High Performance Computing","author":"T Koskela","year":"2018","unstructured":"Koskela, T., Matveev, Z., Yang, C., Adedoyin, A., Belenov, R., Thierry, P., Zhao, Z., Gayatri, R., Shan, H., Oliker, L., Deslippe, J., Green, R., Williams, S.: A novel multi-level integrated roofline model approach for performance characterization. In: Yokota, R., Weiland, M., Keyes, D., Trinitis, C. (eds.) High Performance Computing, pp. 226\u2013245. Springer, Cham (2018)"},{"issue":"6","key":"4_CR21","doi-asserted-by":"publisher","first-page":"975","DOI":"10.1145\/1034774.1034777","volume":"26","author":"Z Li","year":"2004","unstructured":"Li, Z., Song, Y.: Automatic tiling of iterative stencil loops. ACM Trans. Progr. Lang. Syst. 26(6), 975\u20131028 (2004)","journal-title":"ACM Trans. Progr. Lang. Syst."},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Linares-Barranco, A., Sevillano, J., Obaidat, M.S.: AER filtering using glider: VHDL cellular automata description. In: 15th IEEE International Conference on Electronics, Circuits and Systems, pp. 614\u2013617 (2008)","DOI":"10.1109\/ICECS.2008.4674928"},{"key":"4_CR23","unstructured":"Lopez-Torres, M., Guisado, J., Jimenez-Morales, F., Diaz-del Rio, F.: GPU-based cellular automata simulations of laser dynamics. In: Proceedings of the XXIII Jornadas de Paralelismo: Jornadas SARTECO 2012, pp. 261\u2013266. SARTECO, Elche (2012). http:\/\/www.jornadassarteco.org\/js2012\/papers\/paper_151.pdf"},{"key":"4_CR24","doi-asserted-by":"publisher","unstructured":"Matsumura, K., Zohouri, H., Wahib, M., Endo, T., Matsuoka, S.: AN5D: automated stencil framework for high-degree temporal blocking on GPUS. In: International Symposium on Code Generation and Optimization, pp. 199\u2013211 (2020). https:\/\/doi.org\/10.1145\/3368826.3377904","DOI":"10.1145\/3368826.3377904"},{"key":"4_CR25","unstructured":"Mill\u00f1in, E., Mart\u00ednez, P., Gil\u00a0Costa, G., Piccoli, M., Printista, A., Bederian, C., Garc\u00eda\u00a0Garino, C., Bringa, E.: Parallel implementation of a cellular automata in a hybrid CPU\/GPU environment. In: XVIII Congreso Argentino de Ciencias de la Computaci\u00f3n, pp. 184\u2013193 (2013)"},{"key":"4_CR26","volume-title":"Theory of Self-reproducing Automata","author":"J von Neumann","year":"1966","unstructured":"von Neumann, J.: Theory of Self-reproducing Automata. University of Illinois Press, Urbana (1966)"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Nguyen, A.D., Satish, N., Chhugani, J., Kim, C., Dubey, P.: 3.5-D blocking optimization for stencil computations on modern CPUS and GPUS . In: SC, pp. 1\u201313. IEEE (2010)","DOI":"10.1109\/SC.2010.2"},{"issue":"1","key":"4_CR28","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.jocs.2013.07.005","volume":"5","author":"G Oxman","year":"2014","unstructured":"Oxman, G., Weiss, S., Be\u2019ery, Y.: Computational methods for Conway\u2019s Game of Life cellular automaton. J. Comput. Sci. 5(1), 24\u201331 (2014)","journal-title":"J. Comput. Sci."},{"key":"4_CR29","volume-title":"Computer Systems: A Programmer\u2019s Perspective","author":"RE Bryant","year":"2016","unstructured":"Bryant, R.E., O\u2019Hallaron, D.R.: Computer Systems: A Programmer\u2019s Perspective, 3rd edn. Pearson, London, UK (2016)","edition":"3"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Rybacki, S., Himmelspach, J., Uhrmacher, A.: CPU and GPU based simulation of cellular automata\u2014a performance comparison. In: Proceedings of the 1st SIMUL, pp. 62\u201367 (2009)","DOI":"10.1109\/SIMUL.2009.36"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Song, Y., Li, Z.: New tiling techniques to improve cache temporal locality. In: Proceedings of the ACM SIGPLAN 1999 Conference on Programming Language Design and Implementation, PLDI \u201999, pp. 215\u2013228. Association for Computing Machinery, New York, NY, USA (1999). https:\/\/doi.org\/10.1145\/301618.301668","DOI":"10.1145\/301618.301668"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Stengel, H., Treibig, J., Hager, G., Wellein, G.: Quantifying performance bottlenecks of stencil computations using the execution-cache-memory model. In: Proceedings of the 29th ACM on International Conference on Supercomputing, ICS \u201915, pp. 207\u2013216. Association for Computing Machinery, New York, NY, USA (2015). https:\/\/doi.org\/10.1145\/2751205.2751240","DOI":"10.1145\/2751205.2751240"},{"issue":"4","key":"4_CR33","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009)","journal-title":"Commun. ACM"},{"issue":"20","key":"4_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/cpe.5547","volume":"32","author":"C Yang","year":"2020","unstructured":"Yang, C., Kurth, T., Williams, S.: Hierarchical Roofline analysis for GPUS: accelerating performance optimization for the NERSC-9 Perlmutter system. Concurr. Comput. 32(20), 1\u201312 (2020)","journal-title":"Concurr. Comput."}],"container-title":["Lecture Notes in Networks and Systems","Advances in Computing, Informatics, Networking and Cybersecurity"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-87049-2_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T18:00:35Z","timestamp":1726768835000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-87049-2_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030870485","9783030870492"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-87049-2_4","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"3 March 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}