{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T04:36:58Z","timestamp":1742963818614,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031725661"},{"type":"electronic","value":"9783031725678"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72567-8_6","type":"book-chapter","created":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T16:19:25Z","timestamp":1726762765000},"page":"79-93","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multilayer Multipurpose Caches for\u00a0OpenMP Target Regions 
on\u00a0FPGAs"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6491-8871","authenticated-orcid":false,"given":"Julian","family":"Brandner","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5673-0457","authenticated-orcid":false,"given":"Florian","family":"Mayer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3202-2904","authenticated-orcid":false,"given":"Michael","family":"Philippsen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,16]]},"reference":[{"doi-asserted-by":"publisher","unstructured":"Adler, M., Fleming, K.E., Parashar, A., Pellauer, M., Emer, J.: Leap scratchpads: automatic memory and cache management for reconfigurable logic. In: Proceedings of International Symposium on Field Programmable Gate Arrays (FPGA 2011), pp. 25\u201328. Monterey, CA (2011). https:\/\/doi.org\/10.1145\/1950413.1950421","key":"6_CR1","DOI":"10.1145\/1950413.1950421"},{"doi-asserted-by":"publisher","unstructured":"Blachut, K., Kryjak, T.: Real-time efficient FPGA implementation of the multi-scale Lucas-Kanade and Horn-Schunck optical flow algorithms for a 4k video stream. Sensors 22(13), 5017\u20135049 (2022). https:\/\/doi.org\/10.3390\/s22135017, https:\/\/www.mdpi.com\/1424-8220\/22\/13\/5017. Accessed 18 July 2024","key":"6_CR2","DOI":"10.3390\/s22135017"},{"key":"6_CR3","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1007\/978-3-031-15922-0_7","volume-title":"IWOMP 2022","author":"J Brandner","year":"2022","unstructured":"Brandner, J., Mayer, F., Philippsen, M.: Reducing OpenMP to FPGA round-trip times with predictive modelling. In: Klemm, M., de Supinski, B.R., Klinkenberg, J., Neth, B. (eds.) IWOMP 2022. LNCS, vol. 13527, pp. 94\u2013108. 
Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-15922-0_7"},{"key":"6_CR4","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1007\/978-3-031-40744-4_10","volume-title":"IWOMP 2023","author":"J Brandner","year":"2023","unstructured":"Brandner, J., Mayer, F., Philippsen, M.: Multipurpose cacheing to accelerate OpenMP target regions on FPGAs. In: McIntosh-Smith, S., Klemm, M., de Supinski, B.R., Deakin, T., Klinkenberg, J. (eds.) IWOMP 2023. LNCS, vol. 14114, pp. 147\u2013162. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-40744-4_10"},{"doi-asserted-by":"publisher","unstructured":"Brandner, J., Mayer, F., Philippsen, M.: Dataset for: \u201cMultilayer multipurpose caches for OpenMP target regions on FPGAs\u201d (2024). https:\/\/doi.org\/10.5281\/zenodo.12755510","key":"6_CR5","DOI":"10.5281\/zenodo.12755510"},{"key":"6_CR6","doi-asserted-by":"publisher","first-page":"118858","DOI":"10.1109\/ACCESS.2022.3219868","volume":"10","author":"G Brignone","year":"2022","unstructured":"Brignone, G., Usman Jamal, M., Lazarescu, M.T., Lavagno, L.: Array-specific dataflow caches for high-level synthesis of memory-intensive algorithms on FPGAs. IEEE Access 10, 118858\u2013118877 (2022). https:\/\/doi.org\/10.1109\/ACCESS.2022.3219868","journal-title":"IEEE Access"},{"doi-asserted-by":"publisher","unstructured":"Castells-Rufas, D., et al.: OpenCL-based FPGA accelerator for semi-global approximate string matching using diagonal bit-vectors. In: Proceedings of International Conference on Field Programmable Logic and Applications (FPL 2021), pp. 174\u2013178. Dresden, Germany (2021). https:\/\/doi.org\/10.1109\/FPL53798.2021.00036","key":"6_CR7","DOI":"10.1109\/FPL53798.2021.00036"},{"doi-asserted-by":"publisher","unstructured":"Chen, R., Siriyal, S., Prasanna, V.: Energy and memory efficient mapping of bitonic sorting on FPGA. In: Proceedings of International Symposium on Field Programmable Gate Arrays (FPGA 2015), pp. 
240\u2013249. Monterey, CA (2015). https:\/\/doi.org\/10.1145\/2684746.2689068","key":"6_CR8","DOI":"10.1145\/2684746.2689068"},{"doi-asserted-by":"publisher","unstructured":"Cheng, S., Lin, M., Liu, H.J., Scott, S., Wawrzynek, J.: Exploiting memory-level parallelism in reconfigurable accelerators. In: Proceedings of International Symposium on Field-Programmable Custom Computing Machines (FCCM 2012), pp. 157\u2013160. Toronto, Canada (2012). https:\/\/doi.org\/10.1109\/FCCM.2012.35","key":"6_CR9","DOI":"10.1109\/FCCM.2012.35"},{"doi-asserted-by":"publisher","unstructured":"Choi, J., Nam, K., Canis, A., Anderson, J., Brown, S., Czajkowski, T.: Impact of cache architecture and interface on performance and area of FPGA-based processor\/parallel-accelerator systems. In: Proc. International Symposium on Field-Programmable Custom Computing Machines (FCCM 2012), pp. 17\u201324. Toronto, Canada (2012). https:\/\/doi.org\/10.1109\/FCCM.2012.13","key":"6_CR10","DOI":"10.1109\/FCCM.2012.13"},{"doi-asserted-by":"publisher","unstructured":"Escobar, F.A., Kolar, A., Harb, N., Vinci Dos Santos, F., Valderrama, C.: Scalable shared-memory architecture to solve the knapsack 0\/1 problem. Microprocess. Microsyst. 50(3), 189\u2013201 (2017). https:\/\/doi.org\/10.1016\/j.micpro.2017.04.001","key":"6_CR11","DOI":"10.1016\/j.micpro.2017.04.001"},{"key":"6_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/978-3-030-58144-2_17","volume-title":"OpenMP: Portable Multi-Level Parallelism on Modern Systems","author":"J Huthmann","year":"2020","unstructured":"Huthmann, J., Sommer, L., Podobas, A., Koch, A., Sano, K.: OpenMP device offloading to FPGAs using the Nymble infrastructure. In: Milfeld, K., de Supinski, B.R., Koesterke, L., Klinkenberg, J. (eds.) IWOMP 2020. LNCS, vol. 12295, pp. 265\u2013279. Springer, Cham (2020). 
https:\/\/doi.org\/10.1007\/978-3-030-58144-2_17"},{"key":"6_CR13","doi-asserted-by":"publisher","first-page":"18953","DOI":"10.1109\/ACCESS.2017.2750923","volume":"5","author":"L Ma","year":"2017","unstructured":"Ma, L., Lavagno, L., Lazarescu, M.T., Arif, A.: Acceleration by inline cache for memory-intensive algorithms on FPGA via high-level synthesis. IEEE Access 5, 18953\u201318974 (2017). https:\/\/doi.org\/10.1109\/ACCESS.2017.2750923","journal-title":"IEEE Access"},{"doi-asserted-by":"publisher","unstructured":"Matthews, E., Doyle, N.C., Shannon, L.: Design space exploration of L1 data caches for FPGA-based multiprocessor systems. In: Proceedings of International Symposium on Field Programmable Gate Arrays (FPGA 2015), pp. 156\u2013159. Monterey, CA (2015). https:\/\/doi.org\/10.1145\/2684746.2689083","key":"6_CR14","DOI":"10.1145\/2684746.2689083"},{"key":"6_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/978-3-030-99372-6_6","volume-title":"Languages and Compilers for Parallel Computing","author":"F Mayer","year":"2022","unstructured":"Mayer, F., Brandner, J., Hellmann, M., Schwarzer, J., Philippsen, M.: The ORKA-HPC compiler\u2014practical OpenMP for\u00a0FPGAs. In: Li, X., Chandrasekaran, S. (eds.) LCPC 2021. LNCS, vol. 13181, pp. 83\u201397. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-030-99372-6_6"},{"key":"6_CR16","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/978-3-031-31445-2_4","volume-title":"LCPC 2022","author":"F Mayer","year":"2022","unstructured":"Mayer, F., Brandner, J., Philippsen, M.: Employing polyhedral methods to reduce data movement in FPGA stencil codes. In: Mendis, C., Rauchwerger, L. (eds.) LCPC 2022. LNCS, vol. 13829, pp. 47\u201363. Springer, Cham (2022). 
https:\/\/doi.org\/10.1007\/978-3-031-31445-2_4"},{"doi-asserted-by":"publisher","unstructured":"Mayer, F., Brandner, J., Philippsen, M.: Employing polyhedral methods to optimize stencils on FPGAs with stencil-specific caches, data reuse, and wide data bursts. In: 14th International Workshop Polyhedral Compilation Techniques (IMPACT 2024). Munich, Germany (2024). https:\/\/doi.org\/10.48550\/arXiv.2401.13645","key":"6_CR17","DOI":"10.48550\/arXiv.2401.13645"},{"key":"6_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1007\/978-3-030-28596-8_7","volume-title":"OpenMP: Conquering the Full Hardware Spectrum","author":"F Mayer","year":"2019","unstructured":"Mayer, F., Knaust, M., Philippsen, M.: OpenMP on FPGAs\u2014a survey. In: Fan, X., de Supinski, B.R., Sinnen, O., Giacaman, N. (eds.) IWOMP 2019. LNCS, vol. 11718, pp. 94\u2013108. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-28596-8_7"},{"issue":"7","key":"6_CR19","doi-asserted-by":"publisher","first-page":"3009","DOI":"10.1109\/TSP.2007.914926","volume":"56","author":"PK Meher","year":"2008","unstructured":"Meher, P.K., Chandrasekaran, S., Amira, A.: FPGA realization of FIR filters by efficient and flexible systolization using distributed arithmetic. IEEE Trans. Signal Process. 56(7), 3009\u20133017 (2008). https:\/\/doi.org\/10.1109\/TSP.2007.914926","journal-title":"IEEE Trans. Signal Process."},{"doi-asserted-by":"publisher","unstructured":"Moss, D.J., et al.: A customizable matrix multiplication framework for the Intel HARPv2 Xeon+FPGA platform: a deep learning case study. In: Proceedings of International Symposium on Field Programmable Gate Arrays (FPGA 2018), pp. 107\u2013116. Monterey, CA (2018). 
https:\/\/doi.org\/10.1145\/3174243.3174258","key":"6_CR20","DOI":"10.1145\/3174243.3174258"},{"doi-asserted-by":"publisher","unstructured":"Nepomuceno, R., Sterle, R., Valarini, G., Pereira, M., Yviquel, H., Araujo, G.: Enabling OpenMP task parallelism on Multi-FPGAs. arXiv:2103.10573 [cs.DC] (2021). https:\/\/doi.org\/10.1109\/FCCM51124.2021.00047","key":"6_CR21","DOI":"10.1109\/FCCM51124.2021.00047"},{"doi-asserted-by":"publisher","unstructured":"Nibbelink, K., Rajopadhye, S., McConnell, R.: 0\/1 knapsack on hardware: a complete solution. In: Proceedings of International Conference on Application-specific Systems, Architectures and Processors (ASAP 2007), pp. 160\u2013167. Montr\u00e9al, Canada (2007). https:\/\/doi.org\/10.1109\/ASAP.2007.4429974","key":"6_CR22","DOI":"10.1109\/ASAP.2007.4429974"},{"doi-asserted-by":"publisher","unstructured":"Park, S.Y., Meher, P.K.: Efficient FPGA and ASIC realizations of a DA-based reconfigurable FIR digital filter. IEEE Trans. Circuits and Syst. II: Express Briefs 61(7), 511\u2013515 (2014). https:\/\/doi.org\/10.1109\/TCSII.2014.2324418","key":"6_CR23","DOI":"10.1109\/TCSII.2014.2324418"},{"doi-asserted-by":"crossref","unstructured":"Pouchet, L.N., Zhang, P., Sadayappan, P., Cong, J.: Polyhedral-based data reuse optimization for configurable computing. In: Proceedings of International Symposium on Field Programmable Gate Arrays (FPGA 2013), pp. 29\u201338. Monterey, CA (2013)","key":"6_CR24","DOI":"10.1145\/2435264.2435273"},{"unstructured":"Programming Systems Group, Friedrich-Alexander Universit\u00e4t Erlangen-N\u00fcrnberg: Orka Compiler Distribution. https:\/\/cs2-gitlab.cs.fau.de\/orka\/orkadistro\/-\/tags\/MultilayerCacheReproduction. 
Accessed 18 July 2024","key":"6_CR25"},{"issue":"3","key":"6_CR26","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1145\/1555815.1555804","volume":"37","author":"A Putnam","year":"2009","unstructured":"Putnam, A., et al.: Performance and power of cache-based reconfigurable computing. SIGARCH Comput. Archit. News 37(3), 395\u2013405 (2009). https:\/\/doi.org\/10.1145\/1555815.1555804","journal-title":"SIGARCH Comput. Archit. News"},{"issue":"5","key":"6_CR27","doi-asserted-by":"publisher","first-page":"470","DOI":"10.1016\/j.micpro.2014.03.003","volume":"38","author":"V Sklyarov","year":"2014","unstructured":"Sklyarov, V., Skliarova, I.: High-performance implementation of regular and easily scalable sorting networks on an FPGA. Microprocess. Microsyst. 38(5), 470\u2013484 (2014). https:\/\/doi.org\/10.1016\/j.micpro.2014.03.003","journal-title":"Microprocess. Microsyst."},{"unstructured":"SPEC: SPEC CPU 2006. https:\/\/www.spec.org\/cpu2006\/. Accessed 18 July 2024","key":"6_CR28"},{"doi-asserted-by":"publisher","unstructured":"Winterstein, F., Fleming, K., Yang, H.J., Wickerson, J., Constantinides, G.: Custom-sized caches in application-specific memory hierarchies. In: Proceedings of International Conference on Field Programmable Technology (FPT 2015), pp. 144\u2013151. Queenstown, New Zealand (2015). https:\/\/doi.org\/10.1109\/FPT.2015.7393141","key":"6_CR29","DOI":"10.1109\/FPT.2015.7393141"},{"unstructured":"Xilinx (AMD): System Cache LogiCORE IP Product Guide. https:\/\/docs.amd.com\/r\/en-US\/pg118-system-cache. Accessed 18 July 2024","key":"6_CR30"},{"unstructured":"Xilinx (AMD): Xilinx Vitis HLS. https:\/\/www.xilinx.com\/products\/design-tools\/vitis\/vitis-hls.html. Accessed 18 July 2024","key":"6_CR31"},{"unstructured":"Xilinx (AMD): Xilinx Vitis PRAGMA HLS CACHE Documentation. https:\/\/docs.amd.com\/r\/en-US\/ug1399-vitis-hls\/pragma-HLS-cache. 
Accessed 18 July 2024","key":"6_CR32"},{"key":"6_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"372","DOI":"10.1007\/978-3-642-12133-3_36","volume-title":"Reconfigurable Computing: Architectures, Tools and Applications","author":"M Yoshimi","year":"2010","unstructured":"Yoshimi, M., Nishikawa, Y., Miki, M., Hiroyasu, T., Amano, H., Mencer, O.: A performance evaluation of CUBE: one-dimensional 512 FPGA cluster. In: Sirisuk, P., Morgan, F., El-Ghazawi, T., Amano, H. (eds.) ARC 2010. LNCS, vol. 5992, pp. 372\u2013381. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-12133-3_36"}],"container-title":["Lecture Notes in Computer Science","Advancing OpenMP for Future Accelerators"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72567-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T16:20:20Z","timestamp":1726762820000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72567-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031725661","9783031725678"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72567-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"16 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IWOMP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on OpenMP","order":2,"name":"conference_name","label":"Conference 
Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Perth, WA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iwomp2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iwomp.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}