{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T15:41:04Z","timestamp":1780674064860,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,23]],"date-time":"2024-06-23T00:00:00Z","timestamp":1719100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFB0300300"],"award-info":[{"award-number":["2021YFB0300300"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Programs of National Natural Science Foundation of China","award":["62072165"],"award-info":[{"award-number":["62072165"]}]},{"name":"Programs of National Natural Science Foundation of China","award":["62202154"],"award-info":[{"award-number":["62202154"]}]},{"name":"Programs of National Natural Science Foundation of China","award":["62172151"],"award-info":[{"award-number":["62172151"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,23]]},"DOI":"10.1145\/3649329.3657336","type":"proceedings-article","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:27:22Z","timestamp":1731007642000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["A Real-time Execution System of Multimodal Transformer through PIM-GPU Collaboration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3254-2152","authenticated-orcid":false,"given":"Shengyi","family":"Ji","sequence":"first","affiliation":[{"name":"College of Information Science and Engineering, Hunan University, Changsha, Hunan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2372-6715","authenticated-orcid":false,"given":"Chubo","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Information Science and Engineering, Hunan University, Changsha, Hunan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6956-9260","authenticated-orcid":false,"given":"Yan","family":"Ding","sequence":"additional","affiliation":[{"name":"College of Information Science and Engineering, Hunan University, Changsha, Hunan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1012-5301","authenticated-orcid":false,"given":"Qing","family":"Liao","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology (Shenzhen), Shenzhen, GuangDong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9081-8153","authenticated-orcid":false,"given":"Zhuo","family":"Tang","sequence":"additional","affiliation":[{"name":"College of Information Science and Engineering, Hunan University, Changsha, Hunan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding,\" arXiv preprint arXiv:1810.04805","year":"2018","unstructured":"Devlin et al., \"Bert: Pre-training of deep bidirectional transformers for language understanding,\" arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_2_1","first-page":"389","article-title":"Turbotransformers: an efficient gpu serving system for transformer models","author":"Fang J.","year":"2021","unstructured":"J. Fang et al., \"Turbotransformers: an efficient gpu serving system for transformer models,\" in PPoPP, 2021, pp. 389--402.","journal-title":"PPoPP"},{"key":"e_1_3_2_1_3_1","first-page":"1","article-title":"HAIMA: A Hybrid SRAM and DRAM Accelerator-in-Memory Architecture for Transformer","author":"Ding Y.","year":"2023","unstructured":"Y. Ding et al., \"HAIMA: A Hybrid SRAM and DRAM Accelerator-in-Memory Architecture for Transformer,\" in DAC, 2023, pp. 1--6.","journal-title":"DAC"},{"key":"e_1_3_2_1_4_1","first-page":"578","article-title":"{TVM}: An automated {End-to-End} optimizing compiler for deep learning","author":"Chen T.","year":"2018","unstructured":"T. Chen et al., \"{TVM}: An automated {End-to-End} optimizing compiler for deep learning,\" in OSDI, 2018, pp. 578--594.","journal-title":"OSDI"},{"key":"e_1_3_2_1_5_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks,\" Advances in neural information processing systems","author":"Lu J.","year":"2019","unstructured":"J. Lu et al., \"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks,\" Advances in neural information processing systems, vol. 32, 2019."},{"key":"e_1_3_2_1_6_1","volume-title":"Lxmert: Learning cross-modality encoder representations from transformers,\" arXiv preprint arXiv:1908.07490","author":"Tan H.","year":"2019","unstructured":"H. Tan et al., \"Lxmert: Learning cross-modality encoder representations from transformers,\" arXiv preprint arXiv:1908.07490, 2019."},{"key":"e_1_3_2_1_7_1","first-page":"1931","article-title":"Unifying vision-and-language tasks via text generation","author":"Cho J.","year":"2021","unstructured":"J. Cho et al., \"Unifying vision-and-language tasks via text generation,\" ICML, 2021, pp. 1931--1942.","journal-title":"ICML"},{"key":"e_1_3_2_1_8_1","volume-title":"Simvlm: Simple visual language model pretraining with weak supervision,\" arXiv preprint arXiv:2108.10904","author":"Wang Z.","year":"2021","unstructured":"Z. Wang et al., \"Simvlm: Simple visual language model pretraining with weak supervision,\" arXiv preprint arXiv:2108.10904, 2021."},{"key":"e_1_3_2_1_9_1","volume-title":"Pali: A jointly-scaled multilingual language-image model,\" arXiv preprint arXiv:2209.06794","author":"Chen X.","year":"2022","unstructured":"X. Chen et al., \"Pali: A jointly-scaled multilingual language-image model,\" arXiv preprint arXiv:2209.06794, 2022."},{"key":"e_1_3_2_1_10_1","volume-title":"Image as a foreign language: Beit pretraining for all vision and vision-language tasks,\" arXiv preprint arXiv:2208.10442","author":"Wang W.","year":"2022","unstructured":"W. Wang et al., \"Image as a foreign language: Beit pretraining for all vision and vision-language tasks,\" arXiv preprint arXiv:2208.10442, 2022."},{"key":"e_1_3_2_1_11_1","first-page":"372","article-title":"Newton: A dram-maker's accelerator-in-memory (aim) architecture for machine learning","author":"He M.","year":"2020","unstructured":"M. He et al., \"Newton: A dram-maker's accelerator-in-memory (aim) architecture for machine learning,\" in MICRO, 2020, pp. 372--385.","journal-title":"MICRO"},{"key":"e_1_3_2_1_12_1","volume-title":"Deep learning recommendation model for personalization and recommendation systems,\" arXiv preprint arXiv:1906.00091","author":"Naumov M.","year":"2019","unstructured":"M. Naumov et al., \"Deep learning recommendation model for personalization and recommendation systems,\" arXiv preprint arXiv:1906.00091, 2019."},{"key":"e_1_3_2_1_13_1","first-page":"1071","article-title":"Transpim: A memory-based acceleration via software-hardware co-design for transformer","author":"Zhou M.","year":"2022","unstructured":"M. Zhou et al., \"Transpim: A memory-based acceleration via software-hardware co-design for transformer,\" in HPCA, 2022, pp. 1071--1085.","journal-title":"HPCA"},{"key":"e_1_3_2_1_14_1","unstructured":"(2019) Open neural network exchange. [Online]. Available: https:\/\/onnx.ai\/"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2015.2414456"},{"key":"e_1_3_2_1_16_1","first-page":"473","article-title":"Accel-sim: An extensible simulation framework for validated gpu modeling","author":"Khairy M.","year":"2020","unstructured":"M. Khairy et al., \"Accel-sim: An extensible simulation framework for validated gpu modeling,\" in ISCA, 2020, pp. 473--486.","journal-title":"ISCA"},{"key":"e_1_3_2_1_17_1","first-page":"12888","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","author":"Li J.","year":"2022","unstructured":"J. Li et al., \"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation,\" in ICML, 2022, pp. 12888--12900.","journal-title":"ICML"}],"event":{"name":"DAC '24: 61st ACM\/IEEE Design Automation Conference","location":"San Francisco CA USA","acronym":"DAC '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 61st ACM\/IEEE Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3657336","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649329.3657336","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:00Z","timestamp":1750295880000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3657336"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,23]]},"references-count":17,"alternative-id":["10.1145\/3649329.3657336","10.1145\/3649329"],"URL":"https:\/\/doi.org\/10.1145\/3649329.3657336","relation":{},"subject":[],"published":{"date-parts":[[2024,6,23]]},"assertion":[{"value":"2024-11-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}