{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:55:01Z","timestamp":1773194101898,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T00:00:00Z","timestamp":1722816000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,5]]},"DOI":"10.1145\/3665314.3672281","type":"proceedings-article","created":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T19:31:18Z","timestamp":1725910278000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Heterogeneous Memory Integration and Optimization for Energy-Efficient Multi-Task NLP Edge Inference"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4334-2031","authenticated-orcid":false,"given":"Zirui","family":"Fu","sequence":"first","affiliation":[{"name":"Tufts University, Medford, MA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8112-356X","authenticated-orcid":false,"given":"Aleksandre","family":"Avaliani","sequence":"additional","affiliation":[{"name":"Tufts University, Medford, MA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9354-3447","authenticated-orcid":false,"given":"Marco","family":"Donato","sequence":"additional","affiliation":[{"name":"Tufts University, Medford, MA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems.","author":"Cai H.","unstructured":"H. Cai, C. Gan, L. Zhu, and S. Han. 2020. TinyTL: reduce memory, not parameters for efficient on-device learning. In Proceedings of the 34th International Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the Conference for Next Generation Arithmetic","author":"Carmichael Z.","year":"2019","unstructured":"Z. Carmichael, H. F. Langroudi, C. Khazanov, J. Lillie, J. L. Gustafson, and D. Kudithipudi. 2019. Performance-Efficiency Trade-off of Low-Precision Numerical Formats in Deep Neural Networks. In Proceedings of the Conference for Next Generation Arithmetic 2019."},{"key":"e_1_3_2_1_3_1","volume-title":"2022 IEEE International Solid-State Circuits Conference (ISSCC).","author":"Chang M.","unstructured":"M. Chang, S. D. Spetalnick, B. Crafton, W.-S. Khwa, Y.-D. Chih, M.-F. Chang, and A. Raychowdhury. 2022. A 40nm 60.64TOPS\/W ECC-Capable Compute-in-Memory\/Digital 2.25MB\/768KB RRAM\/SRAM System with Embedded Cortex M3 Microprocessor for Edge Recommendation Systems. In 2022 IEEE International Solid-State Circuits Conference (ISSCC)."},{"key":"e_1_3_2_1_4_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In North American","author":"Devlin J.","year":"2019","unstructured":"J. Devlin, M.-W. Chang, K. Lee, and K. Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In North American Chapter of the Association for Computational Linguistics."},{"key":"e_1_3_2_1_5_1","volume-title":"MEMTI: Optimizing On-Chip Nonvolatile Storage for Visual Multitask Inference at the Edge","author":"Donato M.","year":"2019","unstructured":"M. Donato, L. Pentecost, D. Brooks, and G.-Y. Wei. 2019. MEMTI: Optimizing On-Chip Nonvolatile Storage for Visual Multitask Inference at the Edge. IEEE Micro 39 (Nov. 2019), 73--81."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems.","author":"Gao M.","unstructured":"M. Gao, J. Pu, X. Yang, M. Horowitz, and C. Kozyrakis. 2017. TETRIS: Scalable and Efficient Neural Network Acceleration with 3D Memory. In Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 5th Workshop on Representation Learning for NLP.","author":"Gordon M.","unstructured":"M. Gordon, K. Duh, and N. Andrews. 2020. Compressing BERT: Studying the Effects of Weight Pruning on Transfer Learning. In Proceedings of the 5th Workshop on Representation Learning for NLP."},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning.","author":"Houlsby N.","unstructured":"N. Houlsby, A. Giurgiu, S. Jastrzebski, B. Morrone, Q. De Laroussilhe, A. Gesmundo, M. Attariyan, and S. Gelly. 2019. Parameter-efficient transfer learning for NLP. In International conference on machine learning."},{"key":"e_1_3_2_1_9_1","unstructured":"E. J. Hu Y. Shen P. Wallis Z. Allen-Zhu Y. Li S. Wang L. Wang and W. Chen. 2021. LoRA: Low-Rank Adaptation of Large Language Models. arXiv:2106.09685"},{"key":"e_1_3_2_1_10_1","volume-title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. arXiv:1909.11942 [cs.CL]","author":"Lan Z.","year":"2020","unstructured":"Z. Lan, M. Chen, S. Goodman, K. Gimpel, P. Sharma, and R. Soricut. 2020. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. arXiv:1909.11942 [cs.CL]"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"T. Liang J. Glossner L. Wang S. Shi and X. Zhang. 2021. Pruning and quantization for deep neural network acceleration: A survey. Neurocomput. 461 (Oct. 2021) 370--403.","DOI":"10.1016\/j.neucom.2021.07.045"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems.","author":"Lin J.","unstructured":"J. Lin, W.-M. Chen, Y. Lin, J. Cohn, C. Gan, and S. Han. 2020. MCUNet: tiny deep learning on IoT devices. In Proceedings of the 34th International Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics.","author":"Liu X.","unstructured":"X. Liu, P. He, W. Chen, and J. Gao. 2019. Multi-Task Deep Neural Networks for Natural Language Understanding. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics."},{"key":"e_1_3_2_1_14_1","volume-title":"NVMExplorer: A Framework for Cross-Stack Comparisons of Embedded Non-Volatile Memories. In 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA).","author":"Pentecost L.","unstructured":"L. Pentecost, A. Hankin, M. Donato, M. Hempstead, G.-Y. Wei, and D. Brooks. 2022. NVMExplorer: A Framework for Cross-Stack Comparisons of Embedded Non-Volatile Memories. In 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA)."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems.","author":"Rebuffi S.-A.","unstructured":"S.-A. Rebuffi, H. Bilen, and A. Vedaldi. 2017. Learning multiple visual domains with residual adapters. In Proceedings of the 31st International Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition.","author":"Rebuffi S.-A.","unstructured":"S.-A. Rebuffi, H. Bilen, and A. Vedaldi. 2018. Efficient parametrization of multi-domain deep neural networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2023.3282046"},{"key":"e_1_3_2_1_18_1","first-page":"12991","article-title":"LST: Ladder side-tuning for parameter and memory efficient transfer learning","volume":"35","author":"Sung Y.-L.","year":"2022","unstructured":"Y.-L. Sung, J. Cho, and M. Bansal. 2022. LST: Ladder side-tuning for parameter and memory efficient transfer learning. Advances in Neural Information Processing Systems 35 (2022), 12991--13005.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_19_1","volume-title":"EdgeBERT: Sentence-Level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In 54th Annual IEEE\/ACM International Symposium on Microarchitecture.","author":"Tambe T.","year":"2021","unstructured":"T. Tambe, C. Hooper, L. Pentecost, T. Jia, E.-Y. Yang, M. Donato, V. Sanh, P. Whatmough, A. M. Rush, D. Brooks, and G.-Y. Wei. 2021. EdgeBERT: Sentence-Level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In 54th Annual IEEE\/ACM International Symposium on Microarchitecture."},{"key":"e_1_3_2_1_20_1","unstructured":"T. Tambe E.-Y. Yang Z. Wan Y. Deng V. J. Reddi A. Rush D. Brooks and G.-Y. Wei. 2020. AdaptivFloat: A Floating-point based Data Type for Resilient Deep Learning Inference. arXiv:1909.13271 [cs.LG]"},{"key":"e_1_3_2_1_21_1","volume-title":"GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In International Conference on Learning Representations.","author":"Wang A.","unstructured":"A. Wang, A. Singh, J. Michael, F. Hill, O. Levy, and S. R. Bowman. 2019. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_22_1","volume-title":"2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA).","author":"Wang Z.","unstructured":"Z. Wang, D. A. Jim\u00e9nez, C. Xu, G. Sun, and Y. Xie. 2014. Adaptive placement and migration policy for an STT-RAM-based hybrid cache. In 2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"F. Zhang L. Yang J. Meng J.-s. Seo Y. Cao and D. Fan. 2022. XMA2: A crossbar-aware multi-task adaption framework via 2-tier masks. Frontiers in Electronics 3 (2022).","DOI":"10.3389\/felec.2022.1032485"}],"event":{"name":"ISLPED '24: 29th ACM\/IEEE International Symposium on Low Power Electronics and Design","location":"Newport Beach CA USA","acronym":"ISLPED '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE EDA"]},"container-title":["Proceedings of the 29th ACM\/IEEE International Symposium on Low Power Electronics and Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665314.3672281","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3665314.3672281","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:51Z","timestamp":1750294671000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665314.3672281"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,5]]},"references-count":23,"alternative-id":["10.1145\/3665314.3672281","10.1145\/3665314"],"URL":"https:\/\/doi.org\/10.1145\/3665314.3672281","relation":{},"subject":[],"published":{"date-parts":[[2024,8,5]]},"assertion":[{"value":"2024-09-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}