{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:28:55Z","timestamp":1765499335511,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","funder":[{"name":"Institute of Information & communications Technology Planning & Evaluation","award":["RS-2022-II220184 & RS-2021-II211343 & RS-2021-II212068"],"award-info":[{"award-number":["RS-2022-II220184 & RS-2021-II211343 & RS-2021-II212068"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761011","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T23:59:18Z","timestamp":1762559958000},"page":"3805-3813","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Unplug and Play Language Models: Decomposing Experts in Language Models at Inference Time"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2196-5149","authenticated-orcid":false,"given":"Nakyeong","family":"Yang","sequence":"first","affiliation":[{"name":"Seoul National University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1863-4220","authenticated-orcid":false,"given":"Jiwon","family":"Moon","sequence":"additional","affiliation":[{"name":"Seoul National University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1531-1415","authenticated-orcid":false,"given":"Junseok","family":"Kim","sequence":"additional","affiliation":[{"name":"Seoul National University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2805-7530","authenticated-orcid":false,"given":"Yunah","family":"Jang","sequence":"additional","affiliation":[{"name":"Seoul National University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2547-7051","authenticated-orcid":false,"given":"Kyomin","family":"Jung","sequence":"additional","affiliation":[{"name":"Seoul National University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Agarwal Rishabh","year":"2024","unstructured":"Rishabh Agarwal, Nino Vieillard, Yongchao Zhou, Piotr Stanczyk, Sabela Ramos Garea, Matthieu Geist, and Olivier Bachem. 2024. On-policy distillation of language models: Learning from self-generated mistakes. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_2_1","volume-title":"Fewshot Unified Question Answering: Tuning Models or Prompts? arXiv preprint arXiv:2305.14569","author":"Bansal Srijan","year":"2023","unstructured":"Srijan Bansal, Semih Yavuz, Bo Pang, Meghana Bhat, and Yingbo Zhou. 2023. Fewshot Unified Question Answering: Tuning Models or Prompts? arXiv preprint arXiv:2305.14569 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Discoprompt: Path prediction prompt tuning for implicit discourse relation recognition. arXiv preprint arXiv:2305.03973","author":"Chan Chunkit","year":"2023","unstructured":"Chunkit Chan, Xin Liu, Jiayang Cheng, Zihan Li, Yangqiu Song, Ginny Y Wong, and Simon See. 2023. Discoprompt: Path prediction prompt tuning for implicit discourse relation recognition. arXiv preprint arXiv:2305.03973 (2023)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i22.34529"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3663363"},{"key":"e_1_3_2_1_6_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks. arXiv preprint arXiv:1803.03635","author":"Frankle Jonathan","year":"2018","unstructured":"Jonathan Frankle and Michael Carbin. 2018. The lottery ticket hypothesis: Finding sparse, trainable neural networks. arXiv preprint arXiv:1803.03635 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"MiniLLM: Knowledge distillation of large language models. arXiv preprint arXiv:2306.08543","author":"Gu Yuxian","year":"2023","unstructured":"Yuxian Gu, Li Dong, Furu Wei, and Minlie Huang. 2023. MiniLLM: Knowledge distillation of large language models. arXiv preprint arXiv:2306.08543 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_10_1","volume-title":"The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691","author":"Lester Brian","year":"2021","unstructured":"Brian Lester, Rami Al-Rfou, and Noah Constant. 2021. The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)."},{"key":"e_1_3_2_1_12_1","volume-title":"Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190","author":"Li Xiang Lisa","year":"2021","unstructured":"Xiang Lisa Li and Percy Liang. 2021. Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Zujie Liang Feng Wei Yin Jie Yuxi Qian Zhenghong Hao and Bing Han. 2023. Prompts Can Play Lottery Tickets Well: Achieving Lifelong Information Extraction via Lottery Prompt Tuning.","DOI":"10.18653\/v1\/2023.acl-long.16"},{"key":"e_1_3_2_1_14_1","volume-title":"Zhengxiao Du, Zhilin Yang, and Jie Tang.","author":"Liu Xiao","year":"2021","unstructured":"Xiao Liu, Kaixuan Ji, Yicheng Fu, Weng Lam Tam, Zhengxiao Du, Zhilin Yang, and Jie Tang. 2021. P-tuning v2: Prompt tuning can be comparable to fine-tuning universally across scales and tasks. arXiv preprint arXiv:2110.07602 (2021)."},{"key":"e_1_3_2_1_15_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Machine Learning. PMLR, 22137--22176","author":"Liu Zichang","year":"2023","unstructured":"Zichang Liu, Jue Wang, Tri Dao, Tianyi Zhou, Binhang Yuan, Zhao Song, Anshumali Shrivastava, Ce Zhang, Yuandong Tian, Christopher Re, et al. 2023. Deja vu: Contextual sparsity for efficient llms at inference time. In International Conference on Machine Learning. PMLR, 22137--22176."},{"key":"e_1_3_2_1_17_1","volume-title":"Xprompt: Exploring the extreme of prompt tuning. arXiv preprint arXiv:2210.04457","author":"Ma Fang","year":"2022","unstructured":"Fang Ma, Chen Zhang, Lei Ren, Jingang Wang, Qifan Wang, Wei Wu, Xiaojun Quan, and Dawei Song. 2022. Xprompt: Exploring the extreme of prompt tuning. arXiv preprint arXiv:2210.04457 (2022)."},{"key":"e_1_3_2_1_18_1","volume-title":"Llm-pruner: On the structural pruning of large language models. Advances in neural information processing systems 36","author":"Ma Xinyin","year":"2023","unstructured":"Xinyin Ma, Gongfan Fang, and Xinchao Wang. 2023. Llm-pruner: On the structural pruning of large language models. Advances in neural information processing systems 36 (2023), 21702--21720."},{"volume-title":"Learning Word Vectors for Sentiment Analysis","author":"Maas Andrew L.","key":"e_1_3_2_1_19_1","unstructured":"Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. 2011. Learning Word Vectors for Sentiment Analysis. Association for Computational Linguistics. http:\/\/www.aclweb.org\/anthology\/ P11--1015"},{"key":"e_1_3_2_1_20_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL]"},{"key":"e_1_3_2_1_21_1","volume-title":"Task-Specific Skill Localization in Fine-tuned Language Models. arXiv preprint arXiv:2302.06600","author":"Panigrahi Abhishek","year":"2023","unstructured":"Abhishek Panigrahi, Nikunj Saunshi, Haoyu Zhao, and Sanjeev Arora. 2023. Task-Specific Skill Localization in Fine-tuned Language Models. arXiv preprint arXiv:2302.06600 (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"Adapters: A unified library for parameter-efficient and modular transfer learning. arXiv preprint arXiv:2311.11077","author":"Poth Clifton","year":"2023","unstructured":"Clifton Poth, Hannah Sterz, Indraneil Paul, Sukannya Purkayastha, Leon Engl\u00e4nder, Timo Imhof, Ivan Vuli?, Sebastian Ruder, Iryna Gurevych, and Jonas Pfeiffer. 2023. Adapters: A unified library for parameter-efficient and modular transfer learning. arXiv preprint arXiv:2311.11077 (2023)."},{"key":"e_1_3_2_1_23_1","unstructured":"Alec Radford JeffreyWu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_24_1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research 21, 140 (2020), 1--67.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_25_1","volume-title":"Omniquant: Omnidirectionally calibrated quantization for large language models. arXiv preprint arXiv:2308.13137","author":"Shao Wenqi","year":"2023","unstructured":"Wenqi Shao, Mengzhao Chen, Zhaoyang Zhang, Peng Xu, Lirui Zhao, Zhiqian Li, Kaipeng Zhang, Peng Gao, Yu Qiao, and Ping Luo. 2023. Omniquant: Omnidirectionally calibrated quantization for large language models. arXiv preprint arXiv:2308.13137 (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"Not just a black box: Interpretable deep learning by propagating activation differences. arXiv preprint arXiv:1605.01713 4","author":"Shrikumar Avanti","year":"2016","unstructured":"Avanti Shrikumar, Peyton Greenside, Anna Shcherbina, and Anshul Kundaje. 2016. Not just a black box: Interpretable deep learning by propagating activation differences. arXiv preprint arXiv:1605.01713 4 (2016)."},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PMLR, 3319-- 3328","author":"Sundararajan Mukund","year":"2017","unstructured":"Mukund Sundararajan, Ankur Taly, and Qiqi Yan. 2017. Axiomatic attribution for deep networks. In International conference on machine learning. PMLR, 3319-- 3328."},{"key":"e_1_3_2_1_28_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461","author":"Singh Amanpreet","year":"2018","unstructured":"AlexWang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R Bowman. 2018. GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461 (2018)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25363"},{"key":"e_1_3_2_1_31_1","volume-title":"Onebit: Towards extremely low-bit large language models. arXiv preprint arXiv:2402.11295","author":"Xu Yuzhuang","year":"2024","unstructured":"Yuzhuang Xu, Xu Han, Zonghan Yang, Shuo Wang, Qingfu Zhu, Zhiyuan Liu, Weidong Liu, and Wanxiang Che. 2024. Onebit: Towards extremely low-bit large language models. arXiv preprint arXiv:2402.11295 (2024)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.490"},{"key":"e_1_3_2_1_33_1","volume-title":"Wkvquant: Quantizing weight and key\/value cache for large language models gains more. arXiv preprint arXiv:2402.12065","author":"Yue Yuxuan","year":"2024","unstructured":"Yuxuan Yue, Zhihang Yuan, Haojie Duanmu, Sifan Zhou, Jianlong Wu, and Liqiang Nie. 2024. Wkvquant: Quantizing weight and key\/value cache for large language models gains more. arXiv preprint arXiv:2402.12065 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Character-level convolutional networks for text classification. Advances in neural information processing systems 28","author":"Zhang Xiang","year":"2015","unstructured":"Xiang Zhang, Junbo Zhao, and Yann LeCun. 2015. Character-level convolutional networks for text classification. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_1_35_1","volume-title":"A survey of multi-task learning in natural language processing: Regarding task relatedness and training methods. arXiv preprint arXiv:2204.03508","author":"Zhang Zhihan","year":"2022","unstructured":"Zhihan Zhang, Wenhao Yu, Mengxia Yu, Zhichun Guo, and Meng Jiang. 2022. A survey of multi-task learning in natural language processing: Regarding task relatedness and training methods. arXiv preprint arXiv:2204.03508 (2022)."}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761011","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:23:57Z","timestamp":1765499037000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761011"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":35,"alternative-id":["10.1145\/3746252.3761011","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761011","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}