{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:44:16Z","timestamp":1773377056102,"version":"3.50.1"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/isit63088.2025.11195330","type":"proceedings-article","created":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T17:48:08Z","timestamp":1760982488000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Leveraging Conditional Mutual Information to Improve Large Language Model Fine-Tuning for Classification"],"prefix":"10.1109","author":[{"given":"Thanushon","family":"Sivakaran","sequence":"first","affiliation":[{"name":"University of Waterloo,Department of Electrical and Computer Engineering,Waterloo,Ontario"}]},{"given":"En-Hui","family":"Yang","sequence":"additional","affiliation":[{"name":"University of Waterloo,Department of Electrical and Computer Engineering,Waterloo,Ontario"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Improving language understanding by generative pre-training,","volume-title":"OpenAI, Tech. Rep.","author":"Radford","year":"2018"},{"key":"ref2","article-title":"Language models are unsupervised multitask learners,","volume-title":"OpenAI, Tech. Rep.","author":"Radford","year":"2019"},{"key":"ref3","volume-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"ref4","first-page":"4171","article-title":"BERT: Pretraining of deep bidirectional transformers for language understanding,","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin","year":"2019"},{"key":"ref5","first-page":"3730","article-title":"Text summarization with pretrained encoders,","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Liu","year":"2019"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"7871","DOI":"10.18653\/v1\/2020.acl-main.703","article-title":"BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension,","volume-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","author":"Lewis","year":"2020"},{"key":"ref7","first-page":"59986008","article-title":"Attention is all you need,","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","volume":"30","author":"Vaswani","year":"2017"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"726","DOI":"10.1162\/tacl_a_00343","article-title":"Multilingual denoising pre-training for neural machine translation,","volume":"8","author":"Liu","year":"2020","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"ref9","volume-title":"XLNet: generalized autoregressive pretraining for language understanding","author":"Yang","year":"2019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1948.tb01338.x"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/18.841161"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2003.818411"},{"key":"ref13","article-title":"Language modeling is compression,","volume-title":"Proceedings of the 2024 International Conference on Learning Representations (ICLR)","author":"Del\u00e9tang","year":"2024"},{"key":"ref14","article-title":"Transformers are universal predictors,","volume-title":"ICML 2023 Workshop Neural Compression: From Information Theory to Applications","author":"Basu","year":"2023"},{"key":"ref15","volume-title":"Conditional mutual information constrained deep learning for classification,","author":"Yang","year":"2023"},{"issue":"2","key":"ref16","first-page":"23","article-title":"A new algorithm for data compression","volume":"12","author":"Gage","year":"1994","journal-title":"C Users Journal"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/18.841160"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"535","DOI":"10.1145\/1150402.1150464","article-title":"Model compression,","volume-title":"Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD","author":"Bucil","year":"2006"},{"key":"ref19","volume-title":"Distilling the knowledge in a neural network,","author":"Hinton","year":"2015"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT57864.2024.10619241"},{"key":"ref21","article-title":"Bayes conditional distribution estimation for knowledge distillation based on conditional mutual information,","volume-title":"The Twelfth International Conference on Learning Representations (ICLR)","author":"Ye","year":"2024"},{"key":"ref22","article-title":"Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter,","volume-title":"Proceedings of the NeurIPS 2019 Workshop on Energy Efficient Machine Learning and Cognitive Computing","author":"Sanh","year":"2019"},{"key":"ref23","article-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding,","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Wang","year":"2019"}],"event":{"name":"2025 IEEE International Symposium on Information Theory (ISIT)","location":"Ann Arbor, MI, USA","start":{"date-parts":[[2025,6,22]]},"end":{"date-parts":[[2025,6,27]]}},"container-title":["2025 IEEE International Symposium on Information Theory (ISIT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11195206\/11195207\/11195330.pdf?arnumber=11195330","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:30:22Z","timestamp":1773347422000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11195330\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/isit63088.2025.11195330","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}