{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T18:10:01Z","timestamp":1748542201931,"version":"3.41.0"},"reference-count":69,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2022YFB3104700"],"award-info":[{"award-number":["2022YFB3104700"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62121002","U23B2028","62232006","62272436"],"award-info":[{"award-number":["62121002","U23B2028","62232006","62272436"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tmm.2025.3557677","type":"journal-article","created":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T19:59:13Z","timestamp":1743710353000},"page":"3117-3131","source":"Crossref","is-referenced-by-count":0,"title":["Leveraging Concise Concepts With Probabilistic Modeling for Interpretable Visual Recognition"],"prefix":"10.1109","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2010-2967","authenticated-orcid":false,"given":"Yixuan","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2840-6235","authenticated-orcid":false,"given":"Chuanbin","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5052-2582","authenticated-orcid":false,"given":"Yizhi","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Hunan University of Science and Technology, Hunan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4239-3418","authenticated-orcid":false,"given":"Yifan","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1720-7193","authenticated-orcid":false,"given":"Zhiying","family":"Lu","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6249-5315","authenticated-orcid":false,"given":"Hongtao","family":"Xie","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1151-1792","authenticated-orcid":false,"given":"Yongdong","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3214431"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2477042"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2993952"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3144804"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-019-0048-x"},{"key":"ref6","first-page":"5338","article-title":"Concept bottleneck models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Koh","year":"2020"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3167702"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2024.3403167"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP53242.2021.9675376"},{"article-title":"Energy-based concept bottleneck models: Unifying prediction, concept intervention, and probabilistic interpretations","year":"2024","author":"Xu","key":"ref10"},{"article-title":"Post-hoc concept bottleneck models","year":"2022","author":"Yuksekgonul","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-020-00265-z"},{"key":"ref13","article-title":"Label-free concept bottleneck models","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Oikarinen","year":"2023"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01839"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00287"},{"key":"ref16","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown","year":"2020"},{"issue":"PMLR","key":"ref17","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00182"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02228"},{"article-title":"Concept-based explainable artificial intelligence: A survey","year":"2023","author":"Poeta","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.2400250"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00518"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01228-7"},{"issue":"PMLR","key":"ref24","first-page":"2668","article-title":"Interpretability beyond feature attribution: Quantitative testing with concept activation vectors (TCAV)","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim","year":"2018"},{"article-title":"Now you see me (CME): Concept-based model extraction","year":"2020","author":"Kazhdan","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27944"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00546"},{"article-title":"Visual recognition with deep nearest centroids","year":"2022","author":"Wang","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459250"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206594"},{"key":"ref31","first-page":"21400","article-title":"Concept embedding models","volume-title":"Proc. NeurIPS 36th Conf. Neural Inf. Process. Syst.","author":"Zarlenga","year":"2022"},{"article-title":"LLaMA: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref32"},{"article-title":"LLaMA 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref33"},{"key":"ref34","article-title":"GLM-130B: An open bilingual pre-trained model","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Zeng","year":"2023"},{"issue":"240","key":"ref35","first-page":"1","article-title":"PaLM: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.201"},{"article-title":"Towards multimodal in-context learning for vision & language models","year":"2024","author":"Doveh","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.354"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/AICS60730.2023.10470628"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.deelio-1.10"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612012"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01857"},{"key":"ref43","first-page":"9694","article-title":"Align before fuse: Vision and language representation learning with momentum distillation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Li","year":"2021"},{"issue":"PMLR","key":"ref44","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jia","year":"2021"},{"key":"ref45","article-title":"FILIP: Fine-grained interactive language-image pre-training","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yao","year":"2022"},{"key":"ref46","first-page":"35544","article-title":"Improving clip training with language rewrites","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Fan","year":"2023"},{"key":"ref47","first-page":"17085","article-title":"Probabilistic contrastive learning recovers the correct aleatoric uncertainty of ambiguous inputs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kirchhof","year":"2023"},{"key":"ref48","article-title":"Modeling uncertainty with hedged instance embedding","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Oh","year":"2019"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00831"},{"key":"ref50","first-page":"11934","article-title":"A differentiable semantic metric approximation in probabilistic embedding for cross-modal retrieval","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Li","year":"2022"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00501"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01430"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01262"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00700"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.556"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.759"},{"key":"ref57","first-page":"121475","article-title":"CogVLM: Visual expert for pretrained language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wang","year":"2023"},{"issue":"1","key":"ref58","first-page":"6474","article-title":"Apricot: Submodular selection for data summarization in python","volume":"21","author":"Schreiber","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref59","article-title":"Auto-encoding variational Bayes","volume-title":"Proc. 2nd Int. Conf. Learn. Representations","author":"Kingma","year":"2014"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"article-title":"The caltech-UCSD birds-200-2011 dataset","year":"2011","author":"Wah","key":"ref61"},{"article-title":"Learning multiple layers of features from tiny images","year":"2009","author":"Krizhevsky","key":"ref62"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"article-title":"Adam: A method for stochastic optimization","year":"2014","author":"Kingma","key":"ref68"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00292"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6046\/10844992\/10948345.pdf?arnumber=10948345","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T17:30:55Z","timestamp":1748539855000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10948345\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":69,"URL":"https:\/\/doi.org\/10.1109\/tmm.2025.3557677","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"type":"print","value":"1520-9210"},{"type":"electronic","value":"1941-0077"}],"subject":[],"published":{"date-parts":[[2025]]}}}