{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:43:52Z","timestamp":1763192632283,"version":"3.45.0"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002701","name":"Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002701","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,12]]},"DOI":"10.1109\/waspaa66052.2025.11230949","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:47Z","timestamp":1763146007000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["Contrastive Representation Learning for Privacy-Preserving Fine-Tuning of Audio-Visual Speech Recognition"],"prefix":"10.1109","author":[{"given":"Luca","family":"Becker","sequence":"first","affiliation":[{"name":"Ruhr-Universit&#x00E4;t Bochum,Institute of Communication Acoustics,Bochum,Germany"}]},{"given":"Rainer","family":"Martin","sequence":"additional","affiliation":[{"name":"Ruhr-Universit&#x00E4;t Bochum,Institute of Communication Acoustics,Bochum,Germany"}]}],"member":"263","reference":[{"article-title":"Learning audiovisual speech representation by masked multimodal cluster prediction","volume-title":"International Conference on Learning Representations (ICLR)","author":"Shi","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"article-title":"Representation learning with contrastive predictive coding","year":"2019","author":"van den Oord","key":"ref3"},{"article-title":"Adalora: Adaptive budget allocation for parameter-efficient fine-tuning","volume-title":"International Conference on Learning Representations","author":"Zhang","key":"ref4"},{"article-title":"Lrs3ted: a large-scale dataset for visual speech recognition","year":"2018","author":"Afouras","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/FG59268.2024.10582018"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2018-1929"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/11787006_1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-48910-X_16"},{"key":"ref10","first-page":"368","article-title":"The information bottleneck method","volume-title":"The 37th annual Allerton Conference on Communication, Control, and Computing","author":"Tishby"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3108063"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"ref14","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021","volume":"139","author":"Radford"},{"article-title":"Learning factorized multimodal representations","volume-title":"International Conference on Learning Representations (ICLR)","author":"Tsai","key":"ref15"},{"article-title":"Lora: Low-rank adaptation of large language models","volume-title":"International Conference on Learning Representations (ICLR)","author":"Hu","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2279"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1561\/0400000042"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2018.2822810"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888977"},{"issue":"15","key":"ref22","article-title":"Reliability-based large-vocabulary audio-visual speech recognition","volume-title":"Sensors","volume":"22","author":"Yu","year":"2022"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02591"}],"event":{"name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","start":{"date-parts":[[2025,10,12]]},"location":"Tahoe City, CA, USA","end":{"date-parts":[[2025,10,15]]}},"container-title":["2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11230875\/11230917\/11230949.pdf?arnumber=11230949","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:41:16Z","timestamp":1763192476000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11230949\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/waspaa66052.2025.11230949","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]}}}