{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:04:16Z","timestamp":1750309456165,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":14,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100020595","name":"National Science and Technology Council","doi-asserted-by":"publisher","award":["112-2222-E-011-004-MY2"],"award-info":[{"award-number":["112-2222-E-011-004-MY2"]}],"id":[{"id":"10.13039\/100020595","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3696409.3700290","type":"proceedings-article","created":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T09:55:23Z","timestamp":1735379723000},"page":"1-5","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Description-Driven Audiovisual Embedding Space Learning for Enhanced Movie Understanding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0523-8585","authenticated-orcid":false,"given":"Wei-Lun","family":"Huang","sequence":"first","affiliation":[{"name":"National Taiwan University of Science and Technology, Taipei, TW"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7382-3126","authenticated-orcid":false,"given":"Shao-Hung","family":"Wu","sequence":"additional","affiliation":[{"name":"National Tsing Hua University, Hsinchu, TW"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0721-4165","authenticated-orcid":false,"given":"Hung-Chang","family":"Huang","sequence":"additional","affiliation":[{"name":"KKCompany Technologies, Taipei, TW"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1917-2155","authenticated-orcid":false,"given":"Min-Chun","family":"Hu","sequence":"additional","affiliation":[{"name":"National Tsing Hua University, Hsinchu, TW"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8570-1575","authenticated-orcid":false,"given":"Tse-Yu","family":"Pan","sequence":"additional","affiliation":[{"name":"National Taiwan University of Science and Technology, Taipei, TW"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,12,28]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Hassan Akbari Liangzhe Yuan Rui Qian Wei-Hong Chuang Shih-Fu Chang Yin Cui and Boqing Gong. 2021. Vatt: Transformers for multimodal self-supervised learning from raw video audio and text. Advances in Neural Information Processing Systems 34 (2021) 24206\u201324221."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00632"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00967"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095889"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01569"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_41"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.5555\/2002472.2002491"},{"key":"e_1_3_3_1_10_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_11_2","unstructured":"Aaron van\u00a0den Oord Yazhe Li and Oriol Vinyals. 2019. Representation Learning with Contrastive Predictive Coding. arxiv:https:\/\/arXiv.org\/abs\/1807.03748\u00a0[cs.LG]"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00192"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00950"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01589"},{"key":"e_1_3_3_1_15_2","unstructured":"Bin Zhu Bin Lin Munan Ning Yang Yan Jiaxi Cui HongFa Wang Yatian Pang Wenhao Jiang Junwu Zhang Zongwei Li et\u00a0al. 2023. Languagebind: Extending video-language pretraining to n-modality by language-based semantic alignment. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.01852 (2023)."}],"event":{"name":"MMAsia '24: ACM Multimedia Asia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Auckland New Zealand","acronym":"MMAsia '24"},"container-title":["Proceedings of the 6th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700290","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696409.3700290","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:16Z","timestamp":1750295416000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700290"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":14,"alternative-id":["10.1145\/3696409.3700290","10.1145\/3696409"],"URL":"https:\/\/doi.org\/10.1145\/3696409.3700290","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}