{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T04:56:57Z","timestamp":1777525017574,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T00:00:00Z","timestamp":1717977600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"EU Horizon Europe","award":["101070093 vera.ai"],"award-info":[{"award-number":["101070093 vera.ai"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,10]]},"DOI":"10.1145\/3643491.3660287","type":"proceedings-article","created":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T06:19:37Z","timestamp":1717222777000},"page":"30-36","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Visual and audio scene classification for detecting discrepancies in video: a baseline method and experimental protocol"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9470-6332","authenticated-orcid":false,"given":"Konstantinos","family":"Apostolidis","sequence":"first","affiliation":[{"name":"Information Technologies Institute, CERTH, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4689-7944","authenticated-orcid":false,"given":"Jakob","family":"Abesser","sequence":"additional","affiliation":[{"name":"Fraunhofer Institute for Digital Media Technology, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5559-6508","authenticated-orcid":false,"given":"Luca","family":"Cuccovillo","sequence":"additional","affiliation":[{"name":"Fraunhofer Institute for Digital Media Technology, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0121-4364","authenticated-orcid":false,"given":"Vasileios","family":"Mezaris","sequence":"additional","affiliation":[{"name":"Information Technologies Institute, CERTH, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,6,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"A Review of Deep Learning Based Methods for Acoustic Scene Classification. Applied Sciences 10, 6","author":"Abe\u00dfer Jakob","year":"2020","unstructured":"Jakob Abe\u00dfer. 2020. A Review of Deep Learning Based Methods for Acoustic Scene Classification. Applied Sciences 10, 6 (2020)."},{"key":"e_1_3_2_1_2_1","volume-title":"How Robust are Audio Embeddings for Polyphonic Sound Event Tagging?IEEE\/ACM Transactions on Audio, Speech, and Language Processing 31","author":"Abe\u00dfer Jakob","year":"2023","unstructured":"Jakob Abe\u00dfer, Sascha Grollmisch, and Meinard M\u00fcller. 2023. How Robust are Audio Embeddings for Polyphonic Sound Event Tagging?IEEE\/ACM Transactions on Audio, Speech, and Language Processing 31 (2023), 2658\u20132667."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.238"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682475"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2019.8937231"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"e_1_3_2_1_8_1","volume-title":"Improving Semi-Supervised Learning for Audio Classification with FixMatch. Electronics 10, 15","author":"Grollmisch Sascha","year":"2021","unstructured":"Sascha Grollmisch and Estefan\u00eda Cano. 2021. Improving Semi-Supervised Learning for Audio Classification with FixMatch. Electronics 10, 15 (2021)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME55011.2023.00295"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.23919\/Eusipco47968.2020.9287327"},{"key":"e_1_3_2_1_13_1","volume-title":"Technical Report. DCASE2020 Challenge.","author":"Kim Byeonggeun","year":"2021","unstructured":"Byeonggeun Kim, Seunghan Yang, Jangho Kim, and Simyung Chang. 2021. QTI Submission to DCASE 2021: Residual Normalization for Device-Imbalanced Acoustic Scene Classification with Efficient Design. Technical Report. DCASE2020 Challenge."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.23919\/Eusipco47968.2020.9287533"},{"key":"e_1_3_2_1_16_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_1_17_1","volume-title":"Self-Attention Prediction Correction with Channel Suppression for Weakly-Supervised Semantic Segmentation. In 2023 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 846\u2013851","author":"Sun Guoying","year":"2023","unstructured":"Guoying Sun and Meng Yang. 2023. Self-Attention Prediction Correction with Channel Suppression for Weakly-Supervised Semantic Segmentation. In 2023 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 846\u2013851."},{"key":"e_1_3_2_1_18_1","volume-title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. In International Conference on Machine Learning. PMLR, 6105\u20136114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. In International Conference on Machine Learning. PMLR, 6105\u20136114."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132384.3132387"},{"key":"e_1_3_2_1_20_1","volume-title":"Advances in Neural Information Processing Systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention Is All You Need. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_2_1_21_1","volume-title":"Computational Analysis of Sound Scenes and Events","author":"Virtanen Tuomas","unstructured":"Tuomas Virtanen, Mark\u00a0D. Plumbley, and Dan\u00a0(Eds.) Ellis. 2018. Computational Analysis of Sound Scenes and Events (1st ed.). Springer International Publishing.","edition":"1"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25353"},{"key":"e_1_3_2_1_23_1","unstructured":"Meng Wang Chengxin Chen Yuan Xie Hangting Chen Yuzhuo Liu and Pengyuan Zhang. 2021. Audio-Visual Scene Classification Using Transfer Learning and Hybrid Fusion Strategy. DCASE2021 Challenge Tech. Rep Tech. Rep. (2021)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9415085"}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","location":"Phuket Thailand","acronym":"ICMR '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["3rd ACM International Workshop on Multimedia AI against Disinformation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643491.3660287","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643491.3660287","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T19:35:14Z","timestamp":1756236914000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643491.3660287"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,10]]},"references-count":24,"alternative-id":["10.1145\/3643491.3660287","10.1145\/3643491"],"URL":"https:\/\/doi.org\/10.1145\/3643491.3660287","relation":{},"subject":[],"published":{"date-parts":[[2024,6,10]]},"assertion":[{"value":"2024-06-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}