{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:22:22Z","timestamp":1743121342182,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819736256"},{"type":"electronic","value":"9789819736263"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-3626-3_20","type":"book-chapter","created":{"date-parts":[[2024,6,20]],"date-time":"2024-06-20T10:07:45Z","timestamp":1718878065000},"page":"269-283","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MABC-Net: Multimodal Mixed Attentional Network with\u00a0Balanced Class for\u00a0Temporal Forgery Localization"],"prefix":"10.1007","author":[{"given":"Haonan","family":"Cheng","sequence":"first","affiliation":[]},{"given":"Haixin","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Li","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Long","family":"Ye","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,21]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Agarwal, S., Farid, H., Fried, O., Agrawala, M.: Detecting deep-fake videos from phoneme-viseme mismatches. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 2814\u20132822 (2020)","key":"20_CR1","DOI":"10.1109\/CVPRW50498.2020.00338"},{"doi-asserted-by":"crossref","unstructured":"Bagchi, A., Mahmood, J., Fernandes, D., Sarvadevabhatla, R.K.: Hear me out: Fusional approaches for audio augmented temporal action localization. arXiv preprint arXiv:2106.14118 (2021)","key":"20_CR2","DOI":"10.5220\/0010832700003124"},{"doi-asserted-by":"crossref","unstructured":"Cai, Z., Ghosh, S., Gedeon, T., Dhall, A., Stefanov, K., Hayat, M.: \u201cglitch in the matrix!\u201d: A large scale benchmark for content driven audio-visual forgery detection and localization. arXiv preprint arXiv:2305.01979 (2023)","key":"20_CR3","DOI":"10.1016\/j.cviu.2023.103818"},{"doi-asserted-by":"crossref","unstructured":"Cai, Z., Stefanov, K., Dhall, A., Hayat, M.: Do you really mean that? content driven audio-visual deepfake dataset and multimodal method for temporal forgery localization. In: Proceedings of the International Conference on Digital Image Computing: Techniques and Applications (DICTA), pp. 1\u201310 (2022)","key":"20_CR4","DOI":"10.1109\/DICTA56598.2022.10034605"},{"unstructured":"Cao, K., Wei, C., Gaidon, A., Arechiga, N., Ma, T.: Learning imbalanced datasets with label-distribution-aware margin loss. Advances in neural information processing systems 32 (2019)","key":"20_CR5"},{"unstructured":"Cheng, H., Guo, Y., Wang, T., Li, Q., Ye, T., Nie, L.: Voice-face homogeneity tells deepfake. arXiv preprint arXiv:2203.02195 (2022)","key":"20_CR6"},{"doi-asserted-by":"crossref","unstructured":"Chugh, K., Gupta, P., Dhall, A., Subramanian, R.: Not made for each other- audio-visual dissonance-based deepfake detection and localization. In: Proceedings of the ACM International Conference on Multimedia (ACM MM) (2020)","key":"20_CR7","DOI":"10.1145\/3394171.3413700"},{"doi-asserted-by":"crossref","unstructured":"Chung, J.S., Zisserman, A.: Out of time: Automated lip sync in the wild. In: Proceedings of the Asian Conference on Computer Vision Workshops (ACCVW), pp. 251\u2013263 (2017)","key":"20_CR8","DOI":"10.1007\/978-3-319-54427-4_19"},{"doi-asserted-by":"crossref","unstructured":"Cozzolino, D., Nie\u00dfner, M., Verdoliva, L.: Audio-visual person-of-interest deepfake detection. arXiv preprint arXiv:2204.03083 (2022)","key":"20_CR9","DOI":"10.1109\/CVPRW59228.2023.00101"},{"unstructured":"Dolhansky, B., Bitton, J., Pflaum, B., Lu, J., Howes, R., Wang, M., Ferrer, C.C.: The deepfake detection challenge (dfdc) dataset. arXiv preprint arXiv:2006.07397 (2020)","key":"20_CR10"},{"doi-asserted-by":"crossref","unstructured":"Hong, Y., Han, S., Choi, K., Seo, S., Kim, B., Chang, B.: Disentangling label distribution for long-tailed visual recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6626\u20136636 (2021)","key":"20_CR11","DOI":"10.1109\/CVPR46437.2021.00656"},{"key":"20_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2023.110124","volume":"136","author":"H Ilyas","year":"2023","unstructured":"Ilyas, H., Javed, A., Malik, K.M.: Avfakenet: a unified end-to-end dense swin transformer deep learning model for audio-visual deepfakes detection. Appl. Soft Comput. 136, 110124 (2023)","journal-title":"Appl. Soft Comput."},{"unstructured":"Khalid, H., Tariq, S., Kim, M., Woo, S.S.: Fakeavceleb: A novel audio-video multimodal deepfake dataset. arXiv preprint arXiv:2108.05080 (2021)","key":"20_CR13"},{"unstructured":"Korshunov, P., et al.: Tampered speaker inconsistency detection with phonetically aware audio-visual features. In: Proceedings of the International Conference on Machine Learning (ICML), pp.\u00a01\u20135 (2019)","key":"20_CR14"},{"doi-asserted-by":"crossref","unstructured":"Korshunov, P., Marcel, S.: Speaker inconsistency detection in tampered video. In: Proceedings of the European Signal Processing Conference (EUSIPCO), pp. 2375\u20132379 (2018)","key":"20_CR15","DOI":"10.23919\/EUSIPCO.2018.8553270"},{"doi-asserted-by":"crossref","unstructured":"Lin, T., Liu, X., Li, X., Ding, E., Wen, S.: Bmn: boundary-matching network for temporal action proposal generation. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 3889\u20133898 (2019)","key":"20_CR16","DOI":"10.1109\/ICCV.2019.00399"},{"doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: Bsn: boundary sensitive network for temporal action proposal generation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","key":"20_CR17","DOI":"10.1007\/978-3-030-01225-0_1"},{"doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 2980\u20132988 (2017)","key":"20_CR18","DOI":"10.1109\/ICCV.2017.324"},{"doi-asserted-by":"crossref","unstructured":"Lomnitz, M., Hampel-Arias, Z., Sandesara, V., Hu, S.: Multimodal approach for deepfake detection. In: Proceedings of the Applied Imagery Pattern Recognition Workshop (AIPRW), pp.\u00a01\u20139 (2020)","key":"20_CR19","DOI":"10.1109\/AIPR50011.2020.9425192"},{"doi-asserted-by":"crossref","unstructured":"Mittal, T., Bhattacharya, U., Chandra, R., Bera, A., Manocha, D.: Emotions don\u2019t lie: an audio-visual deepfake detection method using affective cues. In: Proceedings of the ACM International Conference on Multimedia (ACM MM), pp. 2823\u20132832 (2020)","key":"20_CR20","DOI":"10.1145\/3394171.3413570"},{"unstructured":"Nawhal, M., Mori, G.: Activity graph transformer for temporal action localization. arXiv preprint arXiv:2101.08540 (2021)","key":"20_CR21"},{"key":"20_CR22","first-page":"4175","volume":"33","author":"J Ren","year":"2020","unstructured":"Ren, J., Yu, C., Ma, X., Zhao, H., Yi, S., et al.: Balanced meta-softmax for long-tailed visual recognition. Adv. Neural. Inf. Process. Syst. 33, 4175\u20134186 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Tian, Y., Li, D., Xu, C.: Unified multisensory perception: weakly-supervised audio-visual video parsing. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 436\u2013454 (2020)","key":"20_CR23","DOI":"10.1007\/978-3-030-58580-8_26"},{"doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 4489\u20134497 (2015)","key":"20_CR24","DOI":"10.1109\/ICCV.2015.510"},{"unstructured":"Wang, G., Zhang, P., Xie, L., Huang, W., Zha, Y., Zhang, Y.: An audio-visual attention based multimodal network for fake talking face videos detection. arXiv preprint arXiv:2203.05178 (2022)","key":"20_CR25"},{"doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., Kweon, I.S.: Cbam: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","key":"20_CR26","DOI":"10.1007\/978-3-030-01234-2_1"},{"doi-asserted-by":"crossref","unstructured":"Xu, Z., Liu, R., Yang, S., Chai, Z., Yuan, C.: Learning imbalanced data with vision transformers. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 15793\u201315803 (2023)","key":"20_CR27","DOI":"10.1109\/CVPR52729.2023.01516"},{"key":"20_CR28","doi-asserted-by":"publisher","first-page":"2015","DOI":"10.1109\/TIFS.2023.3262148","volume":"18","author":"W Yang","year":"2023","unstructured":"Yang, W., Zhou, X., Chen, Z., Guo, B., Ba, Z., Xia, Z., Cao, X., Ren, K.: Avoid-df: audio-visual joint learning for detecting deepfake. IEEE Trans. Inf. Forensics Secur. 18, 2015\u20132029 (2023)","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"doi-asserted-by":"crossref","unstructured":"Yu, J., Cheng, Y., Zhao, R.W., Feng, R., Zhang, Y.: Mm-pyramid: Multimodal pyramid attentional network for audio-visual event localization and video parsing. In: Proceedings of the ACM International Conference on Multimedia (ACM MM), pp. 6241\u20136249 (2022)","key":"20_CR29","DOI":"10.1145\/3503161.3547869"},{"doi-asserted-by":"crossref","unstructured":"Zhou, Y., Lim, S.N.: Joint audio-visual deepfake detection. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 14800\u201314809 (2021)","key":"20_CR30","DOI":"10.1109\/ICCV48922.2021.01453"}],"container-title":["Communications in Computer and Information Science","Digital Multimedia Communications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-3626-3_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,20]],"date-time":"2024-06-20T10:20:10Z","timestamp":1718878810000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-3626-3_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819736256","9789819736263"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-3626-3_20","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"21 June 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IFTC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Forum on Digital TV and Wireless Multimedia Communications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Beijing","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 December 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iftc2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.siga.org.cn\/xshd\/iftc2023.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}