{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T18:56:54Z","timestamp":1773082614809,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Collaborative Innovation Center of Novel Software Technology and Industrialization"},{"name":"National Science Foundation of China","award":["62072232"],"award-info":[{"award-number":["62072232"]}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20191248"],"award-info":[{"award-number":["BK20191248"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010877","name":"Science, Technology and Innovation Commission of Shenzhen Municipality","doi-asserted-by":"publisher","award":["JCYJ20180307151516166"],"award-info":[{"award-number":["JCYJ20180307151516166"]}],"id":[{"id":"10.13039\/501100010877","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1145\/3469877.3493599","type":"proceedings-article","created":{"date-parts":[[2022,1,10]],"date-time":"2022-01-10T18:24:29Z","timestamp":1641839069000},"page":"1-5","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Hybrid Improvements in Multimodal Analysis for Deep Video Understanding"],"prefix":"10.1145","author":[{"given":"Beibei","family":"Zhang","sequence":"first","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, China"}]},{"given":"Fan","family":"Yu","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, China and Shenzhen Research Institute of Nanjing University, China"}]},{"given":"Yaqun","family":"Fang","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, China"}]},{"given":"Tongwei","family":"Ren","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, China and Shenzhen Research Institute of Nanjing University, China"}]},{"given":"Gangshan","family":"Wu","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, China"}]}],"member":"320","published-online":{"date-parts":[[2022,1,10]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372278.3390742"},{"key":"e_1_3_2_1_3_1","volume-title":"RetinaFace: Single-Shot Multi-Level Face Localisation in the Wild. In IEEE Conference on Computer Vision and Pattern Recognition. 5203\u20135212","author":"Deng Jiankang","year":"2020","unstructured":"Jiankang Deng , Jia Guo , Evangelos Ververas , Irene Kotsia , and Stefanos Zafeiriou . 2020 . RetinaFace: Single-Shot Multi-Level Face Localisation in the Wild. In IEEE Conference on Computer Vision and Pattern Recognition. 5203\u20135212 . Jiankang Deng, Jia Guo, Evangelos Ververas, Irene Kotsia, and Stefanos Zafeiriou. 2020. RetinaFace: Single-Shot Multi-Level Face Localisation in the Wild. In IEEE Conference on Computer Vision and Pattern Recognition. 5203\u20135212."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the International AAAI Conference on Web and Social Media, Vol.\u00a08.","author":"Hutto Clayton","year":"2014","unstructured":"Clayton Hutto and Eric Gilbert . 2014 . Vader: A parsimonious rule-based model for sentiment analysis of social media text . In Proceedings of the International AAAI Conference on Web and Social Media, Vol.\u00a08. Clayton Hutto and Eric Gilbert. 2014. Vader: A parsimonious rule-based model for sentiment analysis of social media text. In Proceedings of the International AAAI Conference on Web and Social Media, Vol.\u00a08."},{"key":"e_1_3_2_1_6_1","unstructured":"Gregory Koch Richard Zemel Ruslan Salakhutdinov 2015. Siamese neural networks for one-shot image recognition. In ICML deep learning workshop Vol.\u00a02. Lille.  Gregory Koch Richard Zemel Ruslan Salakhutdinov 2015. Siamese neural networks for one-shot image recognition. In ICML deep learning workshop Vol.\u00a02. Lille."},{"key":"e_1_3_2_1_7_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky , Ilya Sutskever , and Geoffrey\u00a0 E Hinton . 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 ( 2012 ), 1097\u20131105. Alex Krizhevsky, Ilya Sutskever, and Geoffrey\u00a0E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012), 1097\u20131105."},{"key":"e_1_3_2_1_8_1","unstructured":"Adam Santoro Sergey Bartunov Matthew Botvinick Daan Wierstra and Timothy Lillicrap. 2016. One-shot learning with memory-augmented neural networks. arXiv preprint arXiv:1605.06065(2016).  Adam Santoro Sergey Bartunov Matthew Botvinick Daan Wierstra and Timothy Lillicrap. 2016. One-shot learning with memory-augmented neural networks. arXiv preprint arXiv:1605.06065(2016)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Bing Shuai Andrew Berneshawi Xinyu Li Davide Modolo and Joseph Tighe. 2021. SiamMOT: Siamese Multi-Object Tracking. In CVPR.  Bing Shuai Andrew Berneshawi Xinyu Li Davide Modolo and Joseph Tighe. 2021. SiamMOT: Siamese Multi-Object Tracking. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01219"},{"key":"e_1_3_2_1_10_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199(2014).  Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199(2014)."},{"key":"e_1_3_2_1_11_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556(2014).  Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556(2014)."},{"key":"e_1_3_2_1_12_1","unstructured":"Jake Snell Kevin Swersky and Richard\u00a0S Zemel. 2017. Prototypical networks for few-shot learning. arXiv preprint arXiv:1703.05175(2017).  Jake Snell Kevin Swersky and Richard\u00a0S Zemel. 2017. Prototypical networks for few-shot learning. arXiv preprint arXiv:1703.05175(2017)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_14_1","volume-title":"Matching networks for one shot learning. Advances in neural information processing systems 29","author":"Vinyals Oriol","year":"2016","unstructured":"Oriol Vinyals , Charles Blundell , Timothy Lillicrap , Daan Wierstra , 2016. Matching networks for one shot learning. Advances in neural information processing systems 29 ( 2016 ), 3630\u20133638. Oriol Vinyals, Charles Blundell, Timothy Lillicrap, Daan Wierstra, 2016. Matching networks for one shot learning. Advances in neural information processing systems 29 (2016), 3630\u20133638."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01217"},{"key":"e_1_3_2_1_16_1","volume-title":"Deep Relationship Analysis in Video with Multimodal Feature Fusion. In ACM International Conference on Multimedia. 4640\u20134644","author":"Yu Fan","year":"2020","unstructured":"Fan Yu , DanDan Wang , Beibei Zhang , and Tongwei Ren . 2020 . Deep Relationship Analysis in Video with Multimodal Feature Fusion. In ACM International Conference on Multimedia. 4640\u20134644 . Fan Yu, DanDan Wang, Beibei Zhang, and Tongwei Ren. 2020. Deep Relationship Analysis in Video with Multimodal Feature Fusion. In ACM International Conference on Multimedia. 4640\u20134644."},{"key":"e_1_3_2_1_17_1","volume-title":"Joint Learning for Relationship and Interaction Analysis in Video with Multimodal Feature Fusion. In ACM International Conference on Multimedia.","author":"Zhang Beibei","year":"2021","unstructured":"Beibei Zhang , Fan Yu , Yaqun Fang , Tongwei Ren , and Gangshan Wu . 2021 . Joint Learning for Relationship and Interaction Analysis in Video with Multimodal Feature Fusion. In ACM International Conference on Multimedia. Beibei Zhang, Fan Yu, Yaqun Fang, Tongwei Ren, and Gangshan Wu. 2021. Joint Learning for Relationship and Interaction Analysis in Video with Multimodal Feature Fusion. In ACM International Conference on Multimedia."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_28"}],"event":{"name":"MMAsia '21: ACM Multimedia Asia","location":"Gold Coast Australia","acronym":"MMAsia '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["ACM Multimedia Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3469877.3493599","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3469877.3493599","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:28:16Z","timestamp":1750195696000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3469877.3493599"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12]]},"references-count":18,"alternative-id":["10.1145\/3469877.3493599","10.1145\/3469877"],"URL":"https:\/\/doi.org\/10.1145\/3469877.3493599","relation":{},"subject":[],"published":{"date-parts":[[2021,12]]},"assertion":[{"value":"2022-01-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}