{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T07:05:27Z","timestamp":1768719927665,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61571071,61906025,61932009"],"award-info":[{"award-number":["61571071,61906025,61932009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Research Program of Chongqing Municipal Education Commission under Grant","award":["KJQN201900607, KJQN202000647"],"award-info":[{"award-number":["KJQN201900607, KJQN202000647"]}]},{"DOI":"10.13039\/501100013223","name":"Chongqing Research Program of Basic Research and Frontier Technology","doi-asserted-by":"publisher","award":["cstc2018jcyjAX0227,cstc2020jcyj-msxmX0835"],"award-info":[{"award-number":["cstc2018jcyjAX0227,cstc2020jcyj-msxmX0835"]}],"id":[{"id":"10.13039\/501100013223","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475424","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T04:52:26Z","timestamp":1634532746000},"page":"4958-4966","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Video-to-Image Casting: A Flatting Method for Video Analysis"],"prefix":"10.1145","author":[{"given":"Xu","family":"Chen","sequence":"first","affiliation":[{"name":"Chongqing University of Posts and Telecommunications &amp; Chongqing Key Laboratory of Signal and Information Processing, Chongqing, China"}]},{"given":"Chenqiang","family":"Gao","sequence":"additional","affiliation":[{"name":"Chongqing University of Posts and Telecommunications &amp; Chongqing Key Laboratory of Signal and Information Processing, Chongqing, China"}]},{"given":"Feng","family":"Yang","sequence":"additional","affiliation":[{"name":"Chongqing University of Posts and Telecommunications &amp; Chongqing Key Laboratory of Signal and Information Processing, Chongqing, China"}]},{"given":"Xiaohan","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University &amp; Baidu Research, Hangzhou, China"}]},{"given":"Yi","family":"Yang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Yahong","family":"Han","sequence":"additional","affiliation":[{"name":"Tianjin University &amp; Peng Cheng Laboratory, Tianjin, China"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3050069"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00319"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.221"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_1_8_1","first-page":"5","article-title":"The","volume":"1","author":"Goyal Raghav","year":"2017","unstructured":"Raghav Goyal , Samira Ebrahimi Kahou , Vincent Michalski , Joanna Materzynska , Susanne Westphal , Heuna Kim , Valentin Haenel , Ingo Fruend , Peter Yianilos , Moritz Mueller-Freitag , 2017 . The \" Something Something\" Video Database for Learning and Evaluating Visual Common Sense.. In ICCV , Vol. 1. 5 . Raghav Goyal, Samira Ebrahimi Kahou, Vincent Michalski, Joanna Materzynska, Susanne Westphal, Heuna Kim, Valentin Haenel, Ingo Fruend, Peter Yianilos, Moritz Mueller-Freitag, et al. 2017. The\" Something Something\" Video Database for Learning and Evaluating Visual Common Sense.. In ICCV, Vol. 1. 5.","journal-title":"In ICCV"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.333"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_20"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00186"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00685"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.179"},{"key":"e_1_3_2_1_14_1","volume-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy . 2015. Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167 ( 2015 ). Sergey Ioffe and Christian Szegedy. 2015. Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167 (2015)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"e_1_3_2_1_16_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling . 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 ( 2016 ). Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.79"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350978"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00099"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00718"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123343"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3231741"},{"key":"e_1_3_2_1_23_1","volume-title":"Video cloze procedure for self-supervised spatio-temporal learning. arXiv preprint arXiv:2001.00294","author":"Luo Dezhao","year":"2020","unstructured":"Dezhao Luo , Chang Liu , Yu Zhou , Dongbao Yang , Can Ma , Qixiang Ye , and Weiping Wang . 2020. Video cloze procedure for self-supervised spatio-temporal learning. arXiv preprint arXiv:2001.00294 ( 2020 ). Dezhao Luo, Chang Liu, Yu Zhou, Dongbao Yang, Can Ma, Qixiang Ye, and Weiping Wang. 2020. Video cloze procedure for self-supervised spatio-temporal learning. arXiv preprint arXiv:2001.00294 (2020)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_32"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.178"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.278"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/2968826.2968890"},{"key":"e_1_3_2_1_29_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro , Amir Roshan Zamir, and Mubarak Shah . 2012 . UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012). Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"e_1_3_2_1_33_1","volume-title":"Instance normalization: The missing ingredient for fast stylization. arXiv preprint arXiv:1607.08022","author":"Ulyanov Dmitry","year":"2016","unstructured":"Dmitry Ulyanov , Andrea Vedaldi , and Victor Lempitsky . 2016. Instance normalization: The missing ingredient for fast stylization. arXiv preprint arXiv:1607.08022 ( 2016 ). Dmitry Ulyanov, Andrea Vedaldi, and Victor Lempitsky. 2016. Instance normalization: The missing ingredient for fast stylization. arXiv preprint arXiv:1607.08022 (2016)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00413"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00037"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_19"},{"key":"e_1_3_2_1_40_1","volume-title":"Empirical evaluation of rectified activations in convolutional network. arXiv preprint arXiv:1505.00853","author":"Xu Bing","year":"2015","unstructured":"Bing Xu , Naiyan Wang , Tianqi Chen , and Mu Li. 2015. Empirical evaluation of rectified activations in convolutional network. arXiv preprint arXiv:1505.00853 ( 2015 ). Bing Xu, Naiyan Wang, Tianqi Chen, and Mu Li. 2015. Empirical evaluation of rectified activations in convolutional network. arXiv preprint arXiv:1505.00853 (2015)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01058"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00067"},{"key":"e_1_3_2_1_43_1","volume-title":"Action4d: Real-time action recognition in the crowd and clutter. arXiv preprint arXiv:1806.02424","author":"You Quanzeng","year":"2018","unstructured":"Quanzeng You and Hao Jiang . 2018. Action4d: Real-time action recognition in the crowd and clutter. arXiv preprint arXiv:1806.02424 ( 2018 ). Quanzeng You and Hao Jiang. 2018. Action4d: Real-time action recognition in the crowd and clutter. arXiv preprint arXiv:1806.02424 (2018)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126474"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2226526"},{"key":"e_1_3_2_1_46_1","volume-title":"International Conference on Learning Representations .","author":"Zhang Shiwen","year":"2019","unstructured":"Shiwen Zhang , Sheng Guo , Weilin Huang , Matthew R Scott , and Limin Wang . 2019 . V4D: 4D Convolutional Neural Networks for Video-level Representation Learning . In International Conference on Learning Representations . Shiwen Zhang, Sheng Guo, Weilin Huang, Matthew R Scott, and Limin Wang. 2019. V4D: 4D Convolutional Neural Networks for Video-level Representation Learning. In International Conference on Learning Representations ."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/3327144.3327148"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_49"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_43"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475424","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475424","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:32Z","timestamp":1750193312000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475424"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":49,"alternative-id":["10.1145\/3474085.3475424","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475424","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}