{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:21:07Z","timestamp":1750220467935,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Chinese Knowledge Center for Engineering Sciences and Technology"},{"name":"Alibaba-Zhejiang University Joint Institute of Frontier Technologies"},{"name":"the Fundamental Research Funds for the Central Universities"},{"name":"National Key R&D Program of China","award":["2018AAA0101900"],"award-info":[{"award-number":["2018AAA0101900"]}]},{"name":"Alibaba Group through Alibaba Innovative Research Program"},{"name":"NSFC","award":["U19B2042, 62072399"],"award-info":[{"award-number":["U19B2042, 62072399"]}]},{"name":"MoE Engineering Research Center of Digital Library"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475702","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T06:35:51Z","timestamp":1634538951000},"page":"5591-5599","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DRDF"],"prefix":"10.1145","author":[{"given":"Haiwen","family":"Hong","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Xuan","family":"Jin","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"given":"Yin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Yunqing","family":"Hu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Jingfeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Yuan","family":"He","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"given":"Hui","family":"Xue","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.667"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01104"},{"volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT (1).","year":"2019","author":"Devlin Jacob","key":"e_1_3_2_1_3_1"},{"volume-title":"Deformable Kernels: Adapting Effective Receptive Fields for Object Deformation. In International Conference on Learning Representations.","year":"2019","author":"Gao Hang","key":"e_1_3_2_1_4_1"},{"volume-title":"Dynamic neural networks: A survey. arXiv preprint arXiv:2102.04906","year":"2021","author":"Han Yizeng","key":"e_1_3_2_1_5_1"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.450"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Jie Hu Li Shen and Gang Sun. 2018. Squeeze-and-excitation networks. In Proceed- ings of the IEEE conference on computer vision and pattern recognition. 7132--7141.  Jie Hu Li Shen and Gang Sun. 2018. Squeeze-and-excitation networks. In Proceed- ings of the IEEE conference on computer vision and pattern recognition. 7132--7141.","DOI":"10.1109\/CVPR.2018.00745"},{"volume-title":"Supervised multimodal bitransformers for classifying images and text. arXiv preprint arXiv:1909.02950","year":"2019","author":"Kiela Douwe","key":"e_1_3_2_1_9_1"},{"volume-title":"The Hateful Memes Chal- lenge: Detecting Hate Speech in Multimodal Memes. Advances in Neural Information Processing Systems 33","year":"2020","author":"Kiela Douwe","key":"e_1_3_2_1_10_1"},{"key":"e_1_3_2_1_11_1","unstructured":"Alex Krizhevsky Geoffrey Hinton etal 2009. Learning multiple layers of features from tiny images. (2009).  Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"volume-title":"Tiny imagenet visual recognition challenge. CS 231N 7","year":"2015","author":"Le Ya","key":"e_1_3_2_1_12_1"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6795"},{"volume-title":"Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557","year":"2019","author":"Li Liunian Harold","key":"e_1_3_2_1_14_1"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00060"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3454289"},{"volume-title":"Manuel Montes-y G\u00f3mez, and Fabio A Gonz\u00e1lez","year":"2017","author":"Arevalo Ovalle John Edison","key":"e_1_3_2_1_17_1"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-71278-5_7"},{"key":"e_1_3_2_1_20_1","unstructured":"K Simonyan and A Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. (2015).  K Simonyan and A Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. (2015)."},{"volume-title":"VL-BERT: Pre-training of Generic Visual-Linguistic Representations. In International Conference on Learning Representations.","year":"2019","author":"Su Weijie","key":"e_1_3_2_1_21_1"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"volume-title":"Sarah Adel Bargal, and Joseph E Gonzalez","year":"2020","author":"Wan Alvin","key":"e_1_3_2_1_23_1"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3454404"},{"volume-title":"Wide Residual Networks. In British Machine Vision Conference","year":"2016","author":"Zagoruyko Sergey","key":"e_1_3_2_1_26_1"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01308"}],"event":{"name":"MM '21: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Virtual Event China","acronym":"MM '21"},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475702","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475702","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:25Z","timestamp":1750193305000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475702"}},"subtitle":["Determining the Importance of Different Multimodal Information with Dual-Router Dynamic Framework"],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":27,"alternative-id":["10.1145\/3474085.3475702","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475702","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}