{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T07:59:52Z","timestamp":1770710392829,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62101524"],"award-info":[{"award-number":["62101524"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3503161.3547836","type":"proceedings-article","created":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T15:42:35Z","timestamp":1665416555000},"page":"3754-3763","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Unsupervised Video Hashing with Multi-granularity Contextualization and Multi-structure Preservation"],"prefix":"10.1145","author":[{"given":"Yanbin","family":"Hao","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"given":"Jingru","family":"Duan","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"given":"Hao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore, Singapore"}]},{"given":"Bin","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of Bristol, Bristol, United Kingdom"}]},{"given":"Pengyuan","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"given":"Xiangnan","family":"He","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2022,10,10]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432","author":"Bengio Yoshua","year":"2013","unstructured":"Yoshua Bengio , Nicholas L\u00e9onard , and Aaron Courville . 2013. Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432 ( 2013 ). Yoshua Bengio, Nicholas L\u00e9onard, and Aaron Courville. 2013. Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432 (2013)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.145"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3220162.3220168"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298862"},{"key":"e_1_3_2_2_6_1","volume-title":"Cross-Modality High-Frequency Transformer for MR Image Super-Resolution. arXiv preprint arXiv:2203.15314","author":"Fang Chaowei","year":"2022","unstructured":"Chaowei Fang , Dingwen Zhang , Liang Wang , Yulun Zhang , Lechao Cheng , and Junwei Han . 2022. Cross-Modality High-Frequency Transformer for MR Image Super-Resolution. arXiv preprint arXiv:2203.15314 ( 2022 ). Chaowei Fang, Dingwen Zhang, Liang Wang, Yulun Zhang, Lechao Cheng, and Junwei Han. 2022. Cross-Modality High-Frequency Transformer for MR Image Super-Resolution. arXiv preprint arXiv:2203.15314 (2022)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-01001-9_5"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.Congress.2014.66"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967225"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/11818175_3"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475241"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2737329"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2610324"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2923121"},{"key":"e_1_3_2_2_15_1","volume-title":"Attention in Attention: Modeling Context Correlation for Efficient Video Classification","author":"Hao Yanbin","year":"2022","unstructured":"Yanbin Hao , Shuo Wang , Pei Cao , Xinjian Gao , Tong Xu , Jinmeng Wu , and Xiangnan He. 2022a. Attention in Attention: Modeling Context Correlation for Efficient Video Classification . IEEE Transactions on Circuits and Systems for Video Technology ( 2022 ). Yanbin Hao, Shuo Wang, Pei Cao, Xinjian Gao, Tong Xu, Jinmeng Wu, and Xiangnan He. 2022a. Attention in Attention: Modeling Context Correlation for Efficient Video Classification. IEEE Transactions on Circuits and Systems for Video Technology (2022)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00100"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_18_1","volume-title":"Long short-term memory. Neural computation","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber . 1997. Long short-term memory. Neural computation , Vol. 9 , 8 ( 1997 ), 1735--1780. Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation, Vol. 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_2_19_1","volume-title":"Vision permutator: A permutable mlp-like architecture for visual recognition","author":"Hou Qibin","year":"2022","unstructured":"Qibin Hou , Zihang Jiang , Li Yuan , Ming-Ming Cheng , Shuicheng Yan , and Jiashi Feng . 2022. Vision permutator: A permutable mlp-like architecture for visual recognition . IEEE Transactions on Pattern Analysis and Machine Intelligence ( 2022 ). Qibin Hou, Zihang Jiang, Li Yuan, Ming-Ming Cheng, Shuicheng Yan, and Jiashi Feng. 2022. Vision permutator: A permutable mlp-like architecture for visual recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_2_2_21_1","volume-title":"Exploiting feature and class relationships in video categorization with regularized deep neural networks","author":"Jiang Yu-Gang","year":"2017","unstructured":"Yu-Gang Jiang , Zuxuan Wu , Jun Wang , Xiangyang Xue , and Shih-Fu Chang . 2017. Exploiting feature and class relationships in video categorization with regularized deep neural networks . IEEE transactions on pattern analysis and machine intelligence, Vol. 40 , 2 ( 2017 ), 352--364. Yu-Gang Jiang, Zuxuan Wu, Jun Wang, Xiangyang Xue, and Shih-Fu Chang. 2017. Exploiting feature and class relationships in video categorization with regularized deep neural networks. IEEE transactions on pattern analysis and machine intelligence, Vol. 40, 2 (2017), 352--364."},{"key":"e_1_3_2_2_22_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba . 2014 . Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014). Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133030"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2946096"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00830"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01334"},{"key":"e_1_3_2_2_27_1","volume-title":"Structure-adaptive Neighborhood Preserving Hashing for Scalable Video Search","author":"Li Shuyan","year":"2021","unstructured":"Shuyan Li , Xiu Lia , Jiwen Lu , and Jie Zhou . 2021b. Structure-adaptive Neighborhood Preserving Hashing for Scalable Video Search . IEEE Transactions on Circuits and Systems for Video Technology ( 2021 ). Shuyan Li, Xiu Lia, Jiwen Lu, and Jie Zhou. 2021b. Structure-adaptive Neighborhood Preserving Hashing for Scalable Video Search. IEEE Transactions on Circuits and Systems for Video Technology (2021)."},{"key":"e_1_3_2_2_28_1","volume-title":"Deep unsupervised image hashing by maximizing bit entropy. arXiv preprint arXiv:2012.12334","author":"Li Yunqiang","year":"2020","unstructured":"Yunqiang Li and Jan van Gemert . 2020. Deep unsupervised image hashing by maximizing bit entropy. arXiv preprint arXiv:2012.12334 ( 2020 ). Yunqiang Li and Jan van Gemert. 2020. Deep unsupervised image hashing by maximizing bit entropy. arXiv preprint arXiv:2012.12334 (2020)."},{"key":"e_1_3_2_2_29_1","volume-title":"As-mlp: An axial shifted mlp architecture for vision. arXiv preprint arXiv:2107.08391","author":"Lian Dongze","year":"2021","unstructured":"Dongze Lian , Zehao Yu , Xing Sun , and Shenghua Gao . 2021 . As-mlp: An axial shifted mlp architecture for vision. arXiv preprint arXiv:2107.08391 (2021). Dongze Lian, Zehao Yu, Xing Sun, and Shenghua Gao. 2021. As-mlp: An axial shifted mlp architecture for vision. arXiv preprint arXiv:2107.08391 (2021)."},{"key":"e_1_3_2_2_30_1","first-page":"9204","article-title":"Pay attention to mlps","volume":"34","author":"Liu Hanxiao","year":"2021","unstructured":"Hanxiao Liu , Zihang Dai , David So , and Quoc V Le . 2021 . Pay attention to mlps . Advances in Neural Information Processing Systems , Vol. 34 (2021), 9204 -- 9215 . Hanxiao Liu, Zihang Dai, David So, and Quoc V Le. 2021. Pay attention to mlps. Advances in Neural Information Processing Systems, Vol. 34 (2021), 9204--9215.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_31_1","unstructured":"Wei Liu Jun Wang Sanjiv Kumar and Shih-Fu Chang. 2011. Hashing with graphs. In Icml.  Wei Liu Jun Wang Sanjiv Kumar and Shih-Fu Chang. 2011. Hashing with graphs. In Icml."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00817"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/850924.851523"},{"key":"e_1_3_2_2_34_1","volume-title":"Cimon: Towards high-quality hash codes. arXiv preprint arXiv:2010.07804","author":"Luo Xiao","year":"2020","unstructured":"Xiao Luo , Daqing Wu , Zeyu Ma , Chong Chen , Minghua Deng , Jinwen Ma , Zhongming Jin , Jianqiang Huang , and Xian-Sheng Hua . 2020 . Cimon: Towards high-quality hash codes. arXiv preprint arXiv:2010.07804 (2020). Xiao Luo, Daqing Wu, Zeyu Ma, Chong Chen, Minghua Deng, Jinwen Ma, Zhongming Jin, Jianqiang Huang, and Xian-Sheng Hua. 2020. Cimon: Towards high-quality hash codes. arXiv preprint arXiv:2010.07804 (2020)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107467"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-019-8229-7"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3356316"},{"key":"e_1_3_2_2_39_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 ( 2014 ). Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072354"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2814344"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475218"},{"key":"e_1_3_2_2_43_1","volume-title":"The new data and new challenges in multimedia research. arXiv preprint arXiv:1503.01817","author":"Thomee Bart","year":"2015","unstructured":"Bart Thomee , David A Shamma , Gerald Friedland , Benjamin Elizalde , Karl Ni , Douglas Poland , Damian Borth , and Li-Jia Li. 2015. The new data and new challenges in multimedia research. arXiv preprint arXiv:1503.01817 , Vol. 1 , 8 ( 2015 ). Bart Thomee, David A Shamma, Gerald Friedland, Benjamin Elizalde, Karl Ni, Douglas Poland, Damian Borth, and Li-Jia Li. 2015. The new data and new challenges in multimedia research. arXiv preprint arXiv:1503.01817, Vol. 1, 8 (2015)."},{"key":"e_1_3_2_2_44_1","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Tolstikhin Ilya O","year":"2021","unstructured":"Ilya O Tolstikhin , Neil Houlsby , Alexander Kolesnikov , Lucas Beyer , Xiaohua Zhai , Thomas Unterthiner , Jessica Yung , Andreas Steiner , Daniel Keysers , Jakob Uszkoreit , 2021 . Mlp-mixer: An all-mlp architecture for vision . Advances in Neural Information Processing Systems , Vol. 34 (2021). Ilya O Tolstikhin, Neil Houlsby, Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Thomas Unterthiner, Jessica Yung, Andreas Steiner, Daniel Keysers, Jakob Uszkoreit, et al. 2021. Mlp-mixer: An all-mlp architecture for vision. Advances in Neural Information Processing Systems, Vol. 34 (2021)."},{"key":"e_1_3_2_2_45_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton . 2008 . Visualizing data using t-SNE . Journal of machine learning research , Vol. 9 , 11 (2008). Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_2_46_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , \u0141ukasz Kaiser , and Illia Polosukhin . 2017. Attention is all you need. Advances in neural information processing systems , Vol. 30 ( 2017 ). Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2015.2487976"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3314577","article-title":"Cross-modality retrieval by joint correlation learning","volume":"15","author":"Wang Shuo","year":"2019","unstructured":"Shuo Wang , Dan Guo , Xin Xu , Li Zhuo , and Meng Wang . 2019 a. Cross-modality retrieval by joint correlation learning . ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM) , Vol. 15 , 2s (2019), 1 -- 16 . Shuo Wang, Dan Guo, Xin Xu, Li Zhuo, and Meng Wang. 2019a. Cross-modality retrieval by joint correlation learning. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), Vol. 15, 2s (2019), 1--16.","journal-title":"ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2019.2963339"},{"key":"e_1_3_2_2_50_1","volume-title":"Spectral hashing. Advances in neural information processing systems","author":"Weiss Yair","year":"2008","unstructured":"Yair Weiss , Antonio Torralba , and Rob Fergus . 2008. Spectral hashing. Advances in neural information processing systems , Vol. 21 ( 2008 ). Yair Weiss, Antonio Torralba, and Rob Fergus. 2008. Spectral hashing. Advances in neural information processing systems, Vol. 21 (2008)."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2882155"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/1291233.1291280"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00367"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00315"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835455"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475272"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964308"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.1110"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00556"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01174"}],"event":{"name":"MM '22: The 30th ACM International Conference on Multimedia","location":"Lisboa Portugal","acronym":"MM '22","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 30th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3547836","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503161.3547836","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:35Z","timestamp":1750186955000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3547836"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":60,"alternative-id":["10.1145\/3503161.3547836","10.1145\/3503161"],"URL":"https:\/\/doi.org\/10.1145\/3503161.3547836","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2022-10-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}