{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,19]],"date-time":"2026-04-19T06:02:11Z","timestamp":1776578531061,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3652583.3658014","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T06:30:40Z","timestamp":1717741840000},"page":"824-832","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["HybridHash: Hybrid Convolutional and Self-Attention Deep Hashing for Image Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-0678-8616","authenticated-orcid":false,"given":"Chao","family":"He","sequence":"first","affiliation":[{"name":"School of Computer Science, Inner Mongolia University, Provincial Key Laboratory of Mongolian Information Processing Technology, National and Local Joint Engineering Research Center of Mongolian Information Processing, Hohhot, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2570-4544","authenticated-orcid":false,"given":"Hongxi","family":"Wei","sequence":"additional","affiliation":[{"name":"School of Computer Science, Inner Mongolia University, Provincial Key Laboratory of Mongolian Information Processing Technology, National and Local Joint Engineering Research Center of Mongolian Information Processing, Hohhot, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer normalization. arXiv preprint arXiv:1607.06450 (2016)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3097175"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00134"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.598"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/509907.509965"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3512527.3531405"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1646396.1646452"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2940693"},{"key":"e_1_3_2_1_10_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_11_1","first-page":"12","article-title":"Maximum Likelihood in Cost-Sensitive Learning: Model Specification, Approximations, and Upper Bounds","volume":"11","author":"Dmochowski Jacek P","year":"2010","unstructured":"Jacek P Dmochowski, Paul Sajda, and Lucas C Parra. 2010. Maximum Likelihood in Cost-Sensitive Learning: Model Specification, Approximations, and Upper Bounds. Journal of Machine Learning Research, Vol. 11 (2010), 12.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. 2021. An image is worth 16x16 words: Transformers for image recognition at scale. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/115"},{"key":"e_1_3_2_1_14_1","volume-title":"Rmt: Retentive networks meet vision transformers. arXiv preprint arXiv:2309.11523","author":"Fan Qihang","year":"2023","unstructured":"Qihang Fan, Huaibo Huang, Mingrui Chen, Hongmin Liu, and Ran He. 2023. Rmt: Retentive networks meet vision transformers. arXiv preprint arXiv:2309.11523 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.193"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2652730"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00548"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the Conference and Workshop on Neural Information Processing Systems. 15908--15919","author":"Han Kai","year":"2021","unstructured":"Kai Han, An Xiao, Enhua Wu, Jianyuan Guo, Chunjing Xu, and Yunhe Wang. 2021. Transformer in transformer. In Proceedings of the Conference and Workshop on Neural Information Processing Systems. 15908--15919."},{"key":"e_1_3_2_1_20_1","volume-title":"FasterViT: Fast Vision Transformers with Hierarchical Attention. arXiv preprint arXiv:2306.06189","author":"Hatamizadeh Ali","year":"2023","unstructured":"Ali Hatamizadeh, Greg Heinrich, Hongxu Yin, Andrew Tao, Jose M Alvarez, Jan Kautz, and Pavlo Molchanov. 2023. FasterViT: Fast Vision Transformers with Hierarchical Attention. arXiv preprint arXiv:2306.06189 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01474"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00834"},{"key":"e_1_3_2_1_24_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the Conference and Workshop on Neural Information Processing Systems. 1106--1114","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. In Proceedings of the Conference and Workshop on Neural Information Processing Systems. 1106--1114."},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the Conference and Workshop on Neural Information Processing Systems. 1042--1050","author":"Kulis Brian","year":"2009","unstructured":"Brian Kulis and Trevor Darrell. 2009. Learning to hash with binary reconstructive embeddings. In Proceedings of the Conference and Workshop on Neural Information Processing Systems. 1042--1050."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00714"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3157517"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the International Joint Conference on Artificial Intelligence. 1711--1717","author":"Li Wu-Jun","year":"2016","unstructured":"Wu-Jun Li, Sheng Wang, and Wang-Cheng Kang. 2016. Feature learning based deep supervised hashing with pairwise labels. In Proceedings of the International Joint Conference on Artificial Intelligence. 1711--1717."},{"key":"e_1_3_2_1_30_1","volume-title":"Msvit: training multiscale vision transformers for image retrieval","author":"Li Xue","year":"2023","unstructured":"Xue Li, Jiong Yu, Shaochen Jiang, Hongchun Lu, and Ziyang Li. 2023. Msvit: training multiscale vision transformers for image retrieval. IEEE Transactions on Multimedia (2023), 1--15."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.253"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2015.7301269"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.227"},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 2074--2081","author":"Liu Wei","year":"2012","unstructured":"Wei Liu, Jun Wang, Rongrong Ji, Yu-Gang Jiang, and Shih-Fu Chang. 2012. Supervised hashing with kernels. In Proceedings of the IEEE conference on computer vision and pattern recognition. 2074--2081."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01386"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011139631724"},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Park Namuk","year":"2022","unstructured":"Namuk Park and Songkuk Kim. 2022. How do vision transformers work?. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael Bernstein et al. 2015. Imagenet large scale visual recognition challenge. International journal of computer vision Vol. 115 (2015) 211--252.","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the International conference on machine learning. 6105--611","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural network. In Proceedings of the International conference on machine learning. 6105--611."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20053-3_27"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01270"},{"key":"e_1_3_2_1_44_1","volume-title":"Proceedings of the Conference and Workshop on Neural Information Processing Systems. 5998--6008","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Central similarity quantization for efficient image and video retrieval. In Proceedings of the Conference and Workshop on Neural Information Processing Systems. 5998--6008."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00542"},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the Conference and Workshop on Neural Information Processing Systems. 1753--1760","author":"Weiss Yair","year":"2008","unstructured":"Yair Weiss, Antonio Torralba, and Rob Fergus. 2008. Spectral hashing. In Proceedings of the Conference and Workshop on Neural Information Processing Systems. 1753--1760."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v28i1.8952"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00315"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835455"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298621"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20252"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2929957"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.04.037"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10235"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00995"}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","location":"Phuket Thailand","acronym":"ICMR '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia","SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 2024 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658014","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652583.3658014","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T08:44:32Z","timestamp":1755765872000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658014"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":56,"alternative-id":["10.1145\/3652583.3658014","10.1145\/3652583"],"URL":"https:\/\/doi.org\/10.1145\/3652583.3658014","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}