{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T07:24:05Z","timestamp":1763018645460,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102186"],"award-info":[{"award-number":["62102186"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20200725"],"award-info":[{"award-number":["BK20200725"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592269","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"425-433","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["TsP-Tran: Two-Stage Pure Transformer for Multi-Label Image Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5695-4706","authenticated-orcid":false,"given":"Ying","family":"Li","sequence":"first","affiliation":[{"name":"School of Computer and Electronic Information \/ School of Artificial Intelligence, Nanjing Normal University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3498-3159","authenticated-orcid":false,"given":"Chunming","family":"Guan","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information \/ School of Artificial Intelligence, Nanjing Normal University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2983-9921","authenticated-orcid":false,"given":"Jiaquan","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information \/ School of Artificial Intelligence, Nanjing Normal University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"CoRR abs\/1607.06450","author":"Ba Lei\u00a0Jimmy","year":"2016","unstructured":"Lei\u00a0Jimmy Ba, Jamie\u00a0Ryan Kiros, and Geoffrey\u00a0E. Hinton. 2016. Layer Normalization. CoRR abs\/1607.06450 (2016)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_2_1","DOI":"10.1109\/TIP.2018.2863028"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1109\/CVPR.2018.00134"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_4_1","DOI":"10.1109\/ICCV.2017.598"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_5_1","DOI":"10.1109\/ICCV48922.2021.00950"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.1145\/3512527.3531405"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_7_1","DOI":"10.1145\/1646396.1646452"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_9_1","volume-title":"Pre-training of deep bidirectional transformers for language understanding In: Proceedings of the 2019 Conference of the North American","author":"Devlin J","year":"2019","unstructured":"J Devlin, MW Chang, K Lee, and KB Toutanova. 2019. Pre-training of deep bidirectional transformers for language understanding In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Minneapolis, MN: Association for Computational Linguistics (2019), 4171\u201386."},{"key":"e_1_3_2_1_10_1","volume-title":"Words: Transformers for Image Recognition at Scale.","author":"Dosovitskiy A.","year":"2021","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, and N. Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"The pascal visual object classes challenge 2007 (voc2007) development kit","author":"Everingham Mark","year":"2007","unstructured":"Mark Everingham and John Winn. 2007. The pascal visual object classes challenge 2007 (voc2007) development kit. University of Leeds, Tech. Rep (2007)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1145\/3372278.3390716"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1109\/CVPR.2019.00033"},{"key":"e_1_3_2_1_14_1","volume-title":"Iterative quantization: A procrustean approach to learning binary codes for large-scale image retrieval","author":"Gong Yunchao","year":"2012","unstructured":"Yunchao Gong, Svetlana Lazebnik, Albert Gordo, and Florent Perronnin. 2012. Iterative quantization: A procrustean approach to learning binary codes for large-scale image retrieval. IEEE transactions on pattern analysis and machine intelligence 35, 12 (2012), 2916\u20132929."},{"key":"e_1_3_2_1_15_1","volume-title":"Iterative quantization: A procrustean approach to learning binary codes for large-scale image retrieval","author":"Gong Yunchao","year":"2012","unstructured":"Yunchao Gong, Svetlana Lazebnik, Albert Gordo, and Florent Perronnin. 2012. Iterative quantization: A procrustean approach to learning binary codes for large-scale image retrieval. IEEE transactions on pattern analysis and machine intelligence 35, 12 (2012), 2916\u20132929."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1109\/CVPR52688.2022.01553"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_18_1","DOI":"10.1609\/aaai.v32i1.11814"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1609\/aaai.v30i1.10176"},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Machine Learning. PMLR, 5583\u20135594","author":"Kim Wonjae","year":"2021","unstructured":"Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. Vilt: Vision-and-language transformer without convolution or region supervision. In International Conference on Machine Learning. PMLR, 5583\u20135594."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_21_1","DOI":"10.1109\/TIP.2016.2545300"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1145\/3132847.3133084"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.1109\/LSP.2022.3157517"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, IJCAI 2016","author":"Li Wu-Jun","year":"2016","unstructured":"Wu-Jun Li, Sheng Wang, and Wang-Cheng Kang. 2016. Feature Learning Based Deep Supervised Hashing with Pairwise Labels. In Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, IJCAI 2016, New York, NY, USA, 9-15 July 2016, Subbarao Kambhampati (Ed.). IJCAI\/AAAI Press, 1711\u20131717."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1145\/3474085.3475695"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.1109\/CVPR.2014.253"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_27_1","DOI":"10.1007\/978-3-319-10602-1_48"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_28_1","DOI":"10.1145\/3240508.3240516"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.5555\/2354409.2355047"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_30_1","DOI":"10.1109\/CVPR52688.2022.01170"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_32_1","volume-title":"SGDR: Stochastic Gradient Descent with Warm Restarts. In 5th International Conference on Learning Representations, ICLR","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. SGDR: Stochastic Gradient Descent with Warm Restarts. In 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings. OpenReview.net."},{"key":"e_1_3_2_1_33_1","volume-title":"Decoupled Weight Decay Regularization. In 7th International Conference on Learning Representations, ICLR 2019","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net."},{"key":"e_1_3_2_1_34_1","volume-title":"The power of asymmetry in binary hashing. Advances in neural information processing systems 26","author":"Neyshabur Behnam","year":"2013","unstructured":"Behnam Neyshabur, Nati Srebro, Russ\u00a0R Salakhutdinov, Yury Makarychev, and Payman Yadollahpour. 2013. The power of asymmetry in binary hashing. Advances in neural information processing systems 26 (2013)."},{"key":"e_1_3_2_1_35_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=HkAClQgA-","author":"Paulus Romain","year":"2018","unstructured":"Romain Paulus, Caiming Xiong, and Richard Socher. 2018. A Deep Reinforced Model for Abstractive Summarization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=HkAClQgA-"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_36_1","DOI":"10.1109\/CVPR.2015.7298598"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_37_1","DOI":"10.1145\/3206025.3206027"},{"key":"e_1_3_2_1_38_1","volume-title":"International Conference on Machine Learning. PMLR, 10347\u201310357","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In International Conference on Machine Learning. PMLR, 10347\u201310357."},{"key":"e_1_3_2_1_39_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1145\/3007669.3007688"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1145\/3323873.3325032"},{"key":"e_1_3_2_1_42_1","first-page":"5720","article-title":"A2-Net: Learning Attribute-Aware Hash Codes for Large-Scale Fine-Grained Image Retrieval","volume":"34","author":"Wei Xiu-Shen","year":"2021","unstructured":"Xiu-Shen Wei, Yang Shen, Xuhao Sun, Han-Jia Ye, and Jian Yang. 2021. A2-Net: Learning Attribute-Aware Hash Codes for Large-Scale Fine-Grained Image Retrieval. Advances in Neural Information Processing Systems 34 (2021), 5720\u20135730.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_43_1","DOI":"10.1145\/3078971.3078989"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_44_1","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"e_1_3_2_1_45_1","volume-title":"Proc. 13th AAAI Conf. Artif. Intell.3457\u20133463","author":"Yue Cao","year":"2016","unstructured":"Cao Yue, M Long, J Wang, Zhu Han, and Q Wen. 2016. Deep quantization network for efficient image retrieval. In Proc. 13th AAAI Conf. Artif. Intell.3457\u20133463."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_46_1","DOI":"10.1109\/TCSVT.2017.2771332"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_47_1","DOI":"10.1145\/2600428.2609600"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 1556\u20131564","author":"Zhao Fang","year":"2015","unstructured":"Fang Zhao, Yongzhen Huang, Liang Wang, and Tieniu Tan. 2015. Deep semantic ranking based hashing for multi-label image retrieval. In Proceedings of the IEEE conference on computer vision and pattern recognition. 1556\u20131564."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_49_1","DOI":"10.1109\/TIP.2021.3112011"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_50_1","DOI":"10.1007\/s11263-016-0889-2"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_51_1","DOI":"10.1609\/aaai.v30i1.10235"},{"key":"e_1_3_2_1_52_1","volume-title":"Deep Category-Aware Hashing for Object Retrieval in Multi-Label Image. In 2022 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 1\u20136.","author":"Zou Yun","year":"2022","unstructured":"Yun Zou, Xiaoyan Tan, Jingkuan Song, Ke Zhou, and Fuhao Zou. 2022. Deep Category-Aware Hashing for Object Retrieval in Multi-Label Image. In 2022 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 1\u20136."}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"ICMR '23","name":"ICMR '23: International Conference on Multimedia Retrieval","location":"Thessaloniki Greece"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592269","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592269","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:30Z","timestamp":1750178250000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592269"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":52,"alternative-id":["10.1145\/3591106.3592269","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592269","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}