{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T08:02:21Z","timestamp":1769932941674,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["Grant 62102186"],"award-info":[{"award-number":["Grant 62102186"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["Grant BK20200725"],"award-info":[{"award-number":["Grant BK20200725"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612216","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:30Z","timestamp":1698391650000},"page":"6301-6310","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Tran-GCN: Multi-label Pattern Image Retrieval via Transformer Driven Graph Convolutional Network"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5695-4706","authenticated-orcid":false,"given":"Ying","family":"Li","sequence":"first","affiliation":[{"name":"Nanjing Normal University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3498-3159","authenticated-orcid":false,"given":"Chunming","family":"Guan","sequence":"additional","affiliation":[{"name":"Nanjing Normal University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0724-4000","authenticated-orcid":false,"given":"Rui","family":"Cai","sequence":"additional","affiliation":[{"name":"Nanjing Normal University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8784-4110","authenticated-orcid":false,"given":"Ye","family":"Erwan","sequence":"additional","affiliation":[{"name":"Nanjing Normal University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8970-4415","authenticated-orcid":false,"given":"Ding","family":"Yuxiang","sequence":"additional","affiliation":[{"name":"Nanjing Normal University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2983-9921","authenticated-orcid":false,"given":"Jiaquan","family":"Gao","sequence":"additional","affiliation":[{"name":"Nanjing Normal University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"R. Arandjelovic P. Gronat A. Torii T. Pajdla and J. Sivic. 2017. NetVLAD: CNN architecture for weakly supervised place recognition. IEEE Transactions on Pattern Analysis & Machine Intelligence (2017) 1--1.","DOI":"10.1109\/CVPR.2016.572"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2863028"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_43"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00134"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.598"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00061"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3512527.3531405"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00532"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9860016"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1646396.1646452"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_14_1","volume-title":"Words: Transformers for Image Recognition at Scale.","author":"Dosovitskiy A.","year":"2021","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, and N. Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. (2021)."},{"key":"e_1_3_2_1_15_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_16_1","first-page":"1655","article-title":"Fine-tuning CNN Image Retrieval with No Human Annotation","volume":"41","author":"Filip R.","year":"2017","unstructured":"R. Filip, T. Giorgos, and C. Ondrej. 2017. Fine-tuning CNN Image Retrieval with No Human Annotation. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 41 (2017), 1655--1668.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_18_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_19_1","volume-title":"Learning Multiple Layers of Features from Tiny Images","author":"Krizhevsky Alex","unstructured":"Alex Krizhevsky and Geoffrey Hinton. 2009. Learning Multiple Layers of Features from Tiny Images. Toronto, ON, Canada."},{"key":"e_1_3_2_1_20_1","unstructured":"A. Krizhevsky I. Sutskever and G. Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. Advances in neural information processing systems (2012)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01621"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3157517"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, IJCAI 2016","author":"Li Wu-Jun","year":"2016","unstructured":"Wu-Jun Li, Sheng Wang, and Wang-Cheng Kang. 2016. Feature Learning Based Deep Supervised Hashing with Pairwise Labels. In Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, IJCAI 2016, New York, NY, USA, 9-15 July 2016, Subbarao Kambhampati (Ed.). IJCAI\/AAAI Press, 1711--1717."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475695"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.227"},{"key":"e_1_3_2_1_27_1","volume-title":"Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834","author":"Liu Shilong","year":"2021","unstructured":"Shilong Liu, Lei Zhang, Xiao Yang, Hang Su, and Jun Zhu. 2021b. Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834 (2021)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_29_1","volume-title":"SGDR: Stochastic Gradient Descent with Warm Restarts. In 5th International Conference on Learning Representations, ICLR","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. SGDR: Stochastic Gradient Descent with Warm Restarts. In 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings. OpenReview.net."},{"key":"e_1_3_2_1_30_1","volume-title":"Decoupled Weight Decay Regularization. In 7th International Conference on Learning Representations, ICLR 2019","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17098"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299097"},{"key":"e_1_3_2_1_34_1","unstructured":"K. Simonyan and A. Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. Computer Science (2014)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00019"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3206025.3206027"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"e_1_3_2_1_38_1","volume-title":"Going Deeper with Convolutions","author":"Szegedy Christian","year":"2014","unstructured":"Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, and Andrew Rabinovich. 2014. Going Deeper with Convolutions. IEEE Computer Society (2014)."},{"key":"e_1_3_2_1_39_1","volume-title":"International Conference on Machine Learning. PMLR, 10347--10357","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In International Conference on Machine Learning. PMLR, 10347--10357."},{"key":"e_1_3_2_1_40_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6909"},{"key":"e_1_3_2_1_42_1","volume-title":"A 2-Net: Learning Attribute-Aware Hash Codes for Large-Scale Fine-Grained Image Retrieval. Advances in Neural Information Processing Systems","author":"Wei Xiu-Shen","year":"2021","unstructured":"Xiu-Shen Wei, Yang Shen, Xuhao Sun, Han-Jia Ye, and Jian Yang. 2021. A 2-Net: Learning Attribute-Aware Hash Codes for Large-Scale Fine-Grained Image Retrieval. Advances in Neural Information Processing Systems (2021)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3078971.3078989"},{"key":"e_1_3_2_1_44_1","volume-title":"GM-MLIC: graph matching based multi-label image classification. arXiv preprint arXiv:2104.14762","author":"Wu Yanan","year":"2021","unstructured":"Yanan Wu, He Liu, Songhe Feng, Yi Jin, Gengyu Lyu, and Zizhang Wu. 2021. GM-MLIC: graph matching based multi-label image classification. arXiv preprint arXiv:2104.14762 (2021)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806375"},{"key":"e_1_3_2_1_46_1","volume-title":"Graph attention transformer network for multi-label image classification. ACM Transactions on Multimedia Computing, Communications and Applications","author":"Yuan Jin","year":"2023","unstructured":"Jin Yuan, Shikai Chen, Yao Zhang, Zhongchao Shi, Xin Geng, Jianping Fan, and Yong Rui. 2023. Graph attention transformer network for multi-label image classification. ACM Transactions on Multimedia Computing, Communications and Applications (2023)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"G. Zhang Z. Zeng S. Zhang Y. Zhang and W. Wu. 2017. SIFT Matching with CNN Evidences for Particular Object Retrieval. Neurocomputing (2017).","DOI":"10.1016\/j.neucom.2017.01.081"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2771332"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609600"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2929957"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0889-2"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10235"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612216","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612216","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:08:29Z","timestamp":1755821309000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612216"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":52,"alternative-id":["10.1145\/3581783.3612216","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612216","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}