{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T17:31:14Z","timestamp":1769275874835,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key R&D Program of China","award":["No. 2022ZD0118100"],"award-info":[{"award-number":["No. 2022ZD0118100"]}]},{"name":"National Natural Science Foundation of China","award":["No. U1936210, U1936208"],"award-info":[{"award-number":["No. U1936210, U1936208"]}]},{"name":"Shenzhen Science and Technology Program","award":["No. 202206193000001, 0220816225523001"],"award-info":[{"award-number":["No. 202206193000001, 0220816225523001"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612283","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"6400-6409","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Hi-SIGIR: Hierachical Semantic-Guided Image-to-image Retrieval via Scene Graph"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-9854-4885","authenticated-orcid":false,"given":"Yulu","family":"Wang","sequence":"first","affiliation":[{"name":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6262-982X","authenticated-orcid":false,"given":"Pengwen","family":"Dai","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2018-9344","authenticated-orcid":false,"given":"Xiaojun","family":"Jia","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9716-5626","authenticated-orcid":false,"given":"Zhitao","family":"Zeng","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4847-2324","authenticated-orcid":false,"given":"Rui","family":"Li","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7141-708X","authenticated-orcid":false,"given":"Xiaochun","family":"Cao","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Ranking via sinkhorn propagation. arXiv preprint arXiv:1106.1925","author":"Adams Ryan Prescott","year":"2011","unstructured":"Ryan Prescott Adams and Richard S Zemel. 2011. Ranking via sinkhorn propagation. arXiv preprint arXiv:1106.1925 (2011)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.572"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744023_32"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459303"},{"key":"e_1_3_2_1_5_1","volume-title":"Signature verification using a \"siamese\" time delay neural network. Advances in neural information processing systems 6","author":"Bromley Jane","year":"1993","unstructured":"Jane Bromley, Isabelle Guyon, Yann LeCun, Eduard S\u00e4ckinger, and Roopak Shah. 1993. Signature verification using a \"siamese\" time delay neural network. Advances in neural information processing systems 6 (1993)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10660-013-9104-5"},{"key":"e_1_3_2_1_7_1","volume-title":"Deep learning for instance retrieval: A survey","author":"Chen Wei","year":"2022","unstructured":"Wei Chen, Yu Liu, Weiping Wang, Erwin M Bakker, Theodoros Georgiou, Paul Fieguth, Li Liu, and Michael S Lew. 2022. Deep learning for instance retrieval: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3080920"},{"key":"e_1_3_2_1_10_1","volume-title":"Deep graph matching consensus. arXiv preprint arXiv:2001.09621","author":"Fey Matthias","year":"2020","unstructured":"Matthias Fey, Jan E Lenssen, Christopher Morris, Jonathan Masci, and Nils M Kriege. 2020. Deep graph matching consensus. arXiv preprint arXiv:2001.09621 (2020)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_15"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-017-1016-8"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.560"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00731"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1973.4309314"},{"key":"e_1_3_2_1_17_1","unstructured":"Chris Harris Mike Stephens et al. 1988. A combined corner and edge detector. In Alvey vision conference Vol. 15. Citeseer 10--5244."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2022.3141262"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413732"},{"key":"e_1_3_2_1_21_1","volume-title":"Image retrieval using color and shape. Pattern recognition 29, 8","author":"Jain Anil K","year":"1996","unstructured":"Anil K Jain and Aditya Vailaya. 1996. Image retrieval using color and shape. Pattern recognition 29, 8 (1996), 1233--1244."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123429"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_33"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01304"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1013208.1013210"},{"key":"e_1_3_2_1_27_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_28_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and MaxWelling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41109-019-0195-3"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126542"},{"key":"e_1_3_2_1_31_1","volume-title":"International Conference on Machine Learning. PMLR, 3835--3845","author":"Li Yujia","year":"2019","unstructured":"Yujia Li, Chenjie Gu, Thomas Dullien, Oriol Vinyals, and Pushmeet Kohli. 2019. Graph matching networks for learning the similarity of graph structured objects. In International Conference on Machine Learning. PMLR, 3835--3845."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_36"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_3"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-020-00733-5"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3087514"},{"key":"e_1_3_2_1_38_1","first-page":"4090","article-title":"A Tale of HodgeRank and Spectral Method: Target Attack Against Rank Aggregation is the Fixed Point of Adversarial Game","volume":"45","author":"Ma Ke","year":"2022","unstructured":"Ke Ma, Qianqian Xu, Jinshan Zeng, Guorong Li, Xiaochun Cao, and Qingming Huang. 2022. A Tale of HodgeRank and Spectral Method: Target Attack Against Rank Aggregation is the Fixed Point of Adversarial Game. IEEE Transactions on Pattern Analysis and Machine Intelligence 45, 4 (2022), 4090--4108.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.531803"},{"key":"e_1_3_2_1_40_1","volume-title":"Learning latent permutations with gumbel-sinkhorn networks. arXiv preprint arXiv:1802.08665","author":"Mena Gonzalo","year":"2018","unstructured":"Gonzalo Mena, David Belanger, Scott Linderman, and Jasper Snoek. 2018. Learning latent permutations with gumbel-sinkhorn networks. arXiv preprint arXiv:1802.08665 (2018)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijmedinf.2003.11.024"},{"key":"e_1_3_2_1_42_1","volume-title":"A deep local and global scene-graph matching for image-text retrieval. arXiv preprint arXiv:2106.02400","author":"Nguyen Manh-Duy","year":"2021","unstructured":"Manh-Duy Nguyen, Binh T Nguyen, and Cathal Gurrin. 2021. A deep local and global scene-graph matching for image-text retrieval. arXiv preprint arXiv:2106.02400 (2021)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.374"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540009"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383172"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587635"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.303"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_1"},{"key":"e_1_3_2_1_50_1","volume-title":"Searching for activation functions. arXiv preprint arXiv:1710.05941","author":"Ramachandran Prajit","year":"2017","unstructured":"Prajit Ramachandran, Barret Zoph, and Quoc V Le. 2017. Searching for activation functions. arXiv preprint arXiv:1710.05941 (2017)."},{"key":"e_1_3_2_1_51_1","volume-title":"Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.275"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00130487"},{"key":"e_1_3_2_1_54_1","volume-title":"International Conference on Machine Learning. PMLR, 6105--6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International Conference on Machine Learning. PMLR, 6105--6114."},{"key":"e_1_3_2_1_55_1","volume-title":"Particular object retrieval with integral max-pooling of CNN activations. arXiv preprint arXiv:1511.05879","author":"Tolias Giorgos","year":"2015","unstructured":"Giorgos Tolias, Ronan Sicre, and Herv\u00e9 J\u00e9gou. 2015. Particular object retrieval with integral max-pooling of CNN activations. arXiv preprint arXiv:1511.05879 (2015)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2894498"},{"key":"e_1_3_2_1_57_1","volume-title":"International Conference on Machine Learning. PMLR, 23318--23340","author":"Wang Peng","year":"2022","unstructured":"Peng Wang, An Yang, Rui Men, Junyang Lin, Shuai Bai, Zhikang Li, Jianxin Ma, Chang Zhou, Jingren Zhou, and Hongxia Yang. 2022. Ofa: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework. In International Conference on Machine Learning. PMLR, 23318--23340."},{"key":"e_1_3_2_1_58_1","first-page":"5261","article-title":"Neural graph matching network: Learning lawler's quadratic assignment problem with extension to hypergraph and multiple-graph matching","volume":"44","author":"Wang Runzhong","year":"2021","unstructured":"Runzhong Wang, Junchi Yan, and Xiaokang Yang. 2021. Neural graph matching network: Learning lawler's quadratic assignment problem with extension to hypergraph and multiple-graph matching. IEEE Transactions on Pattern Analysis and Machine Intelligence 44, 9 (2021), 5261--5279.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093614"},{"key":"e_1_3_2_1_60_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Xu Kelvin","year":"2015","unstructured":"Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhudinov, Rich Zemel, and Yoshua Bengio. 2015. Show, attend and tell: Neural image caption generation with visual attention. In International Conference on Machine Learning. PMLR, 2048--2057."},{"key":"e_1_3_2_1_61_1","volume-title":"How powerful are graph neural networks? arXiv preprint arXiv:1810.00826","author":"Xu Keyulu","year":"2018","unstructured":"Keyulu Xu, Weihua Hu, Jure Leskovec, and Stefanie Jegelka. 2018. How powerful are graph neural networks? arXiv preprint arXiv:1810.00826 (2018)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783417"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17281"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00611"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612283","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612283","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:01:43Z","timestamp":1755820903000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612283"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":64,"alternative-id":["10.1145\/3581783.3612283","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612283","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}