{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:08:45Z","timestamp":1755907725470,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,18]],"date-time":"2023-11-18T00:00:00Z","timestamp":1700265600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Natural Science Foundation of China","award":["61602085"],"award-info":[{"award-number":["61602085"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,18]]},"DOI":"10.1145\/3603273.3635666","type":"proceedings-article","created":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T18:12:40Z","timestamp":1704823960000},"page":"229-232","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["ISES: Instance-level Semantic Enhanced Score Model for Image-Text Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4879-1388","authenticated-orcid":false,"given":"Bo","family":"Lu","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Dalian Minzu University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9302-2040","authenticated-orcid":false,"given":"Hanxing","family":"Cai","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Dalian Minzu University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8970-7447","authenticated-orcid":false,"given":"Yangyang","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Dalian Minzu University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6250-1351","authenticated-orcid":false,"given":"Zhengchun","family":"Shi","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Dalian Minzu University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1775-6280","authenticated-orcid":false,"given":"Xiaodong","family":"Duan","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Dalian Minzu University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,1,9]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence.page","author":"Diao Haiwen","year":"2022","unstructured":"Haiwen Diao, Ying Zhang, Lin Ma, and Huchuan Lu. Similarity reasoning and filtration for image-text matching. Proceedings of the AAAI Conference on Artificial Intelligence.page 1218-1226, Sep 2022"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the first USENIX workshop on Offensive Technologies (WOOT \u201907)","author":"Gundy Matthew Van","year":"2007","unstructured":"Matthew Van Gundy, Davide Balzarotti, and Giovanni Vigna. 2007. Catch me, if you can: Evading network signatures with web-based polymorphic worms. In Proceedings of the first USENIX workshop on Offensive Technologies (WOOT \u201907) . USENIX Association, Berkley, CA, Article 7, 9 pages."},{"key":"e_1_3_2_1_3_1","unstructured":"Kunpeng Li Yulun Zhang Kai Li Yuanyuan Li and YunFu. Image-text embedding learning via visual and textual semantic reasoning"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00475"},{"key":"e_1_3_2_1_5_1","article-title":"Hierarchical feature aggregation based on transformer for image-text matching","volume":"6437","author":"Dong Xinfeng","year":"2022","unstructured":"Xinfeng Dong, Huaxiang Zhang, Lei Zhu, Liqiang Nie, and Li Liu. Hierarchical feature aggregation based on transformer for image-text matching. IEEE Transactions on Circuits and Systems for Video Technology, page 6437-6447.Sep 2022","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology, page"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/106"},{"key":"e_1_3_2_1_7_1","volume-title":"Coder: Coupled diversity-sensitive momentum contrastive learning for image-text retrieval","author":"Wang Haoran","year":"2022","unstructured":"Haoran Wang, Dongliang He, Wenhao Wu, Boyang Xia, MinYang, Fu Li, Yunlong Yu, Zhong Ji, Errui Ding, and Jingdong Wang. Coder: Coupled diversity-sensitive momentum contrastive learning for image-text retrieval. Aug 2022"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3141603"},{"key":"e_1_3_2_1_9_1","first-page":"15661","volume-title":"Wang Q and Zhang Y D 2022 Negative-aware attention framework for image-text matching Proc. of the IEEE\/CVF Conf. on Computer Vision and Pattern Recognition","author":"Zhang","unstructured":"Zhang K, Mao Z D, Wang Q and Zhang Y D 2022 Negative-aware attention framework for image-text matching Proc. of the IEEE\/CVF Conf. on Computer Vision and Pattern Recognition pp 15661-70"},{"key":"e_1_3_2_1_10_1","article-title":"On the role of correlation and abstraction in cross-modal multimedia retrieval","volume":"521","author":"Pereira Jose Costa","year":"2014","unstructured":"Jose Costa Pereira, Emanuele Coviello Gabriel Doyle.Nikhil Rasiwasia, Gert R. G. Lanckriet, Roger Levy, and Nuno Vasconcelos. On the role of correlation and abstraction in cross-modal multimedia retrieval. IEEE Transactions on Pattern Analysis and Machine Intelligence, page 521-535.Mar 2014","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence, page"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Zhedong Zheng Liang Zheng Michael Garrett Yi Yang Mingliang Xu and Y-Dong Shen. Dual-path convolutional image-text embeddings with instance loss. ACM Transactions on Multimedia Computing Communications and Applications (TOMM) 16(2):1-23 2020","DOI":"10.1145\/3383184"},{"key":"e_1_3_2_1_12_1","first-page":"740","volume-title":"ECCV","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays. Pietro Perona, Deva Ramanan, Piotr Dollar, and C Lawrence Zitnick. Microsoft coco: Common objects in context. In ECCV, pages 740-755.Springer, 2014"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Peter Young Alice Lai Micah Hodosh and Julia Hockenmaier. From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions. 2:67-78 2014","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_2_1_14_1","first-page":"201","volume-title":"Hu H D H and He X D 2018 Stacked cross attention for image-text matching Proc. of the European Conf. Computer Vision (ECCV) part IV","author":"Lee K H","unstructured":"Lee K H, Chen X, Hu H D H and He X D 2018 Stacked cross attention for image-text matching Proc. of the European Conf. Computer Vision (ECCV) part IV pp 201-216"},{"key":"e_1_3_2_1_15_1","first-page":"4653","volume-title":"Li Y Y and Fu Y 2019 Visual semantic reasoning for image-text matching Proc. of the IEEE\/CVF Int. Conf. on Computer Vision","author":"Li K P","unstructured":"Li K P, Zhang Y L, Li K, Li Y Y and Fu Y 2019 Visual semantic reasoning for image-text matching Proc. of the IEEE\/CVF Int. Conf. on Computer Vision (Seoul, Korea) pp 4653-61"},{"key":"e_1_3_2_1_16_1","first-page":"12655","volume-title":"Liu J and Han J G 2020 IMRAM: Iterative Matching with Recurrent Attention Memory for cross-modal image-text retrieval Proc. of the IEEE\/CVF Conf. on Computer Vision and Pattern Recognition","author":"Chen","unstructured":"Chen H, Ding G G, Liu X D, Lin Z J, Liu J and Han J G 2020 IMRAM: Iterative Matching with Recurrent Attention Memory for cross-modal image-text retrieval Proc. of the IEEE\/CVF Conf. on Computer Vision and Pattern Recognition pp 12655-63"}],"event":{"name":"AAIA 2023: 2023 International Conference on Advances in Artificial Intelligence and Applications","acronym":"AAIA 2023","location":"Wuhan China"},"container-title":["Proceedings of the 2023 International Conference on Advances in Artificial Intelligence and Applications"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603273.3635666","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3603273.3635666","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:34:16Z","timestamp":1755891256000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603273.3635666"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,18]]},"references-count":16,"alternative-id":["10.1145\/3603273.3635666","10.1145\/3603273"],"URL":"https:\/\/doi.org\/10.1145\/3603273.3635666","relation":{},"subject":[],"published":{"date-parts":[[2023,11,18]]},"assertion":[{"value":"2024-01-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}