{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T20:19:07Z","timestamp":1742933947488,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030930455"},{"type":"electronic","value":"9783030930462"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-93046-2_15","type":"book-chapter","created":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T05:30:01Z","timestamp":1641015001000},"page":"168-179","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DSGSR: Dynamic Semantic Generation and Similarity Reasoning for Image-Text Matching"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1779-855X","authenticated-orcid":false,"given":"Xiaojing","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2694-1023","authenticated-orcid":false,"given":"Bin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6781-5034","authenticated-orcid":false,"given":"Xiaohong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6184-4771","authenticated-orcid":false,"given":"Xiaochun","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,1,1]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and VQA. CoRR abs\/1707.07998 (2017)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Chen, T., Luo, J.: Expressing objects just like words: recurrent visual embedding for image-text matching. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, IAAI 2020, The Tenth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2020, New York, NY, USA, February 7\u201312, 2020, pp. 10583\u201310590. AAAI Press (2020)","DOI":"10.1609\/aaai.v34i07.6631"},{"key":"15_CR3","unstructured":"Faghri, F., Fleet, D.J., Kiros, J.R., Fidler, S.: VSE++: improving visual-semantic embeddings with hard negatives. In: British Machine Vision Conference 2018, BMVC 2018, Newcastle, UK, September 3\u20136, 2018, p. 12. BMVA Press (2018)"},{"key":"15_CR4","unstructured":"Gao, P., et al.: Dynamic fusion with intra- and inter- modality attention flow for visual question answering. CoRR abs\/1812.05252 (2018). http:\/\/arxiv.org\/abs\/1812.05252"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Hu, Z., Luo, Y., Lin, J., Yan, Y., Chen, J.: Multi-level visual-semantic alignments with relation-wise dual attention network for image and text matching. In: Kraus, S. (ed.) Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI 2019, Macao, China, August 10\u201316, 2019, pp. 789\u2013795. ijcai.org (2019)","DOI":"10.24963\/ijcai.2019\/111"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Huang, Y., Wu, Q., Song, C., Wang, L.: Learning semantic concepts and order for image and sentence matching. In: 2018 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2018, Salt Lake City, UT, USA, June 18\u201322, 2018, pp. 6163\u20136171. IEEE Computer Society (2018)","DOI":"10.1109\/CVPR.2018.00645"},{"key":"15_CR7","doi-asserted-by":"publisher","unstructured":"Ito, T., Tsubouchi, K., Sakaji, H., Yamashita, T., Izumi, K.: Contextual sentiment neural network for document sentiment analysis. Data Sci. Eng. 5(2), 180\u2013192 (2020). https:\/\/doi.org\/10.1007\/s41019-020-00122-4","DOI":"10.1007\/s41019-020-00122-4"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Jiang, Q., Li, W.: Deep cross-modal hashing. CoRR abs\/1602.02255 (2016)","DOI":"10.1109\/CVPR.2017.348"},{"key":"15_CR9","unstructured":"Karpathy, A., Joulin, A., Li, F.: Deep fragment embeddings for bidirectional image sentence mapping. In: Ghahramani, Z., Welling, M., Cortes, C., Lawrence, N.D., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8\u201313 2014, Montreal, Quebec, Canada, pp. 1889\u20131897 (2014)"},{"key":"15_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1007\/978-3-030-01225-0_13","volume-title":"Computer Vision \u2013 ECCV 2018","author":"K-H Lee","year":"2018","unstructured":"Lee, K.-H., Chen, X., Hua, G., Hu, H., He, X.: Stacked cross attention for image-text matching. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 212\u2013228. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_13"},{"key":"15_CR11","doi-asserted-by":"crossref","unstructured":"Li, G., Duan, N., Fang, Y., Gong, M., Jiang, D.: Unicoder-vl: a universal encoder for vision and language by cross-modal pre-training. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, IAAI 2020, The Tenth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2020, New York, NY, USA, February 7\u201312, 2020, pp. 11336\u201311344. AAAI Press (2020)","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Li, S., Xiao, T., Li, H., Yang, W., Wang, X.: Identity-aware textual-visual matching with latent co-attention. In: IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, October 22\u201329, 2017, pp. 1908\u20131917. IEEE Computer Society (2017)","DOI":"10.1109\/ICCV.2017.209"},{"key":"15_CR13","doi-asserted-by":"crossref","unstructured":"Liu, C., Mao, Z., Liu, A., Zhang, T., Wang, B., Zhang, Y.: Focus your attention: a bidirectional focal attention network for image-text matching. In: Amsaleg, L., et al. (eds.) Proceedings of the 27th ACM International Conference on Multimedia, MM 2019, Nice, France, October 21\u201325, 2019, pp. 3\u201311. ACM (2019)","DOI":"10.1145\/3343031.3350869"},{"key":"15_CR14","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: Vilbert: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Wallach, H.M., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E.B., Garnett, R. (eds.) Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8\u201314, 2019, Vancouver, BC, Canada, pp. 13\u201323 (2019)"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Mithun, N.C., Panda, R., Papalexakis, E.E., Roy-Chowdhury, A.K.: Webly supervised joint embedding for cross-modal image-text retrieval. In: Boll, S., et al. (eds.) 2018 ACM Multimedia Conference on Multimedia Conference, MM 2018, Seoul, Republic of Korea, October 22\u201326, 2018, pp. 1856\u20131864. ACM (2018)","DOI":"10.1145\/3240508.3240712"},{"issue":"6","key":"15_CR16","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R.B., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Sarafianos, N., Xu, X., Kakadiaris, I.A.: Adversarial representation learning for text-to-image matching. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), October 27 - November 2, 2019, pp. 5813\u20135823. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00591"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Shi, B., Ji, L., Lu, P., Niu, Z., Duan, N.: Knowledge aware semantic concept expansion for image-text matching. In: Kraus, S. (ed.) Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI 2019, Macao, China, August 10\u201316, 2019, pp. 5182\u20135189. ijcai.org (2019)","DOI":"10.24963\/ijcai.2019\/720"},{"key":"15_CR19","unstructured":"Wang, B., Yang, Y., Xu, X., Hanjalic, A., Shen, H.T.: Adversarial cross-modal retrieval. In: Liu, Q., et al. (eds.) Proceedings of the 2017 ACM on Multimedia Conference, MM 2017, Mountain View, CA, USA, October 23\u201327, 2017, pp. 154\u2013162. ACM (2017)"},{"issue":"2","key":"15_CR20","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1109\/TPAMI.2018.2797921","volume":"41","author":"L Wang","year":"2019","unstructured":"Wang, L., Li, Y., Huang, J., Lazebnik, S.: Learning two-branch neural networks for image-text matching tasks. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 394\u2013407 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Wang, S., Wang, R., Yao, Z., Shan, S., Chen, X.: Cross-modal scene graph matching for relationship-aware image-text retrieval. In: IEEE Winter Conference on Applications of Computer Vision, WACV 2020, Snowmass Village, CO, USA, March 1\u20135, 2020, pp. 1497\u20131506. IEEE (2020)","DOI":"10.1109\/WACV45572.2020.9093614"},{"key":"15_CR22","unstructured":"Wang, Y., et al.: Position focused attention network for image-text matching. In: Kraus, S. (ed.) Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI 2019, Macao, China, August 10\u201316, 2019, pp. 3792\u20133798. ijcai.org (2019)"},{"key":"15_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Lei, Z., Zhang, Z., Li, S.Z.: Context-aware attention network for image-text retrieval. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, June 13\u201319, 2020, pp. 3533\u20133542. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.00359"},{"key":"15_CR24","doi-asserted-by":"crossref","unstructured":"Zhen, L., Hu, P., Wang, X., Peng, D.: Deep supervised cross-modal retrieval. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2019, Long Beach, CA, USA, June 16\u201320, 2019, pp. 10394\u201310403. Computer Vision Foundation\/IEEE (2019)","DOI":"10.1109\/CVPR.2019.01064"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-93046-2_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,18]],"date-time":"2022-06-18T08:04:47Z","timestamp":1655539487000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-93046-2_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030930455","9783030930462"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-93046-2_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"1 January 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CAAI International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 June 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cicai2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/cicai.caai.cn\/#\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"307","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"105","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"34% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}