{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T19:19:30Z","timestamp":1742930370857,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819794362"},{"type":"electronic","value":"9789819794379"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-9437-9_35","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T16:32:00Z","timestamp":1730392320000},"page":"446-457","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Graph Interpretation of\u00a0Image-Text Matching: Link Prediction on\u00a0Concept-Enhanced Cross-Modal 
Graph"],"prefix":"10.1007","author":[{"given":"Zhihao","family":"Fan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zejun","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Siyuan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhongyu","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haijun","family":"Shan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"35_CR1","doi-asserted-by":"crossref","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: Spice: semantic propositional image caption evaluation. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"35_CR2","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: Vqa: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"35_CR3","unstructured":"Bischoff, S.: Feature learning for meta-paths in knowledge graphs. arXiv:1809.03267 (2018)"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"Chen, H., Ding, G., Liu, X., Lin, Z., Liu, J., Han, J.: Imram: iterative matching with recurrent attention memory for cross-modal image-text retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12655\u201312663 (2020)","DOI":"10.1109\/CVPR42600.2020.01267"},{"key":"35_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y.C., Li, L., Yu, L., et\u00a0al.: Uniter: universal image-text representation learning. 
In: ECCV (2020)","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"35_CR6","unstructured":"Devlin, J., Chang, M.W., et\u00a0al.: Bert: pre-training of deep bidirectional transformers for language understanding. In: NAACL (2019)"},{"key":"35_CR7","unstructured":"Frome, A., et al.: Devise: a deep visual-semantic embedding model (2013)"},{"key":"35_CR8","doi-asserted-by":"crossref","unstructured":"Gao, T., Yao, X., Chen, D.: Simcse: Simple contrastive learning of sentence embeddings. arXiv:2104.08821 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"35_CR9","doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the v in vqa matter: elevating the role of image understanding in visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6904\u20136913 (2017)","DOI":"10.1109\/CVPR.2017.670"},{"key":"35_CR10","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"35_CR11","doi-asserted-by":"crossref","unstructured":"Li, G., Duan, N., Fang, Y., et\u00a0al.: Unicoder-vl: a universal encoder for vision and language by cross-modal pre-training. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Li, Z., Fan, Z., Tou, H., Chen, J., Wei, Z., Huang, X.: Mvptr: multi-level semantic alignment for vision-language pre-training via multi-stage learning. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 4395\u20134405 (2022)","DOI":"10.1145\/3503161.3548341"},{"key":"35_CR13","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., et al.: Microsoft COCO: Common objects in context. 
In: ECCV (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"35_CR14","doi-asserted-by":"crossref","unstructured":"Liu, C., Mao, Z., et\u00a0al.: Focus your attention: a bidirectional focal attention network for image-text matching. In: ACMMM (2019)","DOI":"10.1145\/3343031.3350869"},{"key":"35_CR15","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Wang, L., Cervantes, C.M., et\u00a0al.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.303"},{"key":"35_CR16","unstructured":"Radford, A., Kim, J.W., et\u00a0al.: Learning transferable visual models from natural language supervision. arXiv:2103.00020 (2021)"},{"key":"35_CR17","doi-asserted-by":"crossref","unstructured":"Shaw, P., Uszkoreit, J., Vaswani, A.: Self-attention with relative position representations. arXiv:1803.02155 (2018)","DOI":"10.18653\/v1\/N18-2074"},{"key":"35_CR18","doi-asserted-by":"crossref","unstructured":"Sun, S., Chen, Y.C., Li, L., Wang, S., Fang, Y., Liu, J.: Lightningdot: pre-training visual-semantic embeddings for real-time image-text retrieval. arXiv:2103.08784 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.77"},{"key":"35_CR19","doi-asserted-by":"crossref","unstructured":"Sun, Y., Barber, R., Gupta, M., Aggarwal, C.C., Han, J.: Co-author relationship prediction in heterogeneous bibliographic networks. In: ASONAM (2011)","DOI":"10.1109\/ASONAM.2011.112"},{"key":"35_CR20","unstructured":"Veli\u010dkovi\u0107, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., Bengio, Y.: Graph attention networks. arXiv:1710.10903 (2017)"},{"key":"35_CR21","doi-asserted-by":"crossref","unstructured":"Wang, S., Wang, R., Yao, Z., Shan, S., Chen, X.: Cross-modal scene graph matching for relationship-aware image-text retrieval. 
In: WACV (2020)","DOI":"10.1109\/WACV45572.2020.9093614"},{"key":"35_CR22","doi-asserted-by":"crossref","unstructured":"Yang, J., et al.: Vision-language pre-training with triple contrastive learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15671\u201315680 (2022)","DOI":"10.1109\/CVPR52688.2022.01522"},{"issue":"3","key":"35_CR23","doi-asserted-by":"publisher","first-page":"751","DOI":"10.1007\/s10115-014-0789-0","volume":"45","author":"Y Yang","year":"2015","unstructured":"Yang, Y., Lichtenwalter, R.N., Chawla, N.V.: Evaluating link prediction methods. Knowl. Inf. Syst. 45(3), 751\u2013782 (2015)","journal-title":"Knowl. Inf. Syst."},{"key":"35_CR24","doi-asserted-by":"crossref","unstructured":"Young, P., Lai, A., et\u00a0al.: From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions. T-ACL (2014)","DOI":"10.1162\/tacl_a_00166"},{"key":"35_CR25","unstructured":"Yu, F., Tang, J., et\u00a0al.: Ernie-vil: knowledge enhanced vision-language representations through scene graph. arXiv:2006.16934 (2020)"},{"key":"35_CR26","doi-asserted-by":"crossref","unstructured":"Yu, L., Poirson, P., Yang, S., Berg, A.C., Berg, T.L.: Modeling context in referring expressions. In: European Conference on Computer Vision, pp. 69\u201385. Springer (2016)","DOI":"10.1007\/978-3-319-46475-6_5"},{"key":"35_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, P., Li, X., Hu, X., et\u00a0al.: Vinvl: revisiting visual representations in vision-language models. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00553"},{"key":"35_CR28","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Lei, Z., Zhang, Z., Li, S.Z.: Context-aware attention network for image-text retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
3536\u20133545 (2020)","DOI":"10.1109\/CVPR42600.2020.00359"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-9437-9_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T16:33:30Z","timestamp":1730392410000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-9437-9_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9789819794362","9789819794379"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-9437-9_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 
2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2024\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}