{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T17:04:36Z","timestamp":1762794276894,"version":"build-2065373602"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s00138-025-01757-x","type":"journal-article","created":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T15:52:35Z","timestamp":1761148355000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Entity-aware multi-image captioning by common context text selection and joint entity prompting"],"prefix":"10.1007","volume":"36","author":[{"given":"Shichao","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingqiang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,22]]},"reference":[{"issue":"4","key":"1757_CR1","doi-asserted-by":"publisher","first-page":"664","DOI":"10.1109\/TPAMI.2016.2598339","volume":"39","author":"A Karpathy","year":"2017","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. IEEE Trans. Pattern Anal. Mach. Intell. 39(4), 664\u2013676 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2598339","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1757_CR2","doi-asserted-by":"publisher","unstructured":"Biten, A.F., G\u00f3mez, L., Rusi\u00f1ol, M., et al.: Good news, everyone! context driven entity-aware captioning for news images. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2019, Long Beach, CA, USA, June 16-20, 2019. Computer Vision Foundation \/ IEEE, pp 12466\u201312475, https:\/\/doi.org\/10.1109\/CVPR.2019.01275, http:\/\/openaccess.thecvf.com\/content_CVPR_2019\/html\/Biten_Good_News_Everyone_Context_Driven_Entity-Aware_Captioning_for_News_Images_CVPR_2019_paper.html (2019)","DOI":"10.1109\/CVPR.2019.01275"},{"key":"1757_CR3","doi-asserted-by":"publisher","unstructured":"Tran, A., Mathews, A.P., Xie, L.: Transform and tell: entity-aware news image captioning. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, June 13-19, 2020. Computer Vision Foundation \/ IEEE, pp 13032\u201313042, https:\/\/doi.org\/10.1109\/CVPR42600.2020.01305, https:\/\/openaccess.thecvf.com\/content_CVPR_2020\/html\/Tran_Transform_and_Tell_Entity-Aware_News_Image_Captioning_CVPR_2020_paper.html (2020)","DOI":"10.1109\/CVPR42600.2020.01305"},{"key":"1757_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/J.CVIU.2023.103878","volume":"238","author":"J Chen","year":"2024","unstructured":"Chen, J.: Transform, contrast and tell: coherent entity-aware multi-image captioning. Comput. Vis. Image Underst. 238, 103878 (2024). https:\/\/doi.org\/10.1016\/J.CVIU.2023.103878","journal-title":"Comput. Vis. Image Underst."},{"key":"1757_CR5","doi-asserted-by":"publisher","unstructured":"Qu, T., Tuytelaars, T., Moens, M.: Visually-aware context modeling for news image captioning. In: Duh K, G\u00f3mez-Adorno H, Bethard S (eds) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), NAACL 2024, Mexico City, Mexico, June 16-21, 2024. Association for Computational Linguistics, pp 2927\u20132943, https:\/\/doi.org\/10.18653\/V1\/2024.NAACL-LONG.162, (2024)","DOI":"10.18653\/V1\/2024.NAACL-LONG.162"},{"key":"1757_CR6","doi-asserted-by":"publisher","unstructured":"Zhang, J., Fang, S., Mao, Z., et al.: Fine-tuning with multi-modal entity prompts for news image captioning. In: Magalh\u00e3es J, Bimbo AD, Satoh S, et\u00a0al (eds) MM \u201922: The 30th ACM International Conference on Multimedia, Lisboa, Portugal, October 10 - 14, 2022. ACM, pp 4365\u20134373, https:\/\/doi.org\/10.1145\/3503161.3547883, (2022)","DOI":"10.1145\/3503161.3547883"},{"key":"1757_CR7","unstructured":"Radford, A., Kim, J.W., Hallacy, C., et\u00a0al.: Learning transferable visual models from natural language supervision. In: Meila M, Zhang T (eds) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18-24 July 2021, Virtual Event, Proceedings of Machine Learning Research, vol 139. PMLR, pp 8748\u20138763, http:\/\/proceedings.mlr.press\/v139\/radford21a.html, (2021)"},{"issue":"1","key":"1757_CR8","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2009","unstructured":"Scarselli, F., Gori, M., Tsoi, A.C., et al.: The graph neural network model. IEEE Trans. Neural Netw. 20(1), 61\u201380 (2009). https:\/\/doi.org\/10.1109\/TNN.2008.2005605","journal-title":"IEEE Trans. Neural Netw."},{"key":"1757_CR9","doi-asserted-by":"publisher","unstructured":"Sun, C., Qiu, X., Xu, Y., et\u00a0al.: How to fine-tune BERT for text classification? In: Sun M, Huang X, Ji H, et\u00a0al (eds) Chinese Computational Linguistics - 18th China National Conference, CCL 2019, Kunming, China, October 18-20, 2019, Proceedings, Lecture Notes in Computer Science, vol 11856. Springer, pp 194\u2013206, https:\/\/doi.org\/10.1007\/978-3-030-32381-3_16, (2019)","DOI":"10.1007\/978-3-030-32381-3_16"},{"key":"1757_CR10","doi-asserted-by":"publisher","unstructured":"Reynolds, L., McDonell, K.: Prompt programming for large language models: beyond the few-shot paradigm. In: Kitamura Y, Quigley A, Isbister K, et\u00a0al (eds) CHI \u201921: CHI Conference on Human Factors in Computing Systems, Virtual Event \/ Yokohama Japan, May 8-13, 2021, Extended Abstracts. ACM, pp 314:1\u2013314:7, https:\/\/doi.org\/10.1145\/3411763.3451760, (2021)","DOI":"10.1145\/3411763.3451760"},{"key":"1757_CR11","doi-asserted-by":"publisher","unstructured":"Lewis, M., Liu, Y., Goyal, N., et\u00a0al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Jurafsky D, Chai J, Schluter N, et\u00a0al (eds) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, July 5-10, 2020. Association for Computational Linguistics, pp 7871\u20137880, https:\/\/doi.org\/10.18653\/V1\/2020.ACL-MAIN.703, (2020)","DOI":"10.18653\/V1\/2020.ACL-MAIN.703"},{"key":"1757_CR12","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et\u00a0al.: Attention is all you need. In: Guyon I, von Luxburg U, Bengio S, et\u00a0al (eds) Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, USA, pp 5998\u20136008, https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html (2017)"},{"issue":"1","key":"1757_CR13","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1109\/MMUL.2024.3363429","volume":"31","author":"S Ajankar","year":"2024","unstructured":"Ajankar, S., Dutta, T.: Image-relevant entities knowledge-aware news image captioning. IEEE Multim 31(1), 88\u201398 (2024). https:\/\/doi.org\/10.1109\/MMUL.2024.3363429","journal-title":"IEEE Multim"},{"key":"1757_CR14","doi-asserted-by":"publisher","unstructured":"Yang, X., Wu, X., Xu, T.: DRSGN: dual revised semantic graph structured network for image-text matching. In: 7th IEEE International Conference on Cloud Computing and Intelligent Systems, CCIS 2021, Xi\u2019an, China, November 7-8, 2021. IEEE, pp 230\u2013242, https:\/\/doi.org\/10.1109\/CCIS53392.2021.9754625, (2021)","DOI":"10.1109\/CCIS53392.2021.9754625"},{"key":"1757_CR15","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M., Lee, K., et\u00a0al.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein J, Doran C, Solorio T (eds) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long and Short Papers). Association for Computational Linguistics, pp 4171\u20134186, https:\/\/doi.org\/10.18653\/V1\/N19-1423, (2019)","DOI":"10.18653\/V1\/N19-1423"},{"key":"1757_CR16","unstructured":"Radford, A.: Improving language understanding by generative pre-training. arXiv preprint (2018)"},{"key":"1757_CR17","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. In: 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net, https:\/\/openreview.net\/forum?id=YicbFdNTTy (2021)"},{"issue":"11","key":"1757_CR18","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., et al.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998). https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc. IEEE"},{"issue":"10","key":"1757_CR19","first-page":"1995","volume":"3361","author":"Y LeCun","year":"1995","unstructured":"LeCun, Y., Bengio, Y., et al.: Convolutional networks for images, speech, and time series. The handbook of brain theory and neural networks 3361(10), 1995 (1995)","journal-title":"The handbook of brain theory and neural networks"},{"key":"1757_CR20","doi-asserted-by":"publisher","unstructured":"Zhou, M., Luo, G., Rohrbach, A., et\u00a0al.: Focus! relevant and sufficient context selection for news image captioning. In: Goldberg Y, Kozareva Z, Zhang Y (eds) Findings of the Association for Computational Linguistics: EMNLP 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022. Association for Computational Linguistics, pp 6078\u20136088, https:\/\/doi.org\/10.18653\/V1\/2022.FINDINGS-EMNLP.450, (2022)","DOI":"10.18653\/V1\/2022.FINDINGS-EMNLP.450"},{"key":"1757_CR21","doi-asserted-by":"publisher","unstructured":"Xu, N., Gao, Y., Zhang, T., et\u00a0al.: Cross-modal coherence-enhanced feedback prompting for news captioning. In: Cai J, Kankanhalli MS, Prabhakaran B, et\u00a0al (eds) Proceedings of the 32nd ACM International Conference on Multimedia, MM 2024, Melbourne, VIC, Australia, 28 October 2024 - 1 November 2024. ACM, pp 9369\u20139377, https:\/\/doi.org\/10.1145\/3664647.3681497, (2024)","DOI":"10.1145\/3664647.3681497"},{"key":"1757_CR22","unstructured":"Velickovic, P., Cucurull, G., Casanova, A., et\u00a0al.: Graph attention networks. In: 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. OpenReview.net, https:\/\/openreview.net\/forum?id=rJXMpikCZ (2018)"},{"key":"1757_CR23","unstructured":"Brody, S., Alon, U., Yahav, E.: How attentive are graph attention networks? In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022. OpenReview.net, https:\/\/openreview.net\/forum?id=F72ximsx7C1 (2022)"},{"key":"1757_CR24","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings. OpenReview.net, https:\/\/openreview.net\/forum?id=SJU4ayYgl (2017)"},{"key":"1757_CR25","unstructured":"Chen, M., Wei, Z., Huang, Z., et\u00a0al.: Simple and deep graph convolutional networks. In: Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event, Proceedings of Machine Learning Research, vol 119. PMLR, pp 1725\u20131735, http:\/\/proceedings.mlr.press\/v119\/chen20v.html (2020)"},{"key":"1757_CR26","doi-asserted-by":"publisher","unstructured":"Papineni, K., Roukos, S., Ward, T., et\u00a0al.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, July 6-12, 2002, Philadelphia, PA, USA. ACL, pp 311\u2013318, https:\/\/doi.org\/10.3115\/1073083.1073135, https:\/\/aclanthology.org\/P02-1040\/ (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"1757_CR27","doi-asserted-by":"publisher","unstructured":"Denkowski, M.J., Lavie, A.: Meteor universal: language specific translation evaluation for any target language. In: Proceedings of the Ninth Workshop on Statistical Machine Translation, WMT@ACL 2014, June 26-27, 2014, Baltimore, Maryland, USA. The Association for Computer Linguistics, pp 376\u2013380, https:\/\/doi.org\/10.3115\/V1\/W14-3348, (2014)","DOI":"10.3115\/V1\/W14-3348"},{"key":"1757_CR28","unstructured":"Lin, C.Y.: Rouge: a package for automatic evaluation of summaries. In: Text summarization branches out, pp 74\u201381 (2004)"},{"key":"1757_CR29","doi-asserted-by":"publisher","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: consensus-based image description evaluation. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2015, Boston, MA, USA, June 7-12, 2015. IEEE Computer Society, pp 4566\u20134575, https:\/\/doi.org\/10.1109\/CVPR.2015.7299087, (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"1757_CR30","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net, https:\/\/openreview.net\/forum?id=Bkg6RiCqY7 (2019)"},{"key":"1757_CR31","unstructured":"Hu, A., Chen, S., Jin, Q.: ICECAP: information concentrated entity-aware image captioning. CoRR arxiv:abs\/2108.02050 (2021)"},{"key":"1757_CR32","doi-asserted-by":"publisher","first-page":"2659","DOI":"10.1109\/TMM.2023.3301279","volume":"26","author":"W Zhao","year":"2024","unstructured":"Zhao, W., Wu, X.: Boosting entity-aware image captioning with multi-modal knowledge graph. IEEE Trans Multim 26, 2659\u20132670 (2024). https:\/\/doi.org\/10.1109\/TMM.2023.3301279","journal-title":"IEEE Trans Multim"},{"key":"1757_CR33","doi-asserted-by":"publisher","unstructured":"Yang, X., Karaman, S., Tetreault, J.R., et\u00a0al.: Journalistic guidelines aware news image captioning. In: Moens M, Huang X, Specia L, et\u00a0al (eds) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event \/ Punta Cana, Dominican Republic, 7-11 November, 2021. Association for Computational Linguistics, pp 5162\u20135175, https:\/\/doi.org\/10.18653\/V1\/2021.EMNLP-MAIN.419, (2021)","DOI":"10.18653\/V1\/2021.EMNLP-MAIN.419"},{"key":"1757_CR34","doi-asserted-by":"publisher","unstructured":"Kalarani, A.R., Bhattacharyya, P., Chhaya, N., et\u00a0al.: \"let\u2019s not quote out of context\": Unified vision-language pretraining for context assisted image captioning. In: Sitaram S, Klebanov BB, Williams JD (eds) Proceedings of the The 61st Annual Meeting of the Association for Computational Linguistics: Industry Track, ACL 2023, Toronto, Canada, July 9-14, 2023. Association for Computational Linguistics, pp 695\u2013706, https:\/\/doi.org\/10.18653\/V1\/2023.ACL-INDUSTRY.67, (2023)","DOI":"10.18653\/V1\/2023.ACL-INDUSTRY.67"},{"key":"1757_CR35","unstructured":"Xu, K., Ba, J., Kiros, R., et\u00a0al.: Show, attend and tell: neural image caption generation with visual attention. In: Bach FR, Blei DM (eds) Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, Lille, France, 6-11 July 2015, JMLR Workshop and Conference Proceedings, vol\u00a037. JMLR.org, pp 2048\u20132057, http:\/\/proceedings.mlr.press\/v37\/xuc15.html (2015)"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-025-01757-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-025-01757-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-025-01757-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T17:00:41Z","timestamp":1762794041000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-025-01757-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":35,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["1757"],"URL":"https:\/\/doi.org\/10.1007\/s00138-025-01757-x","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"type":"print","value":"0932-8092"},{"type":"electronic","value":"1432-1769"}],"subject":[],"published":{"date-parts":[[2025,10,22]]},"assertion":[{"value":"22 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 September 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 October 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 October 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"134"}}