{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:23:39Z","timestamp":1740108219798,"version":"3.37.3"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,9,12]],"date-time":"2024-09-12T00:00:00Z","timestamp":1726099200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,12]],"date-time":"2024-09-12T00:00:00Z","timestamp":1726099200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,11]]},"DOI":"10.1007\/s00138-024-01597-1","type":"journal-article","created":{"date-parts":[[2024,9,12]],"date-time":"2024-09-12T18:03:35Z","timestamp":1726164215000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A novel key point based ROI segmentation and image captioning using guidance information"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0279-6850","authenticated-orcid":false,"given":"Jothi Lakshmi","family":"Selvakani","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bhuvaneshwari","family":"Ranganathan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Geetha","family":"Palanisamy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,12]]},"reference":[{"key":"1597_CR1","doi-asserted-by":"publisher","unstructured":"Althubiti, S., William, N., Janelle, M., Xiaohong, Y., Albert, E., Applying long short-term memory recurrent neural network for intrusion detection, IEEE, pp. 1\u20135 (2018). https:\/\/doi.org\/10.1109\/SECON.2018.8478898","DOI":"10.1109\/SECON.2018.8478898"},{"key":"1597_CR2","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1007\/11744023_32","volume-title":"Computer vision \u2013 ECCV 2006: 9th European conference on computer vision, Graz, Austria, May 7-13, 2006. Proceedings, Part I","author":"H Bay","year":"2006","unstructured":"Bay, H., Tuytelaars, T., Van Gool, L.: Surf: Speeded up robust features. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) Computer vision \u2013 ECCV 2006: 9th European conference on computer vision, Graz, Austria, May 7-13, 2006. Proceedings, Part I, pp. 404\u2013417. Springer, Berlin (2006). https:\/\/doi.org\/10.1007\/11744023_32"},{"key":"1597_CR3","doi-asserted-by":"publisher","unstructured":"Chen, L., Hanwang, Z., Jun, X., Liqiang, N., Jian, S., Wei, L., Tat, SC., Sca-cnn: Spatial and channel-wise attention in convolutional networks for image captioning. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5659\u20135667 (2017). https:\/\/doi.org\/10.48550\/arXiv.1611.05594","DOI":"10.48550\/arXiv.1611.05594"},{"key":"1597_CR4","doi-asserted-by":"publisher","unstructured":"Farhadi, A., Mohsen, H., Mohammad, AS., Peter, Y., Cyrus, R., Julia, H., David, F., Every picture tells a story: Generating sentences from images. European Conference on Computer Vision, Springer, pp. 15\u201329 (2015). https:\/\/doi.org\/10.1007\/978-3-642-15561-1_2","DOI":"10.1007\/978-3-642-15561-1_2"},{"key":"1597_CR5","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/6895833","author":"S Fekri-Ershad","year":"2022","unstructured":"Fekri-Ershad, S., Al-Imari, M.J., Hamad, M.H., Alsaffar, M.F., Hassan, F.G., Hadi, M.E., Mahdi, K.S.: Cell phenotype classification based on joint of texture information and multilayer feature extraction in densenet. Comput. Intell. Neurosci. (2022). https:\/\/doi.org\/10.1155\/2022\/6895833","journal-title":"Comput. Intell. Neurosci."},{"issue":"2","key":"1597_CR6","doi-asserted-by":"publisher","first-page":"210","DOI":"10.48550\/arXiv.1212.4522","volume":"106","author":"Y Gong","year":"2014","unstructured":"Gong, Y., Qifa, K., Michael, I., Svetlana, L.: A multi-view embedding space for modeling internet images, tags, and their semantics. Int. J. Comput. Vision 106(2), 210\u2013233 (2014). https:\/\/doi.org\/10.48550\/arXiv.1212.4522","journal-title":"Int. J. Comput. Vision"},{"key":"1597_CR7","doi-asserted-by":"publisher","unstructured":"Gupta, A., Prashanth, M., From image annotation to image description International conference on neural information processing. Springer, pp. 196\u2013204 (2012). https:\/\/doi.org\/10.1007\/978-3-642-34500-5_24","DOI":"10.1007\/978-3-642-34500-5_24"},{"key":"1597_CR8","doi-asserted-by":"publisher","unstructured":"Hashem, P., Naghsh-Nilchi, AR., Mohammadi, HM., Transformer-based local-global guidance for image captioning. Expert Systems With Applications, Volume 223, 2023, 119774, ISSN pp. 0957\u20134174, https:\/\/doi.org\/10.1016\/j.eswa.2023.119774.","DOI":"10.1016\/j.eswa.2023.119774"},{"key":"1597_CR9","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1613\/jair.3994","volume":"47","author":"M Hodosh","year":"2013","unstructured":"Hodosh, M., Young, P., Hockenmaier, J.: Framing image description as a ranking task: Data, models and evaluation metrics. J. Artif. Intell. Res. 47, 853\u2013899 (2013). https:\/\/doi.org\/10.1613\/jair.3994","journal-title":"J. Artif. Intell. Res."},{"key":"1597_CR10","doi-asserted-by":"publisher","unstructured":"Jia, Xu., Efstratios, G., Basura, F., Tinne, T.: Guiding the long-short term memory model for image caption generation. In: Proceedings of the IEEE international conference on computer vision, pp. 2407\u20132415 (2015). https:\/\/doi.org\/10.48550\/arXiv.1509.04942","DOI":"10.48550\/arXiv.1509.04942"},{"key":"1597_CR11","doi-asserted-by":"publisher","unstructured":"Jin, J., Kun, F., Runpeng, C., Fei, S., Changshui, Z.: Aligning where to see and what to tell: image caption with region-based attention and scene factorization.In: IEEE Transactions on pattern analysis and machine intelligence, 39(12) pp.2321\u20132334 (2015). https:\/\/doi.org\/10.48550\/arXiv.1506.06272","DOI":"10.48550\/arXiv.1506.06272"},{"issue":"4","key":"1597_CR12","doi-asserted-by":"publisher","first-page":"664","DOI":"10.1109\/TPAMI.2016.2598339","volume":"39","author":"A Karpathy","year":"2014","unstructured":"Karpathy, A., Fei-Fei, Li.: Deep visual-semantic alignments for generating image descriptions. IEEE Trans. Pattern Anal. Mach. Intell. 39(4), 664\u2013676 (2014). https:\/\/doi.org\/10.1109\/TPAMI.2016.2598339","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1597_CR13","unstructured":"Kiros, R., Ruslan, S., Rich, Z., Multimodal neural language models. In: International conference on machine learning, PMLR, pp. 595\u2013603 (2014)."},{"key":"1597_CR14","doi-asserted-by":"publisher","unstructured":"Kulkarni, G., Visruth, P., Vicente, O., Sagnik, D., Siming, L., Yejin, C., Alexander, C., Berg, Tamara, L., Berg, B.: Understanding and generating simple image descriptions. In: IEEE transactions on pattern analysis and machine intelligence, 35(12): pp. 2891\u20132903 (2013). https:\/\/doi.org\/10.1109\/CVPR.2011.5995466","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"1597_CR15","unstructured":"Kuznetsova, P., Vicente, O., Alexander, C., Berg, Tamara L., Berg, A., Yejin, C.: Generalizing Image Captions for Image-Text Parallel Corpus. ACL, pp. 790\u2013796 (2013)."},{"key":"1597_CR16","unstructured":"Li, S., Girish, K., Tamara, B., Alexander, B., Yejin, C., Composing simple image descriptions using web-scale n-grams. Proceedings of the Fifteenth conference on computational natural language learning, association for computational linguistics, pp. 220\u2013228 (2011)."},{"issue":"2","key":"1597_CR17","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vision 60(2), 91\u2013110 (2004). https:\/\/doi.org\/10.1023\/B:VISI.0000029664.99615.94","journal-title":"Int. J. Comput. Vision"},{"issue":"7","key":"1597_CR18","doi-asserted-by":"publisher","first-page":"4471","DOI":"10.1007\/s11831-021-09542-5","volume":"28","author":"R Mittal","year":"2021","unstructured":"Mittal, R., Arora, S., Bansal, V., Bhatia, M.P.S.: An extensive study on deep learning: techniques, applications. Arch. Comput. Method. Eng. 28(7), 4471\u20134485 (2021). https:\/\/doi.org\/10.1007\/s11831-021-09542-5","journal-title":"Arch. Comput. Method. Eng."},{"key":"1597_CR19","doi-asserted-by":"publisher","unstructured":"Papineni, K., Salim R, Todd, W., Wei-Jing, Z, Bleu: a method for automatic evaluation of machine translation. Proceedings of the 40th annual meeting of the association for computational linguistics, pp. 311\u2013318 (2002). https:\/\/doi.org\/10.3115\/1073083.1073135","DOI":"10.3115\/1073083.1073135"},{"key":"1597_CR20","doi-asserted-by":"publisher","unstructured":"Pedersoli, M., Thomas, L., Cordelia, S., Jakob, V.: Areas of attention for image captioning. Proceedings of the IEEE international conference on computer vision, pp. 1242\u20131250 (2017). https:\/\/doi.org\/10.48550\/arXiv.1612.01033","DOI":"10.48550\/arXiv.1612.01033"},{"issue":"8","key":"1597_CR21","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Comput."},{"issue":"6","key":"1597_CR22","doi-asserted-by":"publisher","first-page":"1029","DOI":"10.18280\/ts.370615","volume":"37","author":"F Shafiei","year":"2020","unstructured":"Shafiei, F., Ershad, S.F.: Detection of lung cancer tumor in ct scan images using novel combination of super pixel and active contour algorithms. Traitement du Signal 37(6), 1029\u20131035 (2020)","journal-title":"Traitement du Signal"},{"key":"1597_CR23","doi-asserted-by":"publisher","first-page":"110555","DOI":"10.1016\/j.patcog.2024.110555","volume":"153","author":"S Cao","year":"2024","unstructured":"Cao, S., An, G., Cen, Y., Yang, Z., Lin, W.: CAST: cross-modal retrieval and Visual conditioning for image captioning. Pattern Recognit. 153, 110555 (2024). https:\/\/doi.org\/10.1016\/j.patcog.2024.110555","journal-title":"Pattern Recognit."},{"key":"1597_CR24","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1162\/tacl_a_00177","volume":"2","author":"R Socher","year":"2014","unstructured":"Socher, R., Karpathy, A., Le, Q.V., Manning, C.D., Ng, A.Y.: Grounded compositional semantics for finding and describing images with sentences. Trans. Assoc. Comput. Linguist. 2, 207\u2013218 (2014)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"1597_CR25","doi-asserted-by":"publisher","unstructured":"Tan, Y H., Chee, S C., phi-LSTM: a phrase-based hierarchical LSTM model for image captioning. Asian conference on computer vision. Springer, pp. 101\u2013117 (2016). https:\/\/doi.org\/10.48550\/arXiv.1608.05813","DOI":"10.48550\/arXiv.1608.05813"},{"issue":"2s","key":"1597_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3115432","volume":"14","author":"C Wang","year":"2018","unstructured":"Wang, C., Yang, H., Meinel, C.: Image captioning with deep bidirectional LSTMs and multi-task learning. ACM Trans. Multimed. Comput. Commun. Appl. 14(2s), 1\u201320 (2018). https:\/\/doi.org\/10.1145\/3115432","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"1597_CR27","doi-asserted-by":"publisher","unstructured":"Wang, C., Haojin, Y., Christian, B., Christoph, M., Image Captioning with Deep Bidirectional LSTMs. Proceedings of the 24th ACM international conference on multimedia, MM \u201916. New York, NY, USA: Association for computing machinery, pp. 988\u2013997 (2016). https:\/\/doi.org\/10.48550\/arXiv.1604.00790","DOI":"10.48550\/arXiv.1604.00790"},{"key":"1597_CR28","doi-asserted-by":"publisher","first-page":"127823","DOI":"10.1016\/j.neucom.2024.127823","volume":"593","author":"Y Xiaobao","year":"2024","unstructured":"Xiaobao, Y., Yang, Y., Sugang, M., Zhijie, L., Wei, D., Marcin, W.: SAMT-generator: A second-attention for image captioning based on multi-stage transformer network. Neurocomputing 593, 127823 (2024). https:\/\/doi.org\/10.1016\/j.neucom.2024.127823","journal-title":"Neurocomputing"},{"key":"1597_CR29","doi-asserted-by":"publisher","unstructured":"Xu, K., Jimmy, B., Ryan, K., Kyunghyun, C., Aaron, C., Ruslan, S., Rich, Z., Yoshua, B., Show, attend and tell: Neural image caption generation with visual attention. International conference on machine learning, PMLR, pp. 2048\u20132057 (2015). https:\/\/doi.org\/10.48550\/arXiv.1502.03044","DOI":"10.48550\/arXiv.1502.03044"},{"key":"1597_CR30","unstructured":"Yang, Y., Ching, T., Hal III, D., Yiannis, A.: Corpus-guided sentence generation of natural images. Proceedings of the 2011 conference on empirical methods in natural language processing, pp. 444\u2013454 (2011)."},{"issue":"12","key":"1597_CR31","doi-asserted-by":"publisher","first-page":"4467","DOI":"10.1109\/TCSVT.2019.2947482","volume":"30","author":"J Yu","year":"2019","unstructured":"Yu, J., Jing, L., Zhou, Y., Qingming, H.: Multimodal transformer with multi-view visual representation for image captioning. IEEE trans. circuits syst. video technol. 30(12), 4467\u20134480 (2019)","journal-title":"IEEE trans. circuits syst. video technol."},{"key":"1597_CR32","doi-asserted-by":"publisher","first-page":"115462","DOI":"10.1016\/j.eswa.2021.115462","volume":"184","author":"J Zhang","year":"2021","unstructured":"Zhang, J., Kangkang, L., Zhenkun, W., Xianwen, Z., Zhe, W.: Visual enhanced gLSTM for image captioning. Expert Syst. Appl. 184, 115462 (2021)","journal-title":"Expert Syst. Appl."}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01597-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01597-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01597-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T03:04:46Z","timestamp":1731035086000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01597-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,12]]},"references-count":32,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,11]]}},"alternative-id":["1597"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01597-1","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"type":"print","value":"0932-8092"},{"type":"electronic","value":"1432-1769"}],"subject":[],"published":{"date-parts":[[2024,9,12]]},"assertion":[{"value":"27 January 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 June 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 August 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 September 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Informed consent was obtained from all individual participants included in the studies.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}}],"article-number":"127"}}