{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T05:40:10Z","timestamp":1746769210890,"version":"3.40.5"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031876622","type":"print"},{"value":"9783031876639","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-87663-9_9","type":"book-chapter","created":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T05:13:24Z","timestamp":1746767604000},"page":"108-118","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Image Caption Extending Using LLM and Style Transfer"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0735-7697","authenticated-orcid":false,"given":"Nikita","family":"Andriyanov","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4880-0432","authenticated-orcid":false,"given":"Vitaly","family":"Dementiev","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,10]]},"reference":[{"key":"9_CR1","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1134\/S1054661822030026","volume":"32","author":"NA Andriyanov","year":"2022","unstructured":"Andriyanov, N.A.: Combining text and image analysis methods for solving multimodal classification problems. Pattern Recognit Image Anal. 32, 489\u2013494 (2022). https:\/\/doi.org\/10.1134\/S1054661822030026","journal-title":"Pattern Recognit Image Anal."},{"key":"9_CR2","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1007\/978-3-031-37742-6_31","volume":"13644","author":"NA Andriyanov","year":"2023","unstructured":"Andriyanov, N.A.: Multimodal data processing based on text classifiers and image recognition. Lect. Notes Comput. Sci. 13644, 414\u2013423 (2023). https:\/\/doi.org\/10.1007\/978-3-031-37742-6_31","journal-title":"Lect. Notes Comput. Sci."},{"key":"9_CR3","doi-asserted-by":"publisher","first-page":"2908","DOI":"10.3390\/math11132908","volume":"11","author":"E Jeong","year":"2023","unstructured":"Jeong, E., Kim, G., Kang, S.: Multimodal prompt learning in emotion recognition using context and audio information. Mathematics 11, 2908 (2023). https:\/\/doi.org\/10.3390\/math11132908","journal-title":"Mathematics"},{"key":"9_CR4","doi-asserted-by":"publisher","first-page":"1976","DOI":"10.3390\/buildings13081976","volume":"13","author":"X Nong","year":"2023","unstructured":"Nong, X., Luo, X., Lin, S., Ruan, Y., Ye, X.: Multimodal deep neural network-based sensor data anomaly diagnosis method for structural health monitoring. Buildings 13, 1976 (2023). https:\/\/doi.org\/10.3390\/buildings13081976","journal-title":"Buildings"},{"key":"9_CR5","doi-asserted-by":"publisher","first-page":"727","DOI":"10.3390\/electronics12030727","volume":"12","author":"NA Andriyanov","year":"2023","unstructured":"Andriyanov, N.A.: Development of apple detection system and reinforcement learning for apple manipulator. Electronics 12, 727 (2023). https:\/\/doi.org\/10.3390\/electronics12030727","journal-title":"Electronics"},{"key":"9_CR6","doi-asserted-by":"publisher","unstructured":"Naik, T., Gostu, H., Sharma, R.: Navigating ethics of ai-powered creativity in midjourney. In: 3rd International Conference for Innovation in Technology (INOCON) Proceedings, pp. 1\u20136 (2024). https:\/\/doi.org\/10.1109\/INOCON60754.2024.10511571","DOI":"10.1109\/INOCON60754.2024.10511571"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., Zitnick, C.: Mind\u2019s eye: a recurrent visual representation for image caption generation. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 2422\u20132431 (2015)","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"9_CR8","first-page":"438","volume":"2258","author":"KP Korshunova","year":"2018","unstructured":"Korshunova, K.P.: The neural network image captioning model based on adversarial training. Ceur Workshop Proc. 2258, 438\u2013444 (2018)","journal-title":"Ceur Workshop Proc."},{"key":"9_CR9","doi-asserted-by":"publisher","unstructured":"Sabri, M.A., Madhoune, H.E., Zouitni, C., Aarab, A.: image captioning: an understanding study. In: Lecture Notes in Networks and Systems, vol. 669, pp. 482\u2013491. https:\/\/doi.org\/10.1007\/978-3-031-29860-8_49","DOI":"10.1007\/978-3-031-29860-8_49"},{"key":"9_CR10","doi-asserted-by":"publisher","first-page":"13619","DOI":"10.1007\/s10462-023-10488-2","volume":"56","author":"H Sharma","year":"2023","unstructured":"Sharma, H., Padha, D.: A comprehensive survey on image captioning: from handcrafted to deep learning-based techniques, a taxonomy and open research issues. Artif. Intell. Rev. 56, 13619\u201313661 (2023). https:\/\/doi.org\/10.1007\/s10462-023-10488-2","journal-title":"Artif. Intell. Rev."},{"key":"9_CR11","doi-asserted-by":"publisher","first-page":"2255","DOI":"10.3390\/electronics13122255","volume":"13","author":"M Kochanek","year":"2024","unstructured":"Kochanek, M., et al.: Improving training dataset balance with ChatGPT prompt engineering. Electronics 13, 2255 (2024). https:\/\/doi.org\/10.3390\/electronics13122255","journal-title":"Electronics"},{"key":"9_CR12","doi-asserted-by":"publisher","first-page":"1960","DOI":"10.3390\/electronics13101960","volume":"13","author":"M Morales-Chan","year":"2024","unstructured":"Morales-Chan, M., Amado-Salvatierra, H.R., Medina, J.A., Barchino, R., Hern\u00e1ndez-Rizzardini, R., Teixeira, A.M.: Personalized feedback in massive open online courses: harnessing the power of LangChain and OpenAI API. Electronics 13, 1960 (2024). https:\/\/doi.org\/10.3390\/electronics13101960","journal-title":"Electronics"},{"key":"9_CR13","unstructured":"GCC dataset. https:\/\/ai.google.com\/research\/ConceptualCaptions\/. Accessed 12 Nov 2024"},{"key":"9_CR14","unstructured":"Flicker30k dataset. https:\/\/www.kaggle.com\/datasets\/hsankesara\/flickr-image-dataset. Accessed 09 Nov 2024"},{"key":"9_CR15","unstructured":"MS COCO dataset. https:\/\/www.kaggle.com\/datasets\/nikhil7280\/coco-image-caption. Accessed 14 Nov 2024"},{"key":"9_CR16","doi-asserted-by":"publisher","first-page":"56","DOI":"10.31676\/0235-2591-2023-5-56-63","volume":"5","author":"AI Kutyrev","year":"2023","unstructured":"Kutyrev, A.I., Smirnov, I.G., Andriyanov, N.A.: Neural network models of apple fruit identification in tree crowns: comparative analysis. Horticult. Viticult. 5, 56\u201363 (2023). https:\/\/doi.org\/10.31676\/0235-2591-2023-5-56-63","journal-title":"Horticult. Viticult."},{"key":"9_CR17","doi-asserted-by":"publisher","unstructured":"Varghese, R.: YOLOv8: a novel object detection algorithm with enhanced performance and robustness. In Proceedings of 2024 International Conference on Advances in Data Engineering and Intelligent Computing Systems (ADICS), pp. 1\u20136 (2024). https:\/\/doi.org\/10.1109\/ADICS58448.2024.10533619","DOI":"10.1109\/ADICS58448.2024.10533619"},{"key":"9_CR18","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. arXiv preprints, pp. 1\u201312 (2015). https:\/\/arxiv.org\/abs\/1512.03385. Accessed 13 Nov 2024"},{"key":"9_CR19","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprints, pp. 1\u201316 (2018). https:\/\/arxiv.org\/abs\/1810.04805. Accessed 13 Nov 2024"},{"key":"9_CR20","doi-asserted-by":"publisher","unstructured":"Jain, S.M.: BERT. In: Introduction to Transformers for NLP. Apress, Berkeley, CA (2022). https:\/\/doi.org\/10.1007\/978-1-4842-8844-3_3","DOI":"10.1007\/978-1-4842-8844-3_3"},{"key":"9_CR21","unstructured":"Kishore, P.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics. Association for Computational Linguistics, pp. 44\u201355 (2002)"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence, Z.C., Parikh, D.: Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition. ICPR 2024 International Workshops and Challenges"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-87663-9_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T05:13:31Z","timestamp":1746767611000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-87663-9_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031876622","9783031876639"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-87663-9_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"10 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}