{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T20:34:27Z","timestamp":1769546067716,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":10,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,7]]},"DOI":"10.1145\/3779153.3779163","type":"proceedings-article","created":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T07:35:51Z","timestamp":1769499351000},"page":"63-69","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Image Captioning Method Based on a CNN-Transformer Hybrid Architecture with Attention Mechanism"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8740-9777","authenticated-orcid":false,"given":"Xin","family":"He","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, Chongqing Technology and Business University, Chongqing, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8258-1908","authenticated-orcid":false,"given":"Yang","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Chongqing Technology and Business University, Chongqing, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0380-2999","authenticated-orcid":false,"given":"Jiajie","family":"Xie","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Chongqing Technology and Business University, Chongqing, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,1,26]]},"reference":[{"key":"e_1_3_3_1_1_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_3_1_3_2","first-page":"2057","volume-title":"In\u00a0International conference on machine learning","author":"Xu Kelvin","unstructured":"Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhutdinov, Richard S Zemel, and Yoshua Bengio. Show, attend and tell: Neural image caption generation with visual attention. In\u00a0International conference on machine learning, pages 2048\u20132057. PMLR, 2015."},{"key":"e_1_3_3_1_4_2","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale.\u00a0arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. An image is worth 16x16 words: Transformers for image recognition at scale.\u00a0arXiv preprint arXiv:2010.11929, 2020."},{"key":"e_1_3_3_1_5_2","volume-title":"In\u00a0OpenAI","author":"Radford Alec","year":"2018","unstructured":"Alec Radford, Karthik Narasimhan, Tim Salimans, and Ilya Sutskever. Improving language understanding by generative pre-training. In\u00a0OpenAI, 2018."},{"key":"e_1_3_3_1_6_2","first-page":"755","volume-title":"In\u00a0European conference on computer vision","author":"Lin Tsung-Yi","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. Microsoft coco: Common objects in context. In\u00a0European conference on computer vision, pages 740\u2013755. Springer, 2014."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_3_1_8_2","first-page":"318","volume-title":"Bleu: a method for automatic evaluation of machine translation. In\u00a0Proceedings of the 40th annual meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. Bleu: a method for automatic evaluation of machine translation. In\u00a0Proceedings of the 40th annual meeting of the Association for Computational Linguistics, pages 311\u2013318, 2002."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"}],"event":{"name":"BDIOT 2025: 2025 9th International Conference on Big Data and Internet of Things","location":"Chongqing China","acronym":"BDIOT 2025"},"container-title":["Proceedings of the 2025 9th International Conference on Big Data and Internet of Things"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3779153.3779163","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T07:37:00Z","timestamp":1769499420000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3779153.3779163"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,7]]},"references-count":10,"alternative-id":["10.1145\/3779153.3779163","10.1145\/3779153"],"URL":"https:\/\/doi.org\/10.1145\/3779153.3779163","relation":{},"subject":[],"published":{"date-parts":[[2025,11,7]]},"assertion":[{"value":"2026-01-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}