{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:29:38Z","timestamp":1730266178586,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10191656","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T13:30:03Z","timestamp":1690983003000},"page":"01-08","source":"Crossref","is-referenced-by-count":0,"title":["Image Alone Are Not Enough: A General Semantic-Augmented Transformer-Based Framework for Image Captioning"],"prefix":"10.1109","author":[{"given":"Jiawei","family":"Liu","sequence":"first","affiliation":[{"name":"East China Normal University,Shanghai,China"}]},{"given":"Xin","family":"Lin","sequence":"additional","affiliation":[{"name":"East China Normal University,Shanghai,China"}]},{"given":"Liang","family":"He","sequence":"additional","affiliation":[{"name":"East China Normal University,Shanghai,China"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1145\/3295748"},{"key":"ref2","article-title":"From show to tell: A survey on image captioning","author":"Stefanini","year":"2021","journal-title":"arXiv preprint"},{"key":"ref3","article-title":"Sequence to sequence learning with neural networks","volume":"27","author":"Sutskever","year":"2014","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/CVPR.2015.7298935"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.5555\/3045118.3045336"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICCV.2017.140"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/CVPR.2017.127"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/CVPR.2017.345"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/ICCV.2019.00902"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/CVPR42600.2020.01059"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ICCV.2019.00473"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1145\/3474085.3475439"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1109\/ICCV.2019.01042"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1007\/978-3-030-01264-9_42"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/CVPR46437.2021.01521"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/CVPR.2019.00850"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/CVPR42600.2020.00998"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/CVPR.2019.00859"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1609\/aaai.v32i1.12266"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/CVPR.2019.00856"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/CVPR.2018.00636"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/CVPR42600.2020.01028"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/CVPR.2019.01094"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.48550\/ARXIV.1706.03762"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/CVPR42600.2020.01098"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1609\/aaai.v35i3.16328"},{"key":"ref27","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","volume":"28","author":"Ren","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref28","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.48550\/arXiv.1405.0312"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1109\/CVPR.2015.7298932"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.3115\/1073083.1073135"},{"key":"ref32","first-page":"65","article-title":"Meteor: An automatic metric for mt evaluation with improved correlation with human judgments","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization","author":"Banerjee","year":"2005"},{"key":"ref33","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Text summarization branches out"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1109\/CVPR.2015.7299087"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1007\/978-3-319-46454-1_24"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2023,6,18]]},"location":"Gold Coast, Australia","end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10191656.pdf?arnumber=10191656","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T00:22:28Z","timestamp":1710375748000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10191656\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10191656","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}