{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T04:10:36Z","timestamp":1748059836690,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819609932","type":"print"},{"value":"9789819609949","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0994-9_17","type":"book-chapter","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T13:23:11Z","timestamp":1748006591000},"page":"183-191","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Discrete Diffusion Model for\u00a0Image Captioning by\u00a0Self-Critical Learning"],"prefix":"10.1007","author":[{"given":"Vincenzo","family":"Silvio","sequence":"first","affiliation":[]},{"given":"Emanuel","family":"Di Nardo","sequence":"additional","affiliation":[]},{"given":"Angelo","family":"Ciaramella","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,24]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Ashar, A.A.K., Abrar, A., Liu, J.: A survey on deep learning-based smart assistive aids for visually impaired individuals. In: Proceedings of the 2023 7th International Conference on Information System and Data Mining, ICISDM \u201923, pp. 90\u201395, New York, NY, USA, 2023. Association for Computing Machinery","DOI":"10.1145\/3603765.3603775"},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Wang, J., Wang, S., Zhang, Y.: Artificial intelligence for visually impaired 77, 102391","DOI":"10.1016\/j.displa.2023.102391"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Khuat, T.T., Kedziora, D.J., Gabrys, B., et\u00a0al.: The roles and modes of human interactions with automated machine learning systems: a critical review and perspectives. Found. Trends\u00ae Human\u2013Comput. Interact. 17(3\u20134), 195\u2013387 (2023)","DOI":"10.1561\/1100000091"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Lv, Z., Poiesi, F., Dong, Q., Lloret, J., Song, H.: Deep learning for intelligent human\u2013computer interaction. Appl. Sci. 12(22) (2022)","DOI":"10.3390\/app122211457"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Chen, W.Y., Liu, W.W., Bakker, E.M., Georgiou, T., Fieguth, P., Liu, L., Lew, M.S.: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, Deep learning for instance retrieval (2022)","DOI":"10.1109\/TPAMI.2022.3218591"},{"key":"17_CR6","unstructured":"Zhou, W., Li, H., Tian, Q.: Recent advance in content-based image retrieval: a literature survey. ArXiv, abs\/1706.06064 (2017)"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Zhang, J., Kalantidis, Y., Rohrbach, M., Paluri, M., Elgammal, A., Elhoseiny, M.: Large-scale visual relationship understanding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 9185\u20139194 (2019)","DOI":"10.1609\/aaai.v33i01.33019185"},{"key":"17_CR8","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Ramos, R., Elliott, D., Martins, B.: Retrieval-augmented image captioning. arXiv preprint (2023). arXiv:2302.08268","DOI":"10.18653\/v1\/2023.eacl-main.266"},{"key":"17_CR10","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-CNN: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"17_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.physd.2019.132306","volume":"404","author":"A Sherstinsky","year":"2020","unstructured":"Sherstinsky, A.: Fundamentals of recurrent neural network (RNN) and long short-term memory (LSTM) network. Physica D: Nonlinear Phenomena 404, 132306 (2020)","journal-title":"Physica D: Nonlinear Phenomena"},{"key":"17_CR12","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"17_CR14","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"17_CR15","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, pp. 8162\u20138171. PMLR (2021)"},{"issue":"11","key":"17_CR16","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Bing, X., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"17_CR17","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. CoRR, abs\/1312.6114 (2013)"},{"key":"17_CR18","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7008\u20137024 (2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"17_CR20","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Reinforcement Learning, pp. 5\u201332 (1992)","DOI":"10.1007\/978-1-4615-3618-5_2"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: Bleu: a method for automatic evaluation of machine translation. In: Isabelle, P., Charniak, E., Lin, D. (eds.) Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318, Philadelphia, Pennsylvania, USA, July 2002. Association for Computational Linguistics","DOI":"10.3115\/1073083.1073135"},{"key":"17_CR24","unstructured":"Banerjee, S., Lavie, A.: Meteor: an automatic metric for mt evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, pp. 65\u201372 (2005)"},{"key":"17_CR25","unstructured":"Lin, C.-Y.: Rouge: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Lin, C.-Y., Och, F.-J.: Automatic evaluation of machine translation quality using longest common subsequence and skip-bigram statistics. In: Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL-04), pp. 605\u2013612 (2004)","DOI":"10.3115\/1218955.1219032"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: Spice: semantic propositional image caption evaluation. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part V 14, pp. 382\u2013398. Springer (2016)","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"17_CR28","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1016\/j.ins.2022.11.055","volume":"619","author":"E Di Nardo","year":"2023","unstructured":"Di Nardo, E., Ciaramella, A.: Tracking vision transformer with class and regression tokens. Inf. Sci. 619, 276\u2013287 (2023)","journal-title":"Inf. Sci."}],"container-title":["Smart Innovation, Systems and Technologies","Advanced Neural Artificial Intelligence: Theories and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0994-9_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T13:23:26Z","timestamp":1748006606000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0994-9_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819609932","9789819609949"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0994-9_17","relation":{},"ISSN":["2190-3018","2190-3026"],"issn-type":[{"value":"2190-3018","type":"print"},{"value":"2190-3026","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}