{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T07:14:49Z","timestamp":1761808489149,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":15,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789811666230"},{"type":"electronic","value":"9789811666247"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-981-16-6624-7_7","type":"book-chapter","created":{"date-parts":[[2022,2,28]],"date-time":"2022-02-28T04:26:48Z","timestamp":1646022408000},"page":"63-70","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Bengali Visual Genome: A\u00a0Multimodal Dataset for\u00a0Machine Translation and\u00a0Image Captioning"],"prefix":"10.1007","author":[{"given":"Arghyadeep","family":"Sen","sequence":"first","affiliation":[]},{"given":"Shantipriya","family":"Parida","sequence":"additional","affiliation":[]},{"given":"Ketan","family":"Kotwal","sequence":"additional","affiliation":[]},{"given":"Subhadarshi","family":"Panda","sequence":"additional","affiliation":[]},{"given":"Ond\u0159ej","family":"Bojar","sequence":"additional","affiliation":[]},{"given":"Satya Ranjan","family":"Dash","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,2,28]]},"reference":[{"issue":"2","key":"7_CR1","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/s10590-020-09250-0","volume":"34","author":"U Sulubacak","year":"2020","unstructured":"Sulubacak, U., Caglayan, O., Gr\u00f6nroos, S.A., Rouhe, A., Elliott, D., Specia, L., Tiedemann, J.: Multimodal machine translation through visuals and speech. Mach. Transl. 34(2), 97\u2013147 (2020)","journal-title":"Mach. Transl."},{"issue":"1","key":"7_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41467-020-18073-9","volume":"11","author":"M Popel","year":"2020","unstructured":"Popel, M., Tomkova, M., Tomek, J., Kaiser, \u0141, Uszkoreit, J., Bojar, O., \u017dabokrtsk\u00fd, Z.: Transforming machine translation: a deep learning system reaches news translation quality comparable to human professionals. Nat. Commun. 11(1), 1\u201315 (2020)","journal-title":"Nat. Commun."},{"key":"7_CR3","unstructured":"Parida, S., Motlicek, P., Dash, A.R., Dash, S.R., Mallick, D.K., Biswal, S.P., Pattnaik, P., Nayak, B.N., Bojar, O.: Odianlp\u2019s participation in WAT2020. In: Proceedings of the 7th Workshop on Asian Translation. pp. 103\u2013108 (2020)"},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Khan, M.F., Sadiq-Ur-Rahman, S., Islam, M.S.: Improved bengali image captioning via deep convolutional neural network based encoder-decoder model. In: Proceedings of International Joint Conference on Advances in Computational Intelligence. pp. 217\u2013229. Springer (2021)","DOI":"10.1007\/978-981-16-0586-4_18"},{"key":"7_CR5","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1016\/j.procs.2019.06.100","volume":"154","author":"M Rahman","year":"2019","unstructured":"Rahman, M., Mohammed, N., Mansoor, N., Momen, S.: Chittron: an automatic Bangla image captioning system. Procedia Comput. Sci. 154, 636\u2013642 (2019)","journal-title":"Procedia Comput. Sci."},{"key":"7_CR6","unstructured":"Kamruzzaman, T.: Dataset for image captioning system (in bangla) (2021)"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Nakazawa, T., Doi, N., Higashiyama, S., Ding, C., Dabre, R., Mino, H., Goto, I., Pa, W.P., Kunchukuttan, A., Oda, Y., Parida, S., Bojar, O., Kurohashi, S.: Overview of the 6th workshop on Asian translation. In: Proceedings of the 6th Workshop on Asian Translation. pp. 1\u201335. Association for Computational Linguistics, Hong Kong, China (Nov 2019). https:\/\/doi.org\/10.18653\/v1\/D19-5201, https:\/\/www.aclweb.org\/anthology\/D19-5201","DOI":"10.18653\/v1\/D19-5201"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Parida, S., Bojar, O., Dash, S.R.: Hindi visual genome: a dataset for multi-modal English to hindi machine translation. Comput. Sist. 23(4) (2019)","DOI":"10.13053\/cys-23-4-3294"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Kudo, T., Richardson, J.: SentencePiece: A simple and language independent subword tokenizer and detokenizer for neural text processing. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. pp. 66\u201371. Association for Computational Linguistics, Brussels, Belgium (Nov 2018). https:\/\/doi.org\/10.18653\/v1\/D18-2012, https:\/\/www.aclweb.org\/anthology\/D18-2012","DOI":"10.18653\/v1\/D18-2012"},{"key":"7_CR10","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: Teh, Y.W., Titterington, M. (eds.) Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics. Proceedings of Machine Learning Research, vol.\u00a09, pp. 249\u2013256. PMLR, Chia Laguna Resort, Sardinia, Italy (13\u201315 May 2010), http:\/\/proceedings.mlr.press\/v9\/glorot10a.html"},{"key":"7_CR11","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization (2014), http:\/\/arxiv.org\/abs\/1412.6980, cite arxiv:1412.6980Comment: Published as a conference paper at the 3rd International Conference for Learning Representations, San Diego, 2015"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR). pp. 3156\u20133164 (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298935","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV). pp. 1440\u20131448 (2015). https:\/\/doi.org\/10.1109\/ICCV.2015.169","DOI":"10.1109\/ICCV.2015.169"},{"key":"7_CR14","unstructured":"Soh, M.: Learning cnn-lstm architectures for image caption generation"},{"issue":"12","key":"7_CR15","doi-asserted-by":"publisher","first-page":"4467","DOI":"10.1109\/TCSVT.2019.2947482","volume":"30","author":"J Yu","year":"2019","unstructured":"Yu, J., Li, J., Yu, Z., Huang, Q.: Multimodal transformer with multi-view visual representation for image captioning. IEEE Trans. Circuits Syst. Video Technol. 30(12), 4467\u20134480 (2019)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."}],"container-title":["Smart Innovation, Systems and Technologies","Intelligent Data Engineering and Analytics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-16-6624-7_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,5]],"date-time":"2022-05-05T17:17:31Z","timestamp":1651771051000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-16-6624-7_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9789811666230","9789811666247"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-981-16-6624-7_7","relation":{},"ISSN":["2190-3018","2190-3026"],"issn-type":[{"type":"print","value":"2190-3018"},{"type":"electronic","value":"2190-3026"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"28 February 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}