{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T19:35:40Z","timestamp":1742931340482,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031346187"},{"type":"electronic","value":"9783031346194"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-34619-4_26","type":"book-chapter","created":{"date-parts":[[2023,6,10]],"date-time":"2023-06-10T19:01:31Z","timestamp":1686423691000},"page":"318-330","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Bornomala: A Deep Learning-Based Bangla Image Captioning Technique"],"prefix":"10.1007","author":[{"given":"Jannatul","family":"Naim","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0176-470X","authenticated-orcid":false,"given":"Md. Bipul","family":"Hossain","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3970-1878","authenticated-orcid":false,"given":"Apurba","family":"Adhikary","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,11]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7008\u20137024 (2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Chen, X., Lawrence Zitnick, C.: Mind\u2019s eye: a recurrent visual representation for image caption generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2422\u20132431 (2015)","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"26_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Deng, J., et al.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Shopon, M., Mohammed, N., Abedin, M.A.: Image augmentation by blocky artifact in deep convolutional neural network for handwritten digit recognition. In: 2017 IEEE International Conference on Imaging, Vision and Pattern Recognition (icIVPR), pp. 1\u20136. IEEE (2017)","DOI":"10.1109\/ICIVPR.2017.7890867"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Hossain, M.B., et al.: Recognition and solution for handwritten equation using convolutional neural network. In: 2018 Joint 7th International Conference on Informatics, Electronics and Vision (ICIEV) and 2018 2nd International Conference on Imaging, Vision and Pattern Recognition (icIVPR). IEEE (2018)","DOI":"10.1109\/ICIEV.2018.8640991"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Sharif, S., et al.: A hybrid deep model with HOG features for Bangla handwritten numeral classification. In: 2016 9th International Conference on Electrical and Computer Engineering (ICECE). IEEE (2016)","DOI":"10.1109\/ICECE.2016.7853957"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Shopon, M., Mohammed, N., Abedin, M.A.: Bangla handwritten digit recognition using autoencoder and deep convolutional neural network. In: 2016 International Workshop on Computational Intelligence (IWCI), pp. 64\u201368. IEEE (2016)","DOI":"10.1109\/IWCI.2016.7860340"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Sharif, S.M.A., Mahboob, M.: A comparison between hybrid models for classifying Bangla isolated basic characters. In: 2017 4th International Conference on Advances in Electrical Engineering (ICAEE), pp. 211\u2013216. IEEE (2017)","DOI":"10.1109\/ICAEE.2017.8255355"},{"key":"26_CR10","unstructured":"Whatshots.in. https:\/\/www.whatshot.in\/kolkata\/celebrate-holi-at-shantiniketan-c-14177. Accessed 22 July 2022"},{"key":"26_CR11","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Adv. Neural Inf. Process. Syst. 25 (2012)"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"26_CR13","unstructured":"Xu, K., et al.: Show, attend and tell: neural image caption generation with visual attention. In: International Conference on Machine Learning. PMLR (2015)"},{"key":"26_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1007\/978-3-319-71589-6_10","volume-title":"Image and Graphics","author":"Z Yang","year":"2017","unstructured":"Yang, Z., Zhang, Y.-J., Rehman, S., Huang, Y.: Image captioning with object detection and localization. In: Zhao, Y., Kong, X., Taubman, D. (eds.) ICIG 2017. LNCS, vol. 10667, pp. 109\u2013118. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-71589-6_10"},{"issue":"3","key":"26_CR15","doi-asserted-by":"publisher","first-page":"6143","DOI":"10.1007\/s10586-018-1885-9","volume":"22","author":"M Han","year":"2018","unstructured":"Han, M., Chen, W., Moges, A.D.: Fast image captioning using LSTM. Clust. Comput. 22(3), 6143\u20136155 (2018). https:\/\/doi.org\/10.1007\/s10586-018-1885-9","journal-title":"Clust. Comput."},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"Xu, K., Wang, H., Tang, P.: Image captioning with deep LSTM based on sequential residual. In: 2017 IEEE International Conference on Multimedia and Expo (ICME), pp. 361\u2013366. IEEE (2017)","DOI":"10.1109\/ICME.2017.8019408"},{"key":"26_CR17","doi-asserted-by":"crossref","unstructured":"Wang, C., et al.: Image captioning with deep bidirectional LSTMs. In: Proceedings of the 24th ACM International Conference on Multimedia (2016)","DOI":"10.1145\/2964284.2964299"},{"key":"26_CR18","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1016\/j.procs.2019.06.100","volume":"154","author":"M Rahman","year":"2019","unstructured":"Rahman, M., et al.: Chittron: an automatic Bangla image captioning system. Procedia Comput. Sci. 154, 636\u2013642 (2019)","journal-title":"Procedia Comput. Sci."},{"issue":"6","key":"26_CR19","doi-asserted-by":"publisher","first-page":"7427","DOI":"10.3233\/JIFS-179351","volume":"37","author":"T Deb","year":"2019","unstructured":"Deb, T., et al.: Oboyob: a sequential-semantic Bengali image captioning engine. J. Intell. Fuzzy Syst. 37(6), 7427\u20137439 (2019)","journal-title":"J. Intell. Fuzzy Syst."},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Kamal, A.H., Jishan, M.A., Mansoor, N.: TextMage: the automated Bangla caption generator based on deep learning. In: 2020 International Conference on Decision Aid Sciences and Application (DASA), pp. 822\u2013826. IEEE (2020)","DOI":"10.1109\/DASA51403.2020.9317108"},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"Humaira, M., et al.: A hybridized deep learning method for Bengali image captioning. Int. J. Adv. Comput. Sci. Appl. 12(2) (2021)","DOI":"10.14569\/IJACSA.2021.0120287"},{"issue":"2","key":"26_CR22","doi-asserted-by":"publisher","first-page":"109","DOI":"10.26555\/ijain.v6i2.499","volume":"6","author":"A Jishan","year":"2020","unstructured":"Jishan, A., et al.: Hybrid deep neural network for Bangla automated image descriptor. Int. J. Adv. Intell. Inform. 6(2), 109\u2013122 (2020)","journal-title":"Int. J. Adv. Intell. Inform."},{"key":"26_CR23","unstructured":"Haque, M., Islam, M.: Sunset related image captioning in Bengali with deep learning (2020)"},{"key":"26_CR24","unstructured":"Shah, F.M., et al.: Bornon: Bengali image captioning with transformer-based deep learning approach. arXiv preprint arXiv:2109.05218 (2021)"},{"issue":"6","key":"26_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3295748","volume":"51","author":"MZ Hossain","year":"2019","unstructured":"Hossain, M.Z., et al.: A comprehensive survey of deep learning for image captioning. ACM Comput. Surv. 51(6), 1\u201336 (2019)","journal-title":"ACM Comput. Surv."},{"key":"26_CR26","unstructured":"Medium.com: https:\/\/medium.com\/swlh\/image-captioning-using-attention-mechanism-f3d7fc96eb0e. Accessed 22 July 2022"},{"key":"26_CR27","unstructured":"Appliedaicourse: https:\/\/www.appliedaicourse.com\/lecture\/11\/applied-machine-learning-online-course\/4150\/attention-models-in-deep-learning\/8\/module-8-neural-networks-computer-vision-and-deep-learning. Accessed 22 July 2022"},{"key":"26_CR28","doi-asserted-by":"crossref","unstructured":"Luong, M.-T., Pham, H., Manning, C.D.: Effective approaches to attention-based neural machine translation. arXiv preprint arXiv:1508.04025 (2015)","DOI":"10.18653\/v1\/D15-1166"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Li, L., et al.: Image caption with global-local attention. In Thirty-First AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.11236"},{"issue":"2","key":"26_CR30","doi-asserted-by":"publisher","first-page":"16","DOI":"10.9734\/ajrcos\/2020\/v6i230154","volume":"6","author":"M Hossain","year":"2020","unstructured":"Hossain, M., Adhikary, A., Soheli, S.J.: Sign language digit recognition using different convolutional neural network model. Asian J. Res. Comput. Sci. 6(2), 16\u201324 (2020)","journal-title":"Asian J. Res. Comput. Sci."},{"key":"26_CR31","unstructured":"Yang, Z., et al.: Review networks for caption generation. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"26_CR32","unstructured":"Soh, M.: Learning CNN-LSTM architectures for image caption generation. Dept. Comput. Sci., Stanford Univ., Stanford, CA, USA, Tech. Rep. 1 (2016)"},{"key":"26_CR33","unstructured":"Googlecloud: https:\/\/cloud.google.com\/translate\/automl\/docs\/evaluate. Accessed 22 July 2022"}],"container-title":["Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering","Machine Intelligence and Emerging Technologies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-34619-4_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,10]],"date-time":"2023-06-10T19:07:41Z","timestamp":1686424061000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-34619-4_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031346187","9783031346194"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-34619-4_26","relation":{},"ISSN":["1867-8211","1867-822X"],"issn-type":[{"type":"print","value":"1867-8211"},{"type":"electronic","value":"1867-822X"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"11 June 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MIET","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Machine Intelligence and Emerging Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Noakhali","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bangladesh","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miet2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/confmiet.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Confy plus","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"272","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"104","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"38% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}