{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:28:24Z","timestamp":1780356504308,"version":"3.54.1"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031705519","type":"print"},{"value":"9783031705526","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70552-6_16","type":"book-chapter","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T04:02:14Z","timestamp":1725940934000},"page":"264-280","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["IndicBART Alongside Visual Element: Multimodal Summarization in\u00a0Diverse Indian Languages"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9488-3099","authenticated-orcid":false,"given":"Raghvendra","family":"Kumar","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6229-6562","authenticated-orcid":false,"given":"Deepak","family":"Prakash","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5494-9391","authenticated-orcid":false,"given":"Sriparna","family":"Saha","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1313-4063","authenticated-orcid":false,"given":"Shubham","family":"Sharma","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Ahuja, K., et\u00a0al.: Mega: Multilingual evaluation of generative AI. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 4232\u20134267 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.258"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Bhat, S., Varma, V., Pedanekar, N.: Generative models for Indic languages: Evaluating content generation capabilities. In: Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing, pp. 187\u2013195 (2023)","DOI":"10.26615\/978-954-452-092-2_021"},{"key":"16_CR3","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR4","doi-asserted-by":"publisher","unstructured":"Chen, J., Zhuge, H.: Extractive text-image summarization using multi-modal RNN. In: 2018 14th International Conference on Semantics, Knowledge and Grids (SKG), pp. 245\u2013248 (2018). https:\/\/doi.org\/10.1109\/SKG.2018.00033","DOI":"10.1109\/SKG.2018.00033"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"Dabre, R., Shrotriya, H., Kunchukuttan, A., Puduppully, R., Khapra, M.M., Kumar, P.: Indicbart: a pre-trained model for indic natural language generation. arXiv preprint arXiv:2109.02903 (2021)","DOI":"10.18653\/v1\/2022.findings-acl.145"},{"key":"16_CR6","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/978-981-16-6624-7_14","volume-title":"Intelligent Data Engineering and Analytics","author":"SR Dash","year":"2022","unstructured":"Dash, S.R., Guha, P., Mallick, D.K., Parida, S.: Summarizing bengali text: an extractive approach. In: Satapathy, S.C., Peer, P., Tang, J., Bhateja, V., Ghosh, A. (eds.) Intelligent Data Engineering and Analytics, pp. 133\u2013140. Springer Nature Singapore, Singapore (2022)"},{"key":"16_CR7","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1007\/978-981-16-3346-1_15","volume-title":"Proceedings of Second Doctoral Symposium on Computational Intelligence","author":"S Dhankhar","year":"2022","unstructured":"Dhankhar, S., Gupta, M.K.: Automatic extractive summarization for English text: a brief survey. In: Gupta, D., Khanna, A., Kansal, V., Fortino, G., Hassanien, A.E. (eds.) Proceedings of Second Doctoral Symposium on Computational Intelligence. AISC, vol. 1374, pp. 183\u2013198. Springer, Singapore (2022). https:\/\/doi.org\/10.1007\/978-981-16-3346-1_15"},{"key":"16_CR8","doi-asserted-by":"crossref","unstructured":"Ghosh, A., et al.: Medsumm: a multimodal approach to summarizing code-mixed Hindi-English clinical queries. arXiv preprint arXiv:2401.01596 (2024)","DOI":"10.1007\/978-3-031-56069-9_8"},{"key":"16_CR9","doi-asserted-by":"publisher","unstructured":"Jain, R., Verma, A., Singh, A., Gangwar, V., Saha, S.: Aspect-based complaint and cause detection: a multimodal generative framework with external knowledge infusion. In: De Francisci Morales, G., Perlich, C., Ruchansky, N., Kourtellis, N., Baralis, E., Bonchi, F. (eds.) Machine Learning and Knowledge Discovery in Databases: Applied Data Science and Demo Track, ECML PKDD 2023, LNCS, vol. 14174, pp. 88\u2013104. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-43427-3_6","DOI":"10.1007\/978-3-031-43427-3_6"},{"issue":"13s","key":"16_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3584700","volume":"55","author":"A Jangra","year":"2023","unstructured":"Jangra, A., Mukherjee, S., Jatowt, A., Saha, S., Hasanuzzaman, M.: A survey on multi-modal summarization. ACM Comput. Surv. 55(13s), 1\u201336 (2023)","journal-title":"ACM Comput. Surv."},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Jangra, A., Saha, S., Jatowt, A., Hasanuzzaman, M.: Multi-modal summary generation using multi-objective optimization. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 1745\u20131748 (2020)","DOI":"10.1145\/3397271.3401232"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Kevat, R., Degadwala, S.: A comprehensive review on Gujarati-text summarization through different features (2023)","DOI":"10.32628\/CSEIT2361051"},{"key":"16_CR13","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1007\/978-81-322-2250-7_29","volume-title":"Information Systems Design and Intelligent Applications","author":"KV Kumar","year":"2015","unstructured":"Kumar, K.V., Yadav, D., Sharma, A.: Graph based technique for Hindi text summarization. In: Mandal, J.K., Satapathy, S.C., Sanyal, M.K., Sarkar, P.P., Mukhopadhyay, A. (eds.) Information Systems Design and Intelligent Applications. AISC, vol. 339, pp. 301\u2013310. Springer, New Delhi (2015). https:\/\/doi.org\/10.1007\/978-81-322-2250-7_29"},{"key":"16_CR14","doi-asserted-by":"publisher","unstructured":"Kumar, R., Sinha, R., Saha, S., Jatowt, A.: Multimodal rumour detection: catching news that never transpired!. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) Document Analysis and Recognition - ICDAR 2023, ICDAR 2023, LNCS, vol. 14189, pp. 231\u2013248. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41682-8_15","DOI":"10.1007\/978-3-031-41682-8_15"},{"issue":"8","key":"16_CR15","doi-asserted-by":"publisher","first-page":"5897","DOI":"10.1007\/s10462-021-09964-4","volume":"54","author":"Y Kumar","year":"2021","unstructured":"Kumar, Y., Kaur, K., Kaur, S.: Study of automatic text summarization approaches in different languages. Artif. Intell. Rev. 54(8), 5897\u20135929 (2021)","journal-title":"Artif. Intell. Rev."},{"key":"16_CR16","doi-asserted-by":"publisher","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Jurafsky, D., Chai, J., Schluter, N., Tetreault, J. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7871\u20137880. Association for Computational Linguistics, July 2020. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.703, https:\/\/aclanthology.org\/2020.acl-main.703","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"16_CR17","unstructured":"Lin, C.Y.: ROUGE: A package for automatic evaluation of summaries. In: Text Summarization Branches Out. pp. 74\u201381. Association for Computational Linguistics, Barcelona, Spain, July 2004, https:\/\/aclanthology.org\/W04-1013"},{"key":"16_CR18","unstructured":"Mihalcea, R., Tarau, P.: TextRank: bringing order into text. In: Lin, D., Wu, D. (eds.) Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing, pp. 404\u2013411. Association for Computational Linguistics, Barcelona, Spain, July 2004. https:\/\/aclanthology.org\/W04-3252"},{"key":"16_CR19","doi-asserted-by":"publisher","unstructured":"Modani, N., et al.: Summarizing multimedia content. In: Cellary, W., Mokbel, M., Wang, J., Wang, H., Zhou, R., Zhang, Y. (eds.) Web Information Systems Engineering - WISE 2016, WISE 2016, LNCS, Part II, vol. 10042, pp. 340\u2013348. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48743-4_27","DOI":"10.1007\/978-3-319-48743-4_27"},{"key":"16_CR20","doi-asserted-by":"crossref","unstructured":"Muennighoff, N., et\u00a0al.: Crosslingual generalization through multitask finetuning. arXiv preprint arXiv:2211.01786 (2022)","DOI":"10.18653\/v1\/2023.acl-long.891"},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"Nallapati, R., Zhou, B., Gulcehre, C., Xiang, B., et\u00a0al.: Abstractive text summarization using sequence-to-sequence RNNs and beyond. arXiv preprint arXiv:1602.06023 (2016)","DOI":"10.18653\/v1\/K16-1028"},{"key":"16_CR22","doi-asserted-by":"publisher","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Isabelle, P., Charniak, E., Lin, D. (eds.) Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318. Association for Computational Linguistics, Philadelphia, Pennsylvania, USA, July 2002. https:\/\/doi.org\/10.3115\/1073083.1073135, https:\/\/aclanthology.org\/P02-1040","DOI":"10.3115\/1073083.1073135"},{"key":"16_CR23","doi-asserted-by":"publisher","unstructured":"Radev, D.R., Jing, H., Sty\u015b, M., Tam, D.: Centroid-based summarization of multiple documents. Inf. Process. Manage. 40(6), 919\u2013938 (2004). https:\/\/doi.org\/10.1016\/j.ipm.2003.10.006, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0306457303000955","DOI":"10.1016\/j.ipm.2003.10.006"},{"key":"16_CR24","doi-asserted-by":"crossref","unstructured":"Sarwadnya, V.V., Sonawane, S.S.: Marathi extractive text summarizer using graph based model. In: 2018 fourth international conference on computing communication control and automation (ICCUBEA). pp.\u00a01\u20136. IEEE (2018)","DOI":"10.1109\/ICCUBEA.2018.8697741"},{"key":"16_CR25","unstructured":"Shen, S., Yao, Z., Gholami, A., Mahoney, M., Keutzer, K.: Powernorm: Rethinking batch normalization in transformers. In: International conference on machine learning. pp. 8741\u20138751. PMLR (2020)"},{"key":"16_CR26","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition (2015)"},{"key":"16_CR27","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/j.procs.2016.05.121","volume":"87","author":"C Sunitha","year":"2016","unstructured":"Sunitha, C., Jaya, A., Ganesh, A.: A study on abstractive summarization techniques in indian languages. Procedia Computer Science 87, 25\u201331 (2016)","journal-title":"Procedia Computer Science"},{"key":"16_CR28","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need (2023)"},{"issue":"3","key":"16_CR29","first-page":"1","volume":"18","author":"P Verma","year":"2019","unstructured":"Verma, P., Pal, S., Om, H.: A comparative analysis on hindi and english extractive text summarization. ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP) 18(3), 1\u201339 (2019)","journal-title":"ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)"},{"key":"16_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2022.108670","volume":"120","author":"P Verma","year":"2022","unstructured":"Verma, P., Verma, A., Pal, S.: An approach for extractive text summarization using fuzzy evolutionary and clustering algorithms. Appl. Soft Comput. 120, 108670 (2022)","journal-title":"Appl. Soft Comput."},{"key":"16_CR31","doi-asserted-by":"publisher","unstructured":"Verma, Y., Jangra, A., Verma, R., Saha, S.: Large scale multi-lingual multi-modal summarization dataset. In: Vlachos, A., Augenstein, I. (eds.) Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics. pp. 3620\u20133632. Association for Computational Linguistics, Dubrovnik, Croatia (May 2023). https:\/\/doi.org\/10.18653\/v1\/2023.eacl-main.263, https:\/\/aclanthology.org\/2023.eacl-main.263","DOI":"10.18653\/v1\/2023.eacl-main.263"},{"issue":"3","key":"16_CR32","doi-asserted-by":"publisher","first-page":"227","DOI":"10.3233\/ICA-220680","volume":"29","author":"S Wolyn","year":"2022","unstructured":"Wolyn, S., Simske, S.J.: Summarization assessment methodology for multiple corpora using queries and classification for functional evaluation. Integrated Computer-Aided Engineering 29(3), 227\u2013239 (2022)","journal-title":"Integrated Computer-Aided Engineering"},{"key":"16_CR33","doi-asserted-by":"crossref","unstructured":"Xue, L., Constant, N., Roberts, A., Kale, M., Al-Rfou, R., Siddhant, A., Barua, A., Raffel, C.: mt5: A massively multilingual pre-trained text-to-text transformer. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. pp. 483\u2013498 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"16_CR34","doi-asserted-by":"crossref","unstructured":"Yadav, D., Desai, J., Yadav, A.K.: Automatic text summarization methods: A comprehensive review. arXiv preprint arXiv:2204.01849 (2022)","DOI":"10.1109\/ACCESS.2022.3231016"},{"key":"16_CR35","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Fan, J., Tao, D.: Multi-modal factorized bilinear pooling with co-attention learning for visual question answering. In: Proceedings of the IEEE international conference on computer vision. pp. 1821\u20131830 (2017)","DOI":"10.1109\/ICCV.2017.202"},{"key":"16_CR36","doi-asserted-by":"crossref","unstructured":"Zhu, J., Li, H., Liu, T., Zhou, Y., Zhang, J., Zong, C.: Msmo: Multimodal summarization with multimodal output. In: Proceedings of the 2018 conference on empirical methods in natural language processing. pp. 4154\u20134164 (2018)","DOI":"10.18653\/v1\/D18-1448"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70552-6_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T14:07:30Z","timestamp":1749996450000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70552-6_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705519","9783031705526"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70552-6_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"11 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}