{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T05:03:12Z","timestamp":1764306192853,"version":"3.40.3"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031779602"},{"type":"electronic","value":"9783031779619"}],"license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-77961-9_9","type":"book-chapter","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T13:54:32Z","timestamp":1732197272000},"page":"119-133","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Cross-Lingual Summarization of\u00a0Speech-to-Speech Translation: A Baseline"],"prefix":"10.1007","author":[{"given":"Pranav","family":"Karande","sequence":"first","affiliation":[]},{"given":"Balaram","family":"Sarkar","sequence":"additional","affiliation":[]},{"given":"Chandresh","family":"Kumar Maurya","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Fitch, W.T.: The evolution of language: a comparative review. Biol. Philos. 20, 193\u2013203 (2005)","DOI":"10.1007\/s10539-005-5597-1"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Popuri, S., et al.: Enhanced Direct Speech-to-Speech Translation Using Self-supervised Pre-training and Data Augmentation (2022). Interspeech","DOI":"10.21437\/Interspeech.2022-11032"},{"key":"9_CR3","unstructured":"Wang, Y., Bai, J., Huang, R., Li, R., Hong, Z., Zhao, Z.: Speech-to-Speech Translation with Discrete-Unit-Based Style Transfer. arXiv:abs\/2309.07566 (2023)"},{"key":"9_CR4","unstructured":"Inaguma, H., et al.: UnitY: two-pass direct speech-to-speech translation with discrete units. arXiv:abs\/2212.08055 (2022)"},{"key":"9_CR5","unstructured":"Zhou, G., Lam, T., Birch, A., Haddow, B.: Prosody in Cascade and Direct Speech-to-Text Translation: a case study on Korean Wh-Phrases. Findings (2024)"},{"key":"9_CR6","unstructured":"Sarkar, B., Maurya, C.K., Agrahri, A.: Direct speech to text translation: bridging the modality gap using SimSiam. In: Proceedings of the 6th International Conference on Natural Language and Speech Processing (ICNLSP 2023), pp. 250\u2013255 (2023)"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Zhou, Q., Yang, N., Wei, F., Huang, S., Zhou, M., Zhao, T.: Neural document summarization by jointly learning to score and select sentences. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 654\u2013663 (2018)","DOI":"10.18653\/v1\/P18-1061"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Zhong, M., Liu, P., Wang, D., Qiu, X., Huang, X.: Searching for effective neural extractive summarization: what works and what\u2019s next. In: Annual Meeting of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/P19-1100"},{"key":"9_CR9","unstructured":"Wang, D., Liu, P., Zhong, M., Fu, J., Qiu, X., Huang, X.: Exploring domain shift in extractive text summarization. arXiv:abs\/1908.11664 (2019)"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Wang, D., Liu, P., Zheng, Y., Qiu, X., Huang, X.: Heterogeneous Graph Neural Networks for Extractive Document Summarization. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 6209\u20136219 (2020)","DOI":"10.18653\/v1\/2020.acl-main.553"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Narayan, S., Cohen, S.B., Lapata, M.: Ranking sentences for extractive summarization with reinforcement learning. In: North American Chapter of the Association for Computational Linguistics (2018)","DOI":"10.18653\/v1\/N18-1158"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Arumae, K., Liu, F.: Reinforced extractive summarization with question-focused rewards. arXiv:abs\/1805.10392 (2018)","DOI":"10.18653\/v1\/P18-3015"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Jadhav, A., Rajan, V.: Extractive summarization with SWAP-NET: sentences and words from alternating pointer networks. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 142\u2013151 (2018)","DOI":"10.18653\/v1\/P18-1014"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Liu, Yang and Mirella Lapata.: Text Summarization with Pretrained Encoders. arXiv:abs\/1908.08345 (2019)","DOI":"10.18653\/v1\/D19-1387"},{"key":"9_CR15","doi-asserted-by":"publisher","unstructured":"Givchi, A., Ramezani, R., Baraani-Dastjerdi, A.: Graph-based abstractive biomedical text summarization. J. Biomed Inform. 132, 104099 (2022). https:\/\/doi.org\/10.1016\/j.jbi.2022.104099.Epub 2022 Jun 11. PMID: 35700914","DOI":"10.1016\/j.jbi.2022.104099."},{"key":"9_CR16","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1162\/tacl_a_00313","volume":"8","author":"S Rothe","year":"2020","unstructured":"Rothe, S., Narayan, S., Severyn, A.: Leveraging pre-trained checkpoints for sequence generation tasks. Trans. Assoc. Comput. Linguist. 8, 264\u2013280 (2020)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"9_CR17","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv:abs\/1907.11692 (2019)"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Sharma, R., et al.: End-to-end speech summarization using restricted self-attention. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8072\u20138076 (2021)","DOI":"10.1109\/ICASSP43922.2022.9747320"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Matsuura, K., et al.: Leveraging large text corpora for end-to-end speech summarization. In: International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2023)","DOI":"10.1109\/ICASSP49357.2023.10094993"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Monteiro, R., Pernes, D.: Towards end-to-end speech-to-text summarization. arXiv:abs\/2306.05432 (2023)","DOI":"10.1007\/978-3-031-40498-6_27"},{"key":"9_CR21","unstructured":"Gangi, M.A.D., et al.: MuST-C: a multilingual speech translation corpus. In: North American Chapter of the Association for Computational Linguistics (2019)"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Lewis, Mike et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Annual Meeting of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Chen, D., Bolton, J., Manning, C.D.: A thorough examination of the CNN\/daily mail reading comprehension task. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2358\u20132367 (2016)","DOI":"10.18653\/v1\/P16-1223"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Kumar, G.K., et al.: Towards building text-to-speech systems for the next billion users. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2022)","DOI":"10.1109\/ICASSP49357.2023.10096069"},{"key":"9_CR25","doi-asserted-by":"crossref","unstructured":"Xue, L., et al.: mT5: a massively multilingual pre-trained text-to-text transformer. In: North American Chapter of the Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Grusky, M., Naaman, M., Artzi, Y.: Newsroom: a dataset of 1.3 million summaries with diverse extractive strategies. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), pp. 708\u2013719 (2018)","DOI":"10.18653\/v1\/N18-1065"},{"key":"9_CR27","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Neural Information Processing Systems (2017)"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Ott, M., et al.: fairseq: a fast, extensible toolkit for sequence modeling. In: North American Chapter of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/N19-4009"},{"key":"9_CR29","doi-asserted-by":"crossref","unstructured":"Tang, Y., et al.: Multilingual translation from denoising pre-training. In: Findings of the Association for Computational Linguistics: ACL-IJCNLP, 2021, pp. 3450\u20133466 (2021)","DOI":"10.18653\/v1\/2021.findings-acl.304"},{"key":"9_CR30","unstructured":"Kudugunta, S., et al.: MADLAD-400: a multilingual and document-level large audited dataset. arXiv:abs\/2309.04662 (2023)"},{"key":"9_CR31","unstructured":"Seamless Communication, et al.: Seamless: multilingual expressive and streaming speech translation. arXiv:abs\/2312.05187 (2023)"},{"key":"9_CR32","unstructured":"Nllb team, et al.: No Language Left Behind: Scaling Human-Centered Machine Translation. arXiv:abs\/2207.04672 (2022)"},{"key":"9_CR33","first-page":"1401","volume":"21","author":"C Raffel","year":"2019","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 1401\u201314067 (2019)","journal-title":"J. Mach. Learn. Res."},{"key":"9_CR34","doi-asserted-by":"crossref","unstructured":"Ali, A., Renals, S.: Word error rate estimation for speech recognition: e-WER. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 20\u201324 (2018)","DOI":"10.18653\/v1\/P18-2004"},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Post, M.: A call for clarity in reporting BLEU scores. In: Conference on Machine Translation (2018)","DOI":"10.18653\/v1\/W18-6319"},{"key":"9_CR36","unstructured":"Lin, C.-Y.: ROUGE: a package for automatic evaluation of summaries. In: Annual Meeting of the Association for Computational Linguistics (2004)"},{"key":"9_CR37","unstructured":"Zhang, T., et al.: BERTScore: evaluating text generation with BERT. arXiv:abs\/1904.09675 (2019)"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Zhong, M., et al.: Towards a unified multi-dimensional evaluator for text generation. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 2023\u20132038 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.131"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-77961-9_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,9]],"date-time":"2025-01-09T16:05:03Z","timestamp":1736438703000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-77961-9_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"ISBN":["9783031779602","9783031779619"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-77961-9_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"22 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belgrade","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Serbia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom2024.ftn.uns.ac.rs\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}