{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:06:20Z","timestamp":1776884780498,"version":"3.51.2"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031424472","type":"print"},{"value":"9783031424489","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-42448-9_5","type":"book-chapter","created":{"date-parts":[[2023,9,10]],"date-time":"2023-09-10T23:02:21Z","timestamp":1694386941000},"page":"48-59","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Cem Mil Podcasts: A Spoken Portuguese Document Corpus for\u00a0Multi-modal, Multi-lingual and\u00a0Multi-dialect Information Access Research"],"prefix":"10.1007","author":[{"given":"Ekaterina","family":"Garmash","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Edgar","family":"Tanaka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ann","family":"Clifton","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joana","family":"Correia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sharmistha","family":"Jat","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Winstead","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rosie","family":"Jones","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jussi","family":"Karlgren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,11]]},"reference":[{"key":"5_CR1","unstructured":"Anchor: Anchor web: Now localized for more creators around the world (2022), https:\/\/blog.anchor.fm\/updates\/anchor-web-localization. Accessed Sept 2022"},{"key":"5_CR2","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1007\/978-3-030-50732-9_20","volume-title":"HCI International 2020 - Posters","author":"MJ Antunes","year":"2020","unstructured":"Antunes, M.J., Salaverr\u00eda, R.: Examining independent podcasts in Portuguese iTunes. In: Stephanidis, C., Antona, M. (eds.) HCII 2020. CCIS, vol. 1226, pp. 149\u2013153. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-50732-9_20"},{"key":"5_CR3","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: Longformer: The long-document transformer (2020). https:\/\/arxiv.org\/abs\/2004.05150"},{"key":"5_CR4","unstructured":"Buitinck, L., et al.: API design for machine learning software: experiences from the scikit-learn project. In: ECML PKDD Workshop: Languages for Data Mining and Machine Learning (2013)"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Clifton, A., et al.: 100,000 podcasts: a spoken english document corpus. In: Proceedings of the 28th International Conference on Computational Linguistics (COLING). International Committee on Computational Linguistics (2020). https:\/\/podcastsdataset.byspotify.com\/","DOI":"10.18653\/v1\/2020.coling-main.519"},{"key":"5_CR6","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423, https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"5_CR7","doi-asserted-by":"publisher","unstructured":"El-Kassas, W.S., Salama, C.R., Rafea, A.A., Mohamed, H.K.: Automatic text summarization: a comprehensive survey. Expert Syst. Appl. 165, 113679 (2021). https:\/\/doi.org\/10.1016\/j.eswa.2020.113679, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0957417420305030","DOI":"10.1016\/j.eswa.2020.113679"},{"key":"5_CR8","unstructured":"Fan, A., et al.: Beyond English-centric multilingual machine translation (2020). https:\/\/arxiv.org\/abs\/2010.11125"},{"key":"5_CR9","unstructured":"International Monetary Fund: World economic outlook database (2023). https:\/\/www.imf.org\/en\/Publications\/WEO\/weo-database\/2023\/April\/weo-report. Accessed May 2023"},{"key":"5_CR10","unstructured":"Jones, R., et al.: TREC 2020 podcasts track overview. In: Voorhees, E.M., Ellis, A. (eds.) NIST Special Publication 1266: The Twenty-Ninth Text REtrieval Conference Proceedings (TREC 2020). NIST, Gaithersburg (2021)"},{"key":"5_CR11","unstructured":"Karlbom, H.: Abstractive Summarization of Podcast Transcriptions. Master\u2019s thesis, Uppsala University (2021)"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Karlgren, J.: Lexical variation in English language podcasts, editorial media, and social media. North Eur. J. Lang. Technol. 8 (2022)","DOI":"10.3384\/nejlt.2000-1533.2022.3566"},{"key":"5_CR13","unstructured":"Karlgren, J., et al.: TREC 2021 podcasts track overview. In: Voorhees, E.M., Ellis, A. (eds.) NIST Special Publication 335: The Thirtieth Text REtrieval Conference Proceedings (TREC 2021). NIST, Gaithersburg (2022)"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension (2019). https:\/\/arxiv.org\/abs\/1910.13461","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"5_CR15","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out. Association for Computational Linguistics (2004). https:\/\/aclanthology.org\/W04-1013"},{"key":"5_CR16","unstructured":"Liu, Y., et al.: Multilingual denoising pre-training for neural machine translation (2020). https:\/\/arxiv.org\/abs\/2001.08210"},{"key":"5_CR17","unstructured":"Lui, M., Baldwin, T.: langid.py: an off-the-shelf language identification tool. In: Proceedings of the ACL 2012 System Demonstrations. Association for Computational Linguistics (2012). https:\/\/pypi.org\/project\/langid\/"},{"key":"5_CR18","unstructured":"Mihalcea, R., Tarau, P.: Textrank: bringing order into text. In: Proceedings of the 2004 conference on Empirical Methods in Natural Language Processing (2004)"},{"key":"5_CR19","unstructured":"Morais, R., Giacomelli, F., Grafolin, T., Rocha, F.: Audience transformations and new audio experiences: an analysis of the trends and consumption habits of podcasts by Brazilian listeners. J. Audience Reception Stud. 18(1) (2021)"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Reddy, S., Yu, Y., Pappu, A., Sivaraman, A., Rezapour, R., Jones, R.: Detecting Extraneous Content in Podcasts (2021). https:\/\/arxiv.org\/abs\/2103.02585","DOI":"10.18653\/v1\/2021.eacl-main.99"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Rodrigues, J., et al.: Advancing neural encoding of Portuguese with transformer albertina pt-* (2023). https:\/\/arxiv.org\/abs\/2305.06721","DOI":"10.1007\/978-3-031-49008-8_35"},{"key":"5_CR22","unstructured":"Tang, Y., et al.: Multilingual translation with extensible multilingual pretraining and finetuning (2020). https:\/\/arxiv.org\/abs\/2008.00401"},{"key":"5_CR23","unstructured":"Whitner, G.: The meteoric rise of podcasting (2020). https:\/\/musicoomph.com\/podcast-statistics"},{"key":"5_CR24","unstructured":"Wikipedia: Portuguese language (2022). https:\/\/en.wikipedia.org\/wiki\/Portuguese_language. Accessed Sept 2022"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Xue, L., et al.: mT5: a massively multilingual pre-trained text-to-text transformer (2021). https:\/\/arxiv.org\/abs\/2010.11934","DOI":"10.18653\/v1\/2021.naacl-main.41"}],"container-title":["Lecture Notes in Computer Science","Experimental IR Meets Multilinguality, Multimodality, and Interaction"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-42448-9_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T16:22:37Z","timestamp":1710260557000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-42448-9_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031424472","9783031424489"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-42448-9_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"11 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CLEF","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference of the Cross-Language Evaluation Forum for European Languages","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Thessaloniki","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"clef2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/clef2023.clef-initiative.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"35","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7 Best of 2022 Labs + 13 Lab Overviews","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}