{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T22:06:14Z","timestamp":1779228374678,"version":"3.51.4"},"reference-count":63,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T00:00:00Z","timestamp":1770163200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100002809","name":"Generalitat de Catalunya","doi-asserted-by":"publisher","award":["MCIN\/AEI\/10.13039\/501100011033"],"award-info":[{"award-number":["MCIN\/AEI\/10.13039\/501100011033"]}],"id":[{"id":"10.13039\/501100002809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014440","name":"Espa\u00f1a Ministerio de Ciencia Innovaci\u00f3n y Universidades","doi-asserted-by":"publisher","award":["PID2022-139004OA-I00"],"award-info":[{"award-number":["PID2022-139004OA-I00"]}],"id":[{"id":"10.13039\/100014440","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014440","name":"Espa\u00f1a Ministerio de Ciencia Innovaci\u00f3n y Universidades","doi-asserted-by":"publisher","award":["PID2024-156022OB-C33"],"award-info":[{"award-number":["PID2024-156022OB-C33"]}],"id":[{"id":"10.13039\/100014440","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008530","name":"European Regional Development 
Fund","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100008530","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005774","name":"Universitat de Barcelona","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100005774","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100031478","name":"NextGenerationEU","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100031478","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006433","name":"Barcelona Supercomputing Center","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006433","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Speech &amp; Language"],"published-print":{"date-parts":[[2026,10]]},"DOI":"10.1016\/j.csl.2026.101945","type":"journal-article","created":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T16:00:16Z","timestamp":1769011216000},"page":"101945","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["LaFresCat: A studio-quality Catalan multi-accent speech dataset for text-to-speech 
synthesis"],"prefix":"10.1016","volume":"100","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-1712-7565","authenticated-orcid":false,"given":"Alex","family":"Peir\u00f3-Lilja","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9005-4138","authenticated-orcid":false,"given":"Carme","family":"Armentano-Oller","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3202-2610","authenticated-orcid":false,"given":"Jos\u00e9","family":"Giraldo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7002-9851","authenticated-orcid":false,"given":"Wendy","family":"Elvira-Garc\u00eda","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3662-3640","authenticated-orcid":false,"given":"Ignasi","family":"Esquerra","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0192-7740","authenticated-orcid":false,"given":"Rodolfo","family":"Zevallos","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5414-4710","authenticated-orcid":false,"given":"Cristina","family":"Espa\u00f1a-Bonet","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6604-1903","authenticated-orcid":false,"given":"Mart\u00ed","family":"Llopart-Font","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2291-057X","authenticated-orcid":false,"given":"Baybars","family":"K\u00fclebi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7160-9513","authenticated-orcid":false,"given":"Mireia","family":"Farr\u00fas","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.csl.2026.101945_b1","series-title":"Proceedings of the Twelfth Language Resources and Evaluation Conference","first-page":"4218","article-title":"Common voice: A massively-multilingual speech 
corpus","author":"Ardila","year":"2020"},{"key":"10.1016\/j.csl.2026.101945_b2","series-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)","first-page":"2142","article-title":"Becoming a high-resource language in speech: The catalan case in the common voice corpus","author":"Armentano-Oller","year":"2024"},{"key":"10.1016\/j.csl.2026.101945_b3","doi-asserted-by":"crossref","first-page":"39","DOI":"10.3389\/frai.2020.00039","article-title":"A new acoustic-based pronunciation distance measure","volume":"3","author":"Bartelds","year":"2020","journal-title":"Front. Artif. Intell."},{"key":"10.1016\/j.csl.2026.101945_b4","doi-asserted-by":"crossref","first-page":"104","DOI":"10.1016\/j.wocn.2016.08.004","article-title":"Classification of regional dialects, international dialects, and nonnative accents","volume":"58","author":"Bent","year":"2016","journal-title":"J. Phon."},{"key":"10.1016\/j.csl.2026.101945_b5","series-title":"Praat: Doing phonetics by computer","author":"Boersma","year":"2023"},{"key":"10.1016\/j.csl.2026.101945_b6","series-title":"Proceedings of the Sixth International Conference on Language Resources and Evaluation","doi-asserted-by":"crossref","DOI":"10.63317\/3m8qyt5qdg99","article-title":"Corpus and voices for catalan speech synthesis","author":"Bonafonte","year":"2008"},{"key":"10.1016\/j.csl.2026.101945_b7","series-title":"Dialectology","first-page":"3","article-title":"Dialect and language","author":"Chambers","year":"1998"},{"key":"10.1016\/j.csl.2026.101945_b8","series-title":"Els Parlars Catalans","author":"Veny i Clar","year":"1991"},{"key":"10.1016\/j.csl.2026.101945_b9","first-page":"31","article-title":"Catal\u00e0 occidental \/ catal\u00e0 oriental, encara","volume":"37","author":"Veny i Clar","year":"2015","journal-title":"Estud. 
Rom\u00e0nics"},{"key":"10.1016\/j.csl.2026.101945_b10","series-title":"Interspeech","article-title":"Experiments with the ABI (accents of the british isles) speech corpus","author":"D\u2019Arcy","year":"2008"},{"key":"10.1016\/j.csl.2026.101945_b11","series-title":"Proceedings of the 12th Language Resources and Evaluation Conference","first-page":"6532","article-title":"Open-source Multi-speaker Corpora of the English Accents in the British Isles","author":"Demirsahin","year":"2020"},{"issue":"1","key":"10.1016\/j.csl.2026.101945_b12","doi-asserted-by":"crossref","first-page":"57","DOI":"10.3390\/languages7010057","article-title":"Hypercorrection as a symptom of language change: Majorcan Catalan standard pronunciation","volume":"7","author":"Dols","year":"2022","journal-title":"Languages"},{"key":"10.1016\/j.csl.2026.101945_b13","series-title":"MD3: The multi-dialect dataset of dialogues","first-page":"4059","author":"Eisenstein","year":"2023"},{"key":"10.1016\/j.csl.2026.101945_b14","article-title":"ProDis: A dialectometric tool for acoustic prosodic data","volume":"97","author":"Elvira-Garc\u00eda","year":"2017","journal-title":"Speech Commun."},{"issue":"5","key":"10.1016\/j.csl.2026.101945_b15","doi-asserted-by":"crossref","first-page":"378","DOI":"10.1037\/h0031619","article-title":"Measuring nominal scale agreement among many raters","volume":"76","author":"Fleiss","year":"1971","journal-title":"Psychol. 
Bull."},{"key":"10.1016\/j.csl.2026.101945_b16","series-title":"De Los Trovadores En Espa\u00f1a : Estudio De Lengua Y Poes\u00eda Provenzal","author":"Mil\u00e0 i Fontanals","year":"1861"},{"key":"10.1016\/j.csl.2026.101945_b17","series-title":"IberSPEECH 2024","first-page":"196","article-title":"Enhancing crowdsourced audio for text-to-speech models","author":"Giraldo","year":"2024"},{"issue":"11","key":"10.1016\/j.csl.2026.101945_b18","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1145\/3422622","article-title":"Generative adversarial networks","volume":"63","author":"Goodfellow","year":"2020","journal-title":"Commun. ACM"},{"key":"10.1016\/j.csl.2026.101945_b19","series-title":"Intonational variation in the british isles","first-page":"343","author":"Grabe","year":"2002"},{"key":"10.1016\/j.csl.2026.101945_b20","series-title":"Proceedings of the Twelfth Language Resources and Evaluation Conference","first-page":"6504","article-title":"Crowdsourcing latin American spanish for low-resource text-to-speech","author":"Guevara-Rukoz","year":"2020"},{"issue":"4","key":"10.1016\/j.csl.2026.101945_b21","doi-asserted-by":"crossref","first-page":"458","DOI":"10.1016\/j.wocn.2005.10.001","article-title":"Factors influencing speech perception in the context of a merger-in-progress","volume":"34","author":"Hay","year":"2006","journal-title":"J. 
Phon."},{"key":"10.1016\/j.csl.2026.101945_b22","series-title":"Proceedings of the 35th International Conference on Machine Learning","first-page":"2410","article-title":"Efficient neural audio synthesis","author":"Kalchbrenner","year":"2018"},{"key":"10.1016\/j.csl.2026.101945_b23","series-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","article-title":"Glow-TTS: a generative flow for text-to-speech via monotonic alignment search","author":"Kim","year":"2020"},{"key":"10.1016\/j.csl.2026.101945_b24","series-title":"Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech","author":"Kim","year":"2021"},{"key":"10.1016\/j.csl.2026.101945_b25","doi-asserted-by":"crossref","first-page":"326","DOI":"10.1016\/j.csl.2017.01.005","article-title":"Multilingual processing of speech via web services","volume":"45","author":"Kisler","year":"2017","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.csl.2026.101945_b26","series-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","article-title":"HiFi-GAN: generative adversarial networks for efficient and high fidelity speech synthesis","author":"Kong","year":"2020"},{"key":"10.1016\/j.csl.2026.101945_b27","series-title":"DiffWave: A versatile diffusion model for audio synthesis","author":"Kong","year":"2021"},{"key":"10.1016\/j.csl.2026.101945_b28","series-title":"Proceedings of the Workshop ParlaCLARIN III Within the 13th Language Resources and Evaluation Conference","first-page":"125","article-title":"ParlamentParla: A speech corpus of catalan parliamentary sessions","author":"K\u00fclebi","year":"2022"},{"key":"10.1016\/j.csl.2026.101945_b29","series-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","article-title":"Melgan: generative adversarial networks for conditional waveform 
synthesis","author":"Kumar","year":"2019"},{"key":"10.1016\/j.csl.2026.101945_b30","series-title":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1","article-title":"Torchaudio-squim: Reference-less speech quality and intelligibility measures in torchaudio","author":"Kumar","year":"2023"},{"key":"10.1016\/j.csl.2026.101945_b31","series-title":"2023 11th International Conference on Learning Representations","article-title":"Bigvgan: a universal neural vocoder with large-scale training","author":"Lee","year":"2023"},{"key":"10.1016\/j.csl.2026.101945_b32","unstructured":"Lipman, Y., Chen, R.T., Ben-Hamu, H., Nickel, M., Le, M., 2023. Flow Matching for Generative Modeling. In: 11th International Conference on Learning Representations. ICLR 2023."},{"key":"10.1016\/j.csl.2026.101945_b33","series-title":"Interspeech 2022","article-title":"VoiceFixer: A unified framework for high-fidelity speech restoration","author":"Liu","year":"2022"},{"key":"10.1016\/j.csl.2026.101945_b34","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11966","article-title":"A ConvNet for the 2020s","author":"Liu","year":"2022"},{"key":"10.1016\/j.csl.2026.101945_b35","series-title":"phonR: tools for phoneticians and phonologists","author":"McCloy","year":"2016"},{"key":"10.1016\/j.csl.2026.101945_b36","doi-asserted-by":"crossref","unstructured":"Mehta, S., Tu, R., Beskow, J., Sz\u00e9kely, \u00c9., Henter, G.E., 2024. Matcha-TTS: A fast TTS architecture with conditional flow matching. In: Proc. 
ICASSP.","DOI":"10.1109\/ICASSP48485.2024.10448291"},{"key":"10.1016\/j.csl.2026.101945_b37","series-title":"Proceedings of the 2025 ACM Conference on Fairness, Accountability, and Transparency, FAccT \u201925","first-page":"228","article-title":"\u201cIt\u2019s not a representation of me\u201d: examining accent bias and digital exclusion in synthetic ai voice services","author":"Michel","year":"2025"},{"key":"10.1016\/j.csl.2026.101945_b38","series-title":"Interspeech 2021","article-title":"NISQA: A deep CNN-self-attention model for multidimensional speech quality prediction with crowdsourced datasets","author":"Mittag","year":"2021"},{"key":"10.1016\/j.csl.2026.101945_b39","article-title":"Computational comparison and classification of dialects","volume":"9","author":"Nerbonne","year":"2000","journal-title":"Dialectol. Geolinguist."},{"issue":"1","key":"10.1016\/j.csl.2026.101945_b40","doi-asserted-by":"crossref","DOI":"10.1121\/1.4894063","article-title":"Best practices in measuring vowel merger","volume":"20","author":"Nycz","year":"2014","journal-title":"Proc. Meet. Acoust."},{"key":"10.1016\/j.csl.2026.101945_b41","series-title":"The 9th ISCA Speech Synthesis Workshop, SSW 2016, Sunnyvale, CA, USA, September 13-15, 2016","first-page":"125","article-title":"WaveNet: A generative model for raw audio","author":"van den Oord","year":"2016"},{"key":"10.1016\/j.csl.2026.101945_b42","series-title":"Proceedings of the 38th International Conference on Machine Learning","first-page":"8599","article-title":"Grad-TTS: A diffusion probabilistic model for text-to-speech","volume":"139","author":"Popov","year":"2021"},{"key":"10.1016\/j.csl.2026.101945_b43","doi-asserted-by":"crossref","unstructured":"Prenger, R., Valle, R., Catanzaro, B., 2019. Waveglow: A Flow-based Generative Network for Speech Synthesis. In: ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). pp. 3617\u20133621. 
http:\/\/dx.doi.org\/10.1109\/ICASSP.2019.8683143.","DOI":"10.1109\/ICASSP.2019.8683143"},{"issue":"49","key":"10.1016\/j.csl.2026.101945_b44","first-page":"201","article-title":"L\u2019atles interactiu de l\u2019entonaci\u00f3 del catal\u00e0 i el tra\u00e7at de les isoglosses entonatives del catal\u00e0","author":"Prieto","year":"2010","journal-title":"Caplletra. Rev. Int. Filol."},{"key":"10.1016\/j.csl.2026.101945_b45","series-title":"9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021","article-title":"FastSpeech 2: Fast and high-quality end-to-end text to speech","author":"Ren","year":"2021"},{"key":"10.1016\/j.csl.2026.101945_b46","series-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","article-title":"FastSpeech: fast, robust and controllable text to speech","author":"Ren","year":"2019"},{"key":"10.1016\/j.csl.2026.101945_b47","first-page":"749","article-title":"Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs","volume":"vol. 2","author":"Rix","year":"2001"},{"key":"10.1016\/j.csl.2026.101945_b48","series-title":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"626","article-title":"SDR \u2013 half-baked or well done?","author":"Roux","year":"2019"},{"key":"10.1016\/j.csl.2026.101945_b49","doi-asserted-by":"crossref","unstructured":"Saeki, T., Maiti, S., Takamichi, S., Watanabe, S., Saruwatari, H., 2024. SpeechBERTScore: Reference-Aware Automatic Evaluation of Speech Generation Leveraging NLP Evaluation Metrics. pp. 4943\u20134947. 
http:\/\/dx.doi.org\/10.21437\/Interspeech.2024-1508.","DOI":"10.21437\/Interspeech.2024-1508"},{"key":"10.1016\/j.csl.2026.101945_b50","series-title":"UTMOS: Utokyo-SaruLab system for VoiceMOS challenge 2022","author":"Saeki","year":"2022"},{"key":"#cr-split#-10.1016\/j.csl.2026.101945_b51.1","unstructured":"Schiel, F., 1999. Automatic Phonetic Transcription of Non-Prompted Speech. In: Ohala, J.J. (Ed.), Proceedings of the XIVth International Congress of Phonetic Sciences : ICPhS 99"},{"key":"#cr-split#-10.1016\/j.csl.2026.101945_b51.2","unstructured":"San Francisco, 1 - 7 August 1999. San Francisco, pp. 607-610, URL http:\/\/nbn-resolving.de\/urn\/resolver.pl?urn=nbn:de:bvb:19-epub-13682-6."},{"key":"10.1016\/j.csl.2026.101945_b52","series-title":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"4779","article-title":"Natural TTS synthesis by conditioning wavenet on MEL spectrogram predictions","author":"Shen","year":"2018"},{"key":"10.1016\/j.csl.2026.101945_b53","series-title":"Proceedings of the Ninth Workshop on NLP for Similar Languages, Varieties and Dialects","first-page":"20","article-title":"dialectR: Doing dialectometry in R","author":"Shim","year":"2022"},{"key":"10.1016\/j.csl.2026.101945_b54","series-title":"Vocos: Closing the gap between time-domain and Fourier-based neural vocoders for high-quality audio synthesis","author":"Siuzdak","year":"2023"},{"key":"10.1016\/j.csl.2026.101945_b55","doi-asserted-by":"crossref","first-page":"127063","DOI":"10.1016\/j.neucom.2023.127063","article-title":"Roformer: enhanced transformer with rotary position embedding","volume":"568","author":"Su","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.csl.2026.101945_b56","article-title":"Attention is all you need","volume":"vol. 
30","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.csl.2026.101945_b57","series-title":"2013 International Conference Oriental COCOSDA Held Jointly with 2013 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA\/CASLRE)","first-page":"1","article-title":"The voice bank corpus: Design, collection and data analysis of a large regional accent speech database","author":"Veaux","year":"2013"},{"key":"10.1016\/j.csl.2026.101945_b58","series-title":"SUPERSEDED - CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit","author":"Veaux","year":"2016"},{"key":"10.1016\/j.csl.2026.101945_b59","series-title":"Fon\u00e8tica Descriptiva Del Catal\u00e0 (Assaig De Caracteritzaci\u00f3 De La Pron\u00fancia Del Vocalisme I Consonantisme Del Catal\u00e0 Al Segle XX)","author":"Recasens i Vives","year":"1991"},{"key":"10.1016\/j.csl.2026.101945_b60","article-title":"Evaluation of a technique for improving the mapping of multiple speakers\u2019 vowel spaces in the F1\u2013F2 plane","volume":"9","author":"Watt","year":"2002","journal-title":"Leeds Work. Pap. Linguist."},{"key":"10.1016\/j.csl.2026.101945_b61","series-title":"Accents of English: Beyond the British Isles","author":"Wells","year":"1982"},{"key":"10.1016\/j.csl.2026.101945_b62","doi-asserted-by":"crossref","first-page":"1699","DOI":"10.1109\/TASLP.2024.3363414","article-title":"Accented text-to-speech synthesis with limited data","volume":"32","author":"Zhou","year":"2024","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. 
Proc."}],"container-title":["Computer Speech &amp; Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230826000082?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230826000082?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T21:12:59Z","timestamp":1779225179000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0885230826000082"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,10]]},"references-count":63,"alternative-id":["S0885230826000082"],"URL":"https:\/\/doi.org\/10.1016\/j.csl.2026.101945","relation":{},"ISSN":["0885-2308"],"issn-type":[{"value":"0885-2308","type":"print"}],"subject":[],"published":{"date-parts":[[2026,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"LaFresCat: A studio-quality Catalan multi-accent speech dataset for text-to-speech synthesis","name":"articletitle","label":"Article Title"},{"value":"Computer Speech & Language","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.csl.2026.101945","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"101945"}}