{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T14:12:22Z","timestamp":1774879942246,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T00:00:00Z","timestamp":1766534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T00:00:00Z","timestamp":1766534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10772-025-10244-7","type":"journal-article","created":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T10:34:26Z","timestamp":1766572466000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Analyzing cross-language similarities to enhance low-resource text-to-speech via transfer learning, case study: the Moroccan Berber Amazigh"],"prefix":"10.1007","volume":"29","author":[{"given":"Fatim Ezzahrae","family":"Dorhmi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Imane","family":"Lasri","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Naoufal","family":"El-Marzouki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anouar","family":"Riadsolh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mourad","family":"Elbelkacemi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hamid","family":"Ez-Zahraouy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,24]]},"reference":[{"key":"10244_CR1","doi-asserted-by":"crossref","unstructured":"Byambadorj, Z., Nishimura, R., Ayush, A., Ohta, K., & Kitaoka, N. (2021) Text-to-speech system for low-resource language using cross-lingual transfer learning and data augmentation. EURASIP Journal on Audio, Speech, and Music Processing, 2021(1).","DOI":"10.1186\/s13636-021-00225-4"},{"key":"10244_CR2","unstructured":"Chen, M., Tan, X., Bohan, L., Liu, Y., Qin, T., Zhao, S., & Liu, T.-Y. (2021). Adaspeech: Adaptive text-to-speech for custom voice. In International conference on learning representations."},{"key":"10244_CR3","unstructured":"Chen, Y.-C., Chi, P.-H., Yang, S.-W., Chang, K.-W., Lin, J.-H., Huang, S.-F., Liu, D.-R., Liu, C.-L., Lee, C.-K., & Lee, H.-Y. (2021). Speechnet: A universal modularized model for speech processing tasks. arXiv preprint arXiv:2105.03070."},{"key":"10244_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Y.-J., Tao, T., Yeh, C.-C., & Lee, H.-Y. (2019a). End-to-end text-to-speech for low-resource languages by cross-lingual transfer learning. In Proceedings of Interspeech 2019 (pp. 2075\u20132079). ISCA.","DOI":"10.21437\/Interspeech.2019-2730"},{"key":"10244_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y.-J., Tao, T., Yeh, C.-C., & Lee, H.-Y. (2019b). End-to-end text-to-speech for low-resource languages by cross-lingual transfer learning. In Proceedings of Interspeech 2019 (pp. 2075\u20132079).","DOI":"10.21437\/Interspeech.2019-2730"},{"key":"10244_CR6","unstructured":"Chen, Y., Assael, Y., Shillingford, B., Budden, D., Reed, S., Zen, H., Wang, Q., Cobo, L. C., Trask, A., Laurie, B., Gulcehre, C., van den Oord, A., Vinyals, O., & de Freitas, N. (2018). Sample efficient adaptive text-to-speech. In International conference on learning representations."},{"key":"10244_CR7","doi-asserted-by":"crossref","unstructured":"Chung, Y.-A., Wang, Y., Hsu, W.-N., Zhang, Y., & Skerry-Ryan, R. (2019). Semi-supervised training for improving data efficiency in end-to-end speech synthesis. In International conference on acoustics, speech and signal processing (ICASSP) (pp. 6940\u20136944). IEEE.","DOI":"10.1109\/ICASSP.2019.8683862"},{"key":"10244_CR8","unstructured":"Cooper, E. L. (2019). Text-to-speech synthesis using found data for low-resource languages. PhD thesis, Columbia University."},{"key":"10244_CR9","doi-asserted-by":"crossref","unstructured":"de Korte, M., Kim, J., & Klabbers, E. (2020). Efficient neural speech synthesis for low-resource languages through multilingual modeling. In Proceedings of Interspeech 2020 (pp. 2967\u20132971).","DOI":"10.21437\/Interspeech.2020-2664"},{"key":"10244_CR10","doi-asserted-by":"crossref","unstructured":"Demirsahin, I., Jansche, M., & Gutkin, A. (2018). A unified phonological representation of south Asian languages for multilingual text-to-speech. In 6th Workshop on poken language technologies for under-resourced languages (SLTU 2018), 29-31 August 2018, Gurugram, India.","DOI":"10.21437\/SLTU.2018-17"},{"key":"10244_CR11","volume-title":"of Studi Camito-Semitici.","author":"L. Galand","year":"2010","unstructured":"Galand, L. (2010). Regards sur le berb\u00e8re. In of Studi Camito-Semitici. (Vol. 8). Milano: Centro Studi Camito-Semitici."},{"key":"10244_CR12","doi-asserted-by":"crossref","unstructured":"Gutkin, A. (2017). Uniform multilingual multi-speaker acoustic model for statistical parametric speech synthesis of low-resourced languages. In Interspeech 2017.","DOI":"10.21437\/Interspeech.2017-37"},{"key":"10244_CR13","unstructured":"Hemati, H., & Borth, D. (2020). Using ipa-based tacotron for data efficient cross-lingual speaker adaptation and pronunciation enhancement. arXiv preprint arXiv:2011.06392."},{"key":"10244_CR14","doi-asserted-by":"crossref","unstructured":"Huybrechts, G., Merritt, T., Comini, G., Perz, B., Shah, R., & Lorenzo-Trueba, J. (2020). Low-resource expressive text-to-speech using data augmentation. arXiv preprint arXiv:2011.05707.","DOI":"10.1109\/ICASSP39728.2021.9413466"},{"key":"10244_CR15","unstructured":"Institut Royal de la Culture Amazighe (IRCAM). (2016). Dictionnaire g\u00e9n\u00e9ral de l\u2019amazighe. IRCAM. PDF document."},{"key":"10244_CR16","unstructured":"Jia, Y., Zhang, Y., Weiss, R., Wang, Q., Shen, J., Ren, F., Chen, Z., Nguyen, P., Pang, R., Moreno, I. L., Wu, Y. (2018). Transfer learning from speaker verification to multispeaker text-to-speech synthesis. In NIPS'18) (pp. 4485\u20134495)."},{"key":"10244_CR17","unstructured":"Jiahui, N., Wang, L., Gao, H., Qian, K., Zhang, Y., Chang, S., & Hasegawa-Johnson, M. (2022). Unsupervised text-to-speech synthesis by unsupervised automatic speech recognition. Interspeech 2022."},{"key":"10244_CR18","doi-asserted-by":"crossref","unstructured":"Jin, X., Tan, X., Ren, Y., Qin, T., Jian, L., Zhao, S., & Liu, T.-Y. (2020). Lrspeech: Extremely low-resource speech synthesis and recognition. In Proceedings of the 26th ACM SIGKDD international conference on knowledge discovery & data mining (pp. 2802\u20132812.","DOI":"10.1145\/3394486.3403331"},{"key":"10244_CR19","doi-asserted-by":"crossref","unstructured":"Junyi, A., Wang, R., Zhou, L., Wang, C., Ren, S., Wu, Y., Liu, S., Ko, T.., Li, , Zhang, Y., Wei, Z., Qian, Y., Li, J., & Wei, F. (2022). Speecht5: Unified-modal encoder-decoder pre-training for spoken language processing. In Proceedings of the 60th annual meeting of the association for computational linguistics (volume 1: Long papers) (pp. 5723\u20135738).","DOI":"10.18653\/v1\/2022.acl-long.393"},{"key":"10244_CR20","unstructured":"Kossmann, M. G., & Stroomer, H. J. (1997). Berber phonology. In Phonologies of Asia and Africa (pp. 461\u2013475). Eisenbrauns."},{"key":"10244_CR21","doi-asserted-by":"publisher","first-page":"179798","DOI":"10.1109\/ACCESS.2020.3027619","volume":"8","author":"A. Kurniawati","year":"2020","unstructured":"Kurniawati, A., Adriani, M., & Jatmiko, W. (2020). Hierarchical transfer learning for multilingual, multi-speaker, and style transfer dnn-based tts on low-resource languages. IEEE Access, 8, 179798\u2013179812.","journal-title":"IEEE Access"},{"key":"10244_CR22","doi-asserted-by":"publisher","first-page":"1336","DOI":"10.1162\/tacl_a_00430","volume":"9","author":"K. Lakhotia","year":"2021","unstructured":"Lakhotia, K., Kharitonov, E., Hsu, W.-N., Adi, Y., Polyak, A., Bolte, B., Nguyen, T.-A., Copet, J., Baevski, A., Mohamed, A., & Dupoux, E. (2021). On generative spoken language modeling from raw audio. Transactions of the Association for Computational Linguistics, 9, 1336\u20131354.","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"10244_CR24","doi-asserted-by":"crossref","unstructured":"Liu, A. H., Tao, T., Lee, H.-Y., & Lee, L.-S. (2020). Towards unsupervised speech recognition and synthesis with quantized speech representation learning. In 2020 IEEE international conference on acoustics, speech and signal processing (ICASSP 2020) (pp. 7259\u20137263). IEEE.","DOI":"10.1109\/ICASSP40776.2020.9053571"},{"key":"10244_CR25","unstructured":"Mutian, H., Yang, J., & Lei, H. (2021). Multilingual byte2speech text-to-speech models are few-shot spoken language learners. arXiv preprint arXiv:2103.03541."},{"key":"10244_CR26","unstructured":"Pariente, I. (2021). Theoretical issues in modern hebrew phonology. PhD thesis, Netherlands Graduate School of Linguistics (LOT) ."},{"key":"10244_CR27","unstructured":"Ping, W., Peng, K., Gibiansky, A., Arik, S. O., Kannan, A., Narang, S., Raiman, J., & Miller, J. (2017). Deep voice 3: Scaling text-to-speech with convolutional sequence learning. arXiv preprint arXiv:1710.07654."},{"key":"10244_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-8884-6","volume-title":"Dual learning","author":"T. Qin","year":"2020","unstructured":"Qin, T. (2020). Dual learning. Springer."},{"key":"10244_CR29","unstructured":"Qiong, H., Marchi, E., Winarsky, D., Stylianou, Y., Naik, D., & Kajarekar, S. (2019). Neural text-to-speech adaptation from low quality public recordings. In Speech synthesis workshop (Vol. 10)."},{"issue":"140","key":"10244_CR30","first-page":"1","volume":"21","author":"C. Raffel","year":"2020","unstructured":"Raffel, C., Shazeer, N., Roberts, A., Lee, K., Narang, S., Matena, M., Zhou, Y., Wei, L., & Liu, P. J. (2020). Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of Machine Learning Research, 21(140), 1\u201367.","journal-title":"Journal of Machine Learning Research"},{"key":"10244_CR31","unstructured":"Ren, Y., Tan, X., Qin, T., Zhao, S., Zhao, Z., & Liu, T.-Y. (2019) Almost unsupervised text to speech and automatic speech recognition. In Proceedings of the 36th international conference on machine learning (PLMR 97) (pp. 5410\u20135419)."},{"issue":"2","key":"10244_CR33","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1017\/S0025100313000388","volume":"44","author":"R. Ridouane","year":"2014","unstructured":"Ridouane, R. (2014). Tashlhiyt berber. Journal of the International Phonetic Association, 44(2), 207\u2013221.","journal-title":"Journal of the International Phonetic Association"},{"key":"10244_CR34","unstructured":"Sawalha, M., Brierley, C., & Atwell, E. (2014). Automatically generated, phonemic arabic-ipa pronunciation tiers for the boundary annotated qur\u2019an dataset for machine learning (version 2.0). In Proceedings of LRE-Rel 2: 2nd Workshop on language resource and evaluation for religious texts, (LREC 2014) post-conference workshop (pp. 42\u201347). University of Leeds, Reykjavik, Iceland."},{"issue":"2","key":"10244_CR35","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1017\/S0025100300004886","volume":"23","author":"R. G. Schuh","year":"1993","unstructured":"Schuh, R. G., & Yalwa, L. D. (1993). Hausa. Journal of the International Phonetic Association, 23(2), 77\u201382.","journal-title":"Journal of the International Phonetic Association"},{"key":"10244_CR36","doi-asserted-by":"crossref","unstructured":"Sedgwick, P. (2012). Pearson\u2019s correlation coefficient. BMJ, 345.","DOI":"10.1136\/bmj.e4483"},{"key":"10244_CR37","doi-asserted-by":"crossref","unstructured":"Shah, R., Pokora, K., Ezzerg, A., Klimkov, V., Huybrechts, G., Putrycz, B., Korzekwa, D., & Merritt, T. (2021) Non-autoregressive tts with explicit duration modelling for low-resource highly expressive speech. In Proceedings of the 10th ISCA speech synthesis workshop (SSW 10) (pp. 155\u2013160). ISCA.","DOI":"10.21437\/SSW.2021-17"},{"key":"10244_CR38","doi-asserted-by":"crossref","unstructured":"Shen, J., Pang, R., Weiss, R. J., Schuster, M., Jaitly, N., Yang, Z., Chen, Z., Zhang, Y., Wang, Y., Skerry-Ryan, R., Saurous, R. A., Agiomyrgiannakis, Y., & Wu, Y. (2018). Natural tts synthesis by conditioning wavenet on mel spectrogram predictions. In International conference on acoustics, speech and signal processing (ICASSP) (pp. 4779\u20134783). IEEE.","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"10244_CR39","unstructured":"Sotelo, J., Mehri, S., Kumar, K., Santos, J. F., Kastner, K., Courville, A., & Bengio, Y. (2017). Char2wav: End-to-end speech synthesis."},{"key":"10244_CR40","unstructured":"Taigman, Y., Wolf, L., Polyak, A., & Nachmani, E. (2017). Voiceloop: Voice fitting and synthesis via a phonological loop. arXiv preprint arXiv:1707.06588."},{"key":"10244_CR41","doi-asserted-by":"crossref","unstructured":"Tan, X., Chen, J., He, D., Xia, Y., Qin, T., & Liu, T.-Y. (2019). Multilingual neural machine translation with language clustering. In Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP) (pp. 962\u2013972).","DOI":"10.18653\/v1\/D19-1089"},{"key":"10244_CR42","doi-asserted-by":"crossref","unstructured":"Tao, T., Chen, Y.-J., Liu, A. H., & Lee, H.-Y. (2020). Semi-supervised learning for multi-speaker text-to-speech synthesis using discrete speech representation. In Proceedings of Interspeech 2020 (pp. 3191\u20133195).","DOI":"10.21437\/Interspeech.2020-1824"},{"key":"10244_CR43","doi-asserted-by":"crossref","unstructured":"Tjandra, A., Sakti, S., & Nakamura, S. (2017). Listening while speaking: Speech chain by deep learning. In 2017 IEEE automatic speech recognition and U understanding workshop (ASRU) (pp. 301\u2013308). IEEE.","DOI":"10.1109\/ASRU.2017.8268950"},{"key":"10244_CR44","doi-asserted-by":"crossref","unstructured":"Tjandra, A., Sakti, S., & Nakamura, S. (2018). Machine speech chain with one-shot speaker adaptation. In Proceedings of Interspeech 2018 (pp. 887\u2013891).","DOI":"10.21437\/Interspeech.2018-1558"},{"key":"10244_CR45","doi-asserted-by":"crossref","unstructured":"Tjandra, A., Sisman, B., Zhang, M., Sakti, S., Haizhou, L., & Nakamura, S. (2019). VQVAE unsupervised unit discovery and multi-scale code2spec inverter for zerospeech challenge 2019. In Proceedings of Interspeech 2019 (pp. 1118\u20131122).","DOI":"10.21437\/Interspeech.2019-3232"},{"key":"10244_CR46","doi-asserted-by":"crossref","unstructured":"Toyin, H. O., Djanibekov, A., Kulkarni, A., & Aldarmaki, H. (2023). Artst: Arabic text and speech transformer. arXiv preprint arXiv:2310.16621.","DOI":"10.18653\/v1\/2023.arabicnlp-1.5"},{"key":"10244_CR47","doi-asserted-by":"crossref","unstructured":"Wang, P., Qian, Y., Soong, F. K., Lei, H., & Zhao, H. (2015). Word embedding for recurrent neural network based TTS synthesis. In 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 4879\u20134883). IEEE.","DOI":"10.1109\/ICASSP.2015.7178898"},{"key":"10244_CR48","doi-asserted-by":"crossref","unstructured":"Wang, Y., Skerry-Ryan, R., Stanton, D., Wu, Y., Weiss, R. J., Jaitly, N., Yang, Z., Xiao, Y., Chen, Z., Bengio, S., Le, Q., Agiomyrgiannakis, Y., Clark, R., & Saurous, R. A. (2017). Tacotron: Towards end-to-end speech synthesis. Interspeech, 4006\u20134010.","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"10244_CR49","unstructured":"Wang, Y., Stanton, D., Zhang, Y., Skerry-Ryan, R., Battenberg, E., Shor, J., Xiao, Y., Ren, F., Jia, Y., & Saurous, R. A. (2018). Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis. arXiv preprint arXiv:1803.09017."},{"key":"10244_CR50","doi-asserted-by":"crossref","unstructured":"Wind, J. (1989). The evolutionary history of the human speech organs. Studies in Language Origins, 1, 173\u2013197.","DOI":"10.1075\/z.los1.12win"},{"key":"10244_CR51","doi-asserted-by":"crossref","unstructured":"Yu, Q., Liu, P., Wu, Z., Ang, S. K., Meng, H., & Cai, L. (2016). Learning cross-lingual information with multilingual blstm for speech synthesis of low-resource languages In IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 5545\u20135549). IEEE.","DOI":"10.1109\/ICASSP.2016.7472738"},{"key":"10244_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, H., & Lin, Y. (2020a). Unsupervised learning for sequence-to-sequence text-to-speech for low-resource languages. In Proceedings of Interspeech 2020 (pp. 3161\u20133165).","DOI":"10.21437\/Interspeech.2020-1403"},{"key":"10244_CR53","doi-asserted-by":"crossref","unstructured":"Zhang, H., & Lin, Y. (2020b). Unsupervised learning for sequence-to-sequence text-to-speech for low-resource languages. Interspeech.","DOI":"10.21437\/Interspeech.2020-1403"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10244-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-025-10244-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10244-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T13:22:31Z","timestamp":1774876951000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-025-10244-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,24]]},"references-count":51,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["10244"],"URL":"https:\/\/doi.org\/10.1007\/s10772-025-10244-7","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,24]]},"assertion":[{"value":"13 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declartions"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"15"}}