{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T18:10:01Z","timestamp":1720807801970},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2018,11,22]],"date-time":"2018-11-22T00:00:00Z","timestamp":1542844800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"PHC-Utique CMCU","award":["15G1405"],"award-info":[{"award-number":["15G1405"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1007\/s10772-018-09558-6","type":"journal-article","created":{"date-parts":[[2018,11,22]],"date-time":"2018-11-22T08:43:36Z","timestamp":1542876216000},"page":"895-906","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Evaluation of speech unit modelling for HMM-based speech synthesis for Arabic"],"prefix":"10.1007","volume":"21","author":[{"given":"Amal","family":"Houidhek","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vincent","family":"Colotte","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zied","family":"Mnasri","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Denis","family":"Jouvet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,11,22]]},"reference":[{"key":"9558_CR1","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid, O., Abdou, S. M., & Rashwan, M. (2006). Improving Arabic HMM based speech synthesis quality. In Interpseech 2006, 9th Annual Conference of the International Speech Communication Association. Pittsburgh, Pennsylvania, USA.","DOI":"10.21437\/Interspeech.2006-390"},{"key":"9558_CR2","doi-asserted-by":"crossref","unstructured":"Abdelmalek, R., & Mnasri, Z. (2016). High quality Arabic Text-to-speech synthesis using unit selection. In SSD\u20192016, IEEE Conference on Signal, Systems and Devices. Leipzig, Germany","DOI":"10.1109\/SSD.2016.7473681"},{"key":"9558_CR3","unstructured":"Ahmed, B. (2004). R\u00e9alisation d\u2019un syst\u00e8me hybride de synth\u00e8se de la parole Arabe utilisant un dictionnaire de polyphones. In JEP-TALN2004. Journ\u00e9es d\u2019Etude sur la Parole. Maroc: F\u00e8s."},{"key":"9558_CR4","doi-asserted-by":"crossref","unstructured":"Al-Ani, S. H. (1970). Arabic phonology: An acoustical and physiological investigation. In ERIC.","DOI":"10.1515\/9783110878769"},{"key":"9558_CR5","unstructured":"Baloul, S. (2003). D\u00e9veloppement d\u2019un syst\u00e8me automatique de synth\u00e8se de la parole \u00e0 partir du texte arabe standard voyell\u00e9. Doctoral dissertation, Le Mans."},{"key":"9558_CR6","unstructured":"Black, A., Taylor, P., Caley, R., & Clark, R. (1998). The festival speech synthesis system."},{"key":"9558_CR7","doi-asserted-by":"crossref","unstructured":"Black, A. W., Zen, H., & Tokuda, K. (2007). Statistical parametric speech synthesis. In ICASSP 2007, IEEE International Conference on Acoustics, Speech and Signal Processing, vol.\u00a04 (pp.\u00a0IV-1229). Honolulu, HI, USA.","DOI":"10.1109\/ICASSP.2007.367298"},{"key":"9558_CR8","doi-asserted-by":"crossref","unstructured":"Buchholz, S., & Latorre, J. (2011). Crowdsourcing preference tests, and how to detect cheating. In INTERSPEECH\u20192011, Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2011-764"},{"key":"9558_CR9","doi-asserted-by":"crossref","unstructured":"Chalamandaris, A., Tsiakoulis, P., Karabetsos, S., & Raptis, S. (2013). The ILSP\/INNOETICS Text-to-Speech System for the Blizzard Challenge 2013. In The Blizzard Challenge 2013 workshop, September 2013. Reykjavik, Iceland.","DOI":"10.21437\/Blizzard.2013-5"},{"key":"9558_CR10","unstructured":"Cheffour, N., Benabbou, A., & Mouradi, A. (2000). \u00c9tude et Evaluation de la Di-Syllabe comme Unit\u00e9 Acoustique pour le Syst\u00e8me de Synth\u00e8se Arabe PARADIS. In LREC\u20192000, International Conference on Language Resources and Evaluation, Athens, Greece."},{"key":"9558_CR11","unstructured":"Halabi, N. (2015). Modern standard Arabic speech corpus. Doctoral dissertation in University of Southampton."},{"key":"9558_CR12","unstructured":"Halabi, N., & Wald, W. (2016). Phonetic inventory for an Arabic speech corpus. In LREC 2016, 10th International Conference on Language Resources and Evaluation, (pp.\u00a0734\u2013738) Slovenia."},{"key":"9558_CR13","unstructured":"Halpern, J. (2009). Word stress and vowel neutralization in modern standard Arabic. In Proceedings of the Second International Conference on Arabic Language Resources and Tools."},{"key":"9558_CR14","doi-asserted-by":"crossref","unstructured":"Hunt, A. J., & Black, A. W. (1996). Unit selection in a concatenative speech synthesis system using a large speech database. In ICASSP\u20191996, IEEE International Conference on Acoustics, Speech and Signal Processing, vol.\u00a01 (pp.\u00a0373\u2013376). Atlanta Georgia, USA.","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"9558_CR15","unstructured":"ITU (1996). Recommendation P.800. Methods for subjective determination of transmission quality. International Telecommunication Union."},{"key":"9558_CR16","unstructured":"Jurafsky, D., & Martin, J. H. (2009). Speech and language processing: An introduction to natural language processing, computational linguistics, and speech recognition. Englewood Cliffs: Pearson\/Prentice Hall."},{"key":"9558_CR17","doi-asserted-by":"crossref","unstructured":"Kawahara, H., Masuda-Katsuse, I., & De Cheveign\u00e9, A. (1999). Restructuring speech representations using a pitch-adaptive time frequency smoothing and an instantaneous-frequency- based F0 extraction: Possible role of a repetitive structure in sounds. In Speech Communication, vol.\u00a027 (pp.\u00a0187\u2013207).","DOI":"10.1016\/S0167-6393(98)00085-5"},{"key":"9558_CR18","unstructured":"Khalil, K., & Cherif, M. C. (2013). Arabic HMM-based speech synthesis. In ICEESA\u20192013, International Conference on Electrical Engineering and software Applications, (pp.\u00a01\u20135). Tunisia: Hammamet."},{"key":"9558_CR19","unstructured":"Khouja, M. K., & Zrigui, M. (2005). Dur\u00e9e des consonnes g\u00e9min\u00e9es en parole Arabe: mesures et comparaison. In TALN-RECITAL 2005, Rencontres des \u00c9tudiants Chercheurs en Informatique pour le Traitement Automatique des Langues, Dourdan, France."},{"key":"9558_CR20","doi-asserted-by":"crossref","unstructured":"Kishore, S. P., & Black, A. W. (2003). Unit size in unit-selection speech synthesis. In EUROSPEECH\u20192003, Eighth European Conference on Speech Communication and Technology.","DOI":"10.21437\/Eurospeech.2003-133"},{"key":"9558_CR21","doi-asserted-by":"publisher","first-page":"971","DOI":"10.1121\/1.383940","volume":"67","author":"DH Klatt","year":"1980","unstructured":"Klatt, D. H. (1980). Software for a cascade\/parallel formant synthesizer. The Journal of the Acoustical Society of America, 67, 971\u2013995.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9558_CR22","doi-asserted-by":"crossref","unstructured":"Koishida, K., Tokuda, K., Kobayashi, T., & Imai, S. (1997). Efficient encoding of mel-generalized cepstrum for CELP coders. In ICASSP\u20191997, IEEE International Conference on Acoustic, Speech and Signal Processing (pp.\u00a01355\u20131358).","DOI":"10.1109\/ICASSP.1997.596198"},{"key":"9558_CR23","unstructured":"Kouloughli, D. (1976). Contribution \u00e0 l\u2019\u00e9tude de l\u2019accent en arabe litt\u00e9raire. In Annales de l\u2019Universit\u00e9 d\u2019Abidjan S\u00e9rie H: Linguistique Abidjan, vol.\u00a09 (pp.\u00a0115\u2013130)."},{"key":"9558_CR24","doi-asserted-by":"crossref","unstructured":"Krstulovic, S., Hunecke, A., & Schroder, M. (2007). An HMM-based speech synthesis system applied to German and its adaptation to a limited set of expressive football announcements. In EUROSPEECH\u20192007, European Conference on speech Communication and Technology, vol.\u00a07.","DOI":"10.21437\/Interspeech.2007-527"},{"issue":"2","key":"9558_CR25","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1177\/002383098803100205","volume":"31","author":"A Laufer","year":"1998","unstructured":"Laufer, A., & Baer, T. (1998). The emphatic and pharyngeal sounds in Hebrew and in Arabic. Language and Speech, 31(2), 181\u2013205.","journal-title":"Language and Speech"},{"key":"9558_CR26","first-page":"153","volume-title":"SSW\u20192013, ISCA Tutorial and Research Workshop on Speech Synthesis","author":"S Maguer Le","year":"2013","unstructured":"Le Maguer, S., Barbot, N., & Boeffard, O. (2013). Evaluation of contextual descriptors for HMM-based speech synthesis in French. In SSW\u20192013, ISCA Tutorial and Research Workshop on Speech Synthesis (pp.\u00a0153\u2013158). Spain: Barcelona."},{"key":"9558_CR27","doi-asserted-by":"crossref","unstructured":"Moulines, E., Emerard, F., Larreur, D., Le Saint Milon, J. L., Le Faucheur, L., Marty, F., Charpentier, F., & Sorin, C. (1990). A real-time French text-to-speech system generating high-quality synthetic speech. In ICASSP\u20191990, IEEE International Conference on Acoustics, Speech, and Signal Processing (pp.\u00a0309\u2013312).","DOI":"10.1109\/ICASSP.1990.115650"},{"key":"9558_CR28","unstructured":"Newman, D. (1984). The phonetics of Arabic. In Journal of the American Oriental Society, vol.\u00a046 (pp.\u00a01\u20136)."},{"key":"9558_CR29","doi-asserted-by":"crossref","unstructured":"Rajouani, A., Najim, M., Chiadmi, D., & Zyoute, M. (1987). Synthesis-by-rule of Arabic language. In EUROSPEECH\u2019987, European Conference on Speech Technology.","DOI":"10.21437\/ECST.1987-8"},{"key":"9558_CR30","unstructured":"Schwarz, D., Beller, G., Verbrugghe, B., & Britton, S. (2006). Real-time corpus-based concatenative synthesis with catart. In DAFx\u20192006, 9th International Conference on Digital Audio Effects (pp.\u00a0279\u2013282)."},{"key":"9558_CR31","doi-asserted-by":"crossref","unstructured":"Selouani, S. A., & Caelen, J. (1998). Arabic phonetic features recognition using modular connectionist architectures. In IVTTA\u20191998, IEEE Workshop on Interactive Voice Technology for Telecommunications Applications (pp.\u00a0155\u2013160). Torino, Italy.","DOI":"10.1109\/IVTTA.1998.727712"},{"key":"9558_CR32","doi-asserted-by":"crossref","unstructured":"Sil\u00e9n, H., Helander, E., Nurminen, J., & Gabbouj, M. (2010). Analysis of duration prediction accuracy in HMM-based speech synthesis. In SP\u20192010, Speech Prosody.","DOI":"10.21437\/SpeechProsody.2010-79"},{"key":"9558_CR33","doi-asserted-by":"crossref","unstructured":"Silverman, K., Beckman, M., Pitrelli, J., Ostendorf, M., Wightman, C., Price, P., Pierrehumbert, J., & Hirschberg, J. (1992). Tobi: A standard for labelling English prosody. In ICSLP\u20191992, International Conference on Spoken Language Processing, vol.\u00a01 (pp.\u00a0867\u2013870).","DOI":"10.21437\/ICSLP.1992-260"},{"key":"9558_CR34","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511816338","volume-title":"Text-to-speech synthesis","author":"P Taylor","year":"2009","unstructured":"Taylor, P. (2009). Text-to-speech synthesis. Cambridge: Cambridge University Press."},{"key":"9558_CR35","unstructured":"Taylor, P. A., Nairn, I. A., Sutherland, A. M., Jack, M. A., Bagshaw, P. C., Renals, S., & Sutherland, A. M. (1991). A real time speech synthesis system. In IEEE Symposium (pp.\u00a0101\u2013106)."},{"key":"9558_CR36","unstructured":"Tokuda, K., Zen, H., & Black, A. W. (2002). An HMM-based speech synthesis system applied to English. In IEEE Speech Synthesis Workshop (pp.\u00a0227\u2013230)."},{"key":"9558_CR37","unstructured":"Watts, O., Stan, A., Clark, R. A., Mamiya, Y., Giurgiu, M., Yamagishi, J., & King, S. (2013). Unsupervised and lightly-supervised learning for rapid construction of TTS systems in multiple languages from \u2018found\u2019data: evaluation and analysis. In SSW\u20192013, 8th ISCA Speech Synthesis Workshop."},{"key":"9558_CR38","doi-asserted-by":"crossref","unstructured":"Wu, Z., Watts, O., & King, S. (2016). Merlin: An open source neural network speech synthesis system. In SSW\u20192016, 9th ISCA Speech Synthesis Workshop, Sunnyvale, USA.","DOI":"10.21437\/SSW.2016-33"},{"key":"9558_CR39","doi-asserted-by":"crossref","unstructured":"Yoshimura, T., Tokuda, K., Masuko, T., Kobayashi, T., & Kitamura, T. (1999). Simultaneous modeling of spectrum, pitch and duration in HMM-based speech synthesis. In EUROSPEECH\u20191999, European Conference on Speech Communication and Technology.","DOI":"10.21437\/Eurospeech.1999-513"},{"key":"9558_CR40","unstructured":"Young, S. J. (1994). The HTK hidden Markov model toolkit: Design and philosophy. Department of Engineering, Cambridge University, UK, Tech. Rep. TR.152."},{"key":"9558_CR41","doi-asserted-by":"crossref","unstructured":"Zen, H., & Sak, H. (2015). Unidirectional long short-term memory recurrent neural network with recurrent output layer for low-latency speech synthesis. In ICASSP\u20192015, IEEE International Conference on Acoustics, Speech and Signal Processing, 2015 (pp.\u00a04470\u20134474).","DOI":"10.1109\/ICASSP.2015.7178816"},{"key":"9558_CR42","doi-asserted-by":"crossref","unstructured":"Zen, H., Senior, A., & Schuster, M. (2013). Statistical parametric speech synthesis using deep neural networks. In ICASSP\u20192013, IEEE International Conference on Acoustics, Speech and Signal Processing (pp.\u00a07962\u20137966).","DOI":"10.1109\/ICASSP.2013.6639215"},{"key":"9558_CR43","doi-asserted-by":"crossref","unstructured":"Zen, H., Toda, T., & Tokuda, K. (2006). The Nitech-NAIST HMM-based speech synthesis system for the Blizzard Challenge 2006. In Proceedings Blizzard Challenge Workshop.","DOI":"10.21437\/Blizzard.2006-3"},{"key":"9558_CR44","doi-asserted-by":"crossref","unstructured":"Zen, H., Tokuda, K., & Black, A. W. (2009). Statistical parametric speech synthesis. In Speech Communication, vol.\u00a051, no 11 (pp.\u00a01039\u20131064).","DOI":"10.1016\/j.specom.2009.04.004"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-018-09558-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-018-09558-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-018-09558-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T17:18:50Z","timestamp":1720804730000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-018-09558-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11,22]]},"references-count":44,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["9558"],"URL":"https:\/\/doi.org\/10.1007\/s10772-018-09558-6","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,11,22]]},"assertion":[{"value":"1 March 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 September 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 November 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}