{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T16:42:36Z","timestamp":1765039356134},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2017,10,7]],"date-time":"2017-10-07T00:00:00Z","timestamp":1507334400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1007\/s10772-017-9463-8","type":"journal-article","created":{"date-parts":[[2017,10,7]],"date-time":"2017-10-07T02:37:45Z","timestamp":1507343865000},"page":"959-976","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["A waveform concatenation technique for text-to-speech synthesis"],"prefix":"10.1007","volume":"20","author":[{"given":"Soumya Priyadarsini","family":"Panda","sequence":"first","affiliation":[]},{"given":"Ajit Kumar","family":"Nayak","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,7]]},"reference":[{"issue":"3","key":"9463_CR1","doi-asserted-by":"crossref","first-page":"459","DOI":"10.1016\/j.specom.2011.10.010","volume":"54","author":"J Adell","year":"2012","unstructured":"Adell, J., Escudero, D., & Bonafonte, A. (2012). Production of filled pauses in concatenative speech synthesis based on the underlying fluent sentence. Speech Communication, 54(3), 459\u2013476.","journal-title":"Speech Communication"},{"issue":"5","key":"9463_CR2","doi-asserted-by":"crossref","first-page":"786","DOI":"10.1016\/j.specom.2011.01.004","volume":"53","author":"F Al\u00edas","year":"2011","unstructured":"Al\u00edas, F., Formiga, L., & Llora, X. (2011). Efficient and reliable perceptual weight tuning for unit-selection text-to-speech synthesis based on active interactive genetic algorithms: A proof-of-concept. Speech Communication, 53(5), 786\u2013800.","journal-title":"Speech Communication"},{"key":"9463_CR3","doi-asserted-by":"crossref","unstructured":"Bellur, A., Narayan, K. B., Krishnan, K. R., Murthy, H. (2011). Prosody modeling for syllable-based concatenative speech synthesis of Hindi and Tamil. In IEEE National conference on communications (NCC) (pp.\u00a01\u20135).","DOI":"10.1109\/NCC.2011.5734737"},{"issue":"1","key":"9463_CR4","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1016\/S0167-6393(98)00045-4","volume":"26","author":"C Beno\u0131\u0302t","year":"1998","unstructured":"Beno\u0131\u0302t, C., & Le Goff, B. (1998). Audio-visual speech synthesis from French text: Eight years of models, designs and evaluation at the ICP. Speech Communication, 26(1), 117\u2013129.","journal-title":"Speech Communication"},{"key":"9463_CR5","doi-asserted-by":"crossref","unstructured":"Black, A., & Tokuda, K. (2005). The blizzard challenge 2005: Evaluating corpus-based speech synthesis on common databases. In Proceedings of interspeech.","DOI":"10.21437\/Interspeech.2005-72"},{"key":"9463_CR6","doi-asserted-by":"crossref","unstructured":"Black, A. W., & Taylor, P. A. (1997). Automatically clustering similar units for unit selection in speech synthesis.","DOI":"10.21437\/Eurospeech.1997-219"},{"key":"9463_CR7","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1016\/j.specom.2015.05.008","volume":"72","author":"MQ Cai","year":"2015","unstructured":"Cai, M. Q., Ling, Z. H., & Dai, L. R. (2015). Statistical parametric speech synthesis using a hidden trajectory model. Speech Communication, 72, 149\u2013159.","journal-title":"Speech Communication"},{"issue":"7\u20138","key":"9463_CR8","doi-asserted-by":"crossref","first-page":"678","DOI":"10.1016\/j.specom.2010.03.004","volume":"52","author":"C Christiansen","year":"2010","unstructured":"Christiansen, C., Pedersen, M. S., & Dau, T. (2010). Prediction of speech intelligibility based on an auditory preprocessing model. Speech Communication, 52(7\u20138), 678\u2013692.","journal-title":"Speech Communication"},{"issue":"10","key":"9463_CR9","doi-asserted-by":"crossref","first-page":"906","DOI":"10.1016\/j.specom.2008.12.004","volume":"51","author":"Z Handley","year":"2009","unstructured":"Handley, Z. (2009). Is text-to-speech synthesis ready for use in computer-assisted language learning? Speech Communication, 51(10), 906\u2013919.","journal-title":"Speech Communication"},{"key":"9463_CR10","doi-asserted-by":"crossref","unstructured":"Hunt, A. J., & Black, A. W. (1996). Unit selection in a concatenative speech synthesis system using a large speech database. In IEEE International conference on acoustics, speech, and signal processing (pp.\u00a0373\u2013376).","DOI":"10.1109\/ICASSP.1996.541110"},{"issue":"1","key":"9463_CR11","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1016\/S0167-6393(02)00081-X","volume":"40","author":"A Iida","year":"2003","unstructured":"Iida, A., Campbell, N., Higuchi, F., & Yasumura, M. (2003). A corpus-based speech synthesis system with emotion. Speech Communication, 40(1), 161\u2013187.","journal-title":"Speech Communication"},{"key":"9463_CR12","doi-asserted-by":"crossref","unstructured":"Kishore, S. P., & Black, A. W. (2003). Unit size in unit selection speech synthesis. In INTERSPEECH.","DOI":"10.21437\/Eurospeech.2003-133"},{"key":"9463_CR13","unstructured":"Kishore, S. P., Black, A. W., Kumar, R., & Sangal, R. (2003). Experiments with unit selection speech databases for Indian languages. In National seminar on language technology tools, Hyderabad, India."},{"key":"9463_CR14","unstructured":"Kishore, S. P., Kumar, R., & Sangal, R. (2002). A data driven synthesis approach for Indian languages using syllable as basic unit. In Proceedings of international conference on NLP (ICON) (pp.\u00a0311\u2013316)."},{"key":"9463_CR15","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1016\/j.specom.2015.05.003","volume":"72","author":"Y Li","year":"2015","unstructured":"Li, Y., Tao, J., Hirose, K., Xu, X., & Lai, W. (2015). Hierarchical stress modeling and generation in mandarin for expressive text-to-speech. Speech Communication, 72, 59\u201373.","journal-title":"Speech Communication"},{"issue":"2","key":"9463_CR16","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1016\/j.csl.2010.05.008","volume":"25","author":"H Morton","year":"2011","unstructured":"Morton, H., Gunson, N., Marshall, D., McInnes, F., Ayres, A., & Jack, M. (2011). Usability assessment of text-to-speech synthesis for additional detail in an automated telephone banking system. Computer Speech & Language, 25(2), 341\u2013362.","journal-title":"Computer Speech & Language"},{"key":"9463_CR17","unstructured":"Murthy, H. A., Bellur, A., Viswanath, V., Narayanan, B., Susan, A., Kasthuri, G., \u2026, Prahallad, K. (2010). Building unit selection speech synthesis in Indian languages: An initiative by an Indian consortium. In Proceedings of COCOSDA, Kathmandu, Nepal."},{"key":"9463_CR18","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1007\/s10772-011-9094-4","volume":"14","author":"NP Narendra","year":"2011","unstructured":"Narendra, N. P., Rao, K. S., Ghosh, K., Vempada, R. R., & Maity, S. (2011). Development of syllable-based text to speech synthesis system in Bengali. International Journal of Speech Technology, 14, 167\u2013181.","journal-title":"International Journal of Speech Technology"},{"key":"9463_CR19","doi-asserted-by":"crossref","unstructured":"Panda, S. P., & Nayak, A. K. (2014). Integration of fuzzy if-then rule with waveform concatenation technique for text-to-speech synthesis in Odia. In IEEE International conference on information technology (ICIT) (pp.\u00a088\u201393).","DOI":"10.1109\/ICIT.2014.37"},{"key":"9463_CR20","unstructured":"Panda, S. P., & Nayak, A. K. (2014). A rule-based concatenative approach to speech synthesis in Indian language text-to-speech systems. In Intelligent computing, communication and devices (pp.\u00a0523\u2013531). New Delhi: Springer."},{"issue":"3","key":"9463_CR21","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1007\/s10772-015-9271-y","volume":"18","author":"SP Panda","year":"2015","unstructured":"Panda, S. P., & Nayak, A. K. (2015). An efficient model for text-to-speech synthesis in Indian languages. International Journal of Speech Technology, 18(3), 305\u2013315.","journal-title":"International Journal of Speech Technology"},{"issue":"2","key":"9463_CR22","doi-asserted-by":"crossref","first-page":"557","DOI":"10.1166\/asl.2016.6862","volume":"22","author":"SP Panda","year":"2016","unstructured":"Panda, S. P., & Nayak, A. K. (2016). Modified Rule-based concatenative technique for intelligible speech synthesis in Indian languages. Advanced Science Letters, 22(2), 557\u2013563.","journal-title":"Advanced Science Letters"},{"issue":"1","key":"9463_CR23","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/s10772-015-9320-6","volume":"19","author":"SP Panda","year":"2016","unstructured":"Panda, S. P., & Nayak, A. K. (2016). Automatic speech segmentation in syllable centric speech recognition system. International Journal of Speech Technology, 19(1), 9\u201318.","journal-title":"International Journal of Speech Technology"},{"issue":"3\u20134","key":"9463_CR24","doi-asserted-by":"crossref","first-page":"170","DOI":"10.1504\/IJGUC.2015.070676","volume":"6","author":"SP Panda","year":"2015","unstructured":"Panda, S. P., Nayak, A. K., & Patnaik, S. (2015). Text-to-speech synthesis with an Indian language perspective. International Journal of Grid and Utility Computing, 6(3\u20134), 170\u2013178.","journal-title":"International Journal of Grid and Utility Computing"},{"key":"9463_CR25","doi-asserted-by":"crossref","unstructured":"Patil, H., Patel, T. B., Shah, N. J., Sailor, H. B., Krishnan, R., Kasthuri, G. R., \u2026 Murthy, H. (2013). A syllable-based framework for unit selection synthesis in 13 Indian languages. In IEEE International conference on Asian spoken language research and evaluation (O-COCOSDA\/CASLRE) (pp.\u00a01\u20138).","DOI":"10.1109\/ICSDA.2013.6709851"},{"key":"9463_CR26","doi-asserted-by":"crossref","unstructured":"Prahallad, K., Vadapalli, A., Elluru, N., Mantena, G., Pulugundla, B., Bhaskararao, P., \u2026 Black, A. W. (2013). The blizzard challenge 2013\u2013Indian language task. In Blizzard challenge workshop.","DOI":"10.21437\/Blizzard.2013-2"},{"issue":"4","key":"9463_CR27","doi-asserted-by":"crossref","first-page":"556","DOI":"10.1109\/TASL.2008.2010884","volume":"17","author":"SM Prasanna","year":"2009","unstructured":"Prasanna, S. M., Reddy, B. S., & Krishnamoorthy, P. (2009). Vowel onset point detection using source, spectral peaks, and modulation spectrum energies. IEEE Transactions on Audio, Speech, and Language Processing, 17(4), 556\u2013565.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9463_CR28","doi-asserted-by":"crossref","unstructured":"Raghavendra, E. V., Desai, S., Yegnanarayana, B., Black, A. W., & Prahallad, K. (2008). Global syllable set for building speech synthesis in Indian languages. In IEEE Spoken language technology workshop, 2008 (SLT 2008) (pp.\u00a049\u201352).","DOI":"10.1109\/SLT.2008.4777837"},{"key":"9463_CR29","unstructured":"Rama, J., Ramakrishnan, A. G., Muralishankar, R., & Prathibha, R. (2002). A complete text-to-speech synthesis system in Tamil. In WSS\u2019 proceedings (pp.\u00a0191\u2013194)."},{"issue":"5","key":"9463_CR30","doi-asserted-by":"crossref","first-page":"1105","DOI":"10.1016\/j.csl.2013.02.003","volume":"27","author":"VR Reddy","year":"2013","unstructured":"Reddy, V. R., & Rao, K. S. (2013). Two-stage intonation modeling using feed forward neural networks for syllable based text-to-speech synthesis. Computer Speech & Language, 27(5), 1105\u20131126.","journal-title":"Computer Speech & Language"},{"key":"9463_CR42","unstructured":"Retrieved July 12, 2017, from http:\/\/tdil.mit.gov.in\/ ."},{"key":"9463_CR43","unstructured":"Retrieved July 12, 2017, from http:\/\/dhvani.sourceforge.net ."},{"key":"9463_CR44","unstructured":"Retrieved July 12, 2017, from http:\/\/www.unicode.org\/ ."},{"issue":"3","key":"9463_CR31","doi-asserted-by":"crossref","first-page":"230","DOI":"10.1016\/j.specom.2007.01.007","volume":"49","author":"M Rojc","year":"2007","unstructured":"Rojc, M., & Ka\u010di\u010d, Z. (2007). Time and space-efficient architecture for a corpus-based text-to-speech synthesis system. Speech Communication, 49(3), 230\u2013249.","journal-title":"Speech Communication"},{"issue":"9","key":"9463_CR32","doi-asserted-by":"crossref","first-page":"697","DOI":"10.1016\/j.specom.2007.04.006","volume":"49","author":"H Romsdorfer","year":"2007","unstructured":"Romsdorfer, H., & Pfister, B. (2007). Text analysis and language identification for polyglot text-to-speech synthesis. Speech communication, 49(9), 697\u2013724.","journal-title":"Speech communication"},{"key":"9463_CR33","doi-asserted-by":"crossref","unstructured":"Talesara, S., Patil, H. A., Patel, T., Sailor, H., & Shah, N. A. (2013). Novel Gaussian filter-based automatic labeling of speech data for TTS system in Gujarati language. In ICALP proceedings (pp.\u00a0139\u2013142).","DOI":"10.1109\/IALP.2013.46"},{"key":"9463_CR34","unstructured":"Thomas, S., Rao, M. N., Murthy, H., & Ramalingam, C. S. (2006). Natural sounding TTS based on syllable-like units. In IEEE 14th European signal processing conference (pp.\u00a01\u20135)."},{"key":"9463_CR35","doi-asserted-by":"crossref","first-page":"1278","DOI":"10.1109\/TASL.2010.2089679","volume":"19","author":"S Tiomkin","year":"2011","unstructured":"Tiomkin, S., Malah, D., Shechtman, S., & Kons, Z. (2011). A Hybrid Text-to-speech system that combines concatenative and statistical synthesis units. IEEE Transactions on Audio, Speech and Language Processing, 19, 1278\u20131288.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"9463_CR36","doi-asserted-by":"crossref","first-page":"176","DOI":"10.1016\/j.specom.2015.06.005","volume":"72","author":"M Toman","year":"2015","unstructured":"Toman, M., Pucher, M., Moosm\u00fcller, S., & Schabus, D. (2015). Unsupervised and phonologically controlled interpolation of Austrian German language varieties for speech synthesis. Speech Communication, 72, 176\u2013193.","journal-title":"Speech Communication"},{"issue":"2","key":"9463_CR37","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1016\/j.csl.2007.07.002","volume":"22","author":"HM Torres","year":"2008","unstructured":"Torres, H. M., & Gurlekian, J. A. (2008). Acoustic speech unit segmentation for concatenative synthesis. Computer Speech & Language, 22(2), 196\u2013206.","journal-title":"Computer Speech & Language"},{"key":"9463_CR38","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1016\/j.csl.2003.12.001","volume":"19","author":"M Viswanathan","year":"2005","unstructured":"Viswanathan, M. (2005). Measuring speech quality for text-to-speech systems: Development and assessment of a modified mean opinion score (MOS) scale. Computer Speech and Language, 19, 55\u201383.","journal-title":"Computer Speech and Language"},{"key":"9463_CR39","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1016\/j.specom.2014.04.002","volume":"63","author":"XJ Xia","year":"2014","unstructured":"Xia, X. J., Ling, Z. H., Jiang, Y., & Dai, L. R. (2014). HMM-based unit selection speech synthesis using log likelihood ratios derived from perceptual data. Speech Communication, 63, 27\u201337.","journal-title":"Speech Communication"},{"issue":"2","key":"9463_CR40","doi-asserted-by":"crossref","first-page":"266","DOI":"10.1016\/j.specom.2012.08.009","volume":"55","author":"CY Yeh","year":"2013","unstructured":"Yeh, C. Y., Chang, S. C., & Hwang, S. H. (2013). A consistency analysis on an acoustic module for Mandarin text-to-speech. Speech Communication, 55(2), 266\u2013277.","journal-title":"Speech Communication"},{"issue":"5","key":"9463_CR41","doi-asserted-by":"crossref","first-page":"771","DOI":"10.1016\/j.ijhcs.2003.07.004","volume":"60","author":"J York","year":"2004","unstructured":"York, J., & Pendharkar, P. C. (2004). Human\u2013computer interaction issues for mobile computing in a variable work context. International Journal of Human-Computer Studies, 60(5), 771\u2013797.","journal-title":"International Journal of Human-Computer Studies"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-017-9463-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9463-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9463-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,27]],"date-time":"2024-06-27T15:38:58Z","timestamp":1719502738000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-017-9463-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10,7]]},"references-count":44,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["9463"],"URL":"https:\/\/doi.org\/10.1007\/s10772-017-9463-8","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,10,7]]}}}