{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,10,16]],"date-time":"2023-10-16T21:32:19Z","timestamp":1697491939153},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2007,3,1]],"date-time":"2007-03-01T00:00:00Z","timestamp":1172707200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2007,3]]},"DOI":"10.1007\/s10772-008-9015-3","type":"journal-article","created":{"date-parts":[[2009,1,5]],"date-time":"2009-01-05T23:32:50Z","timestamp":1231198370000},"page":"31-44","source":"Crossref","is-referenced-by-count":2,"title":["Integrating coding techniques into LP-based Mandarin text-to-speech synthesis"],"prefix":"10.1007","volume":"10","author":[{"given":"Hwai-Tsu","family":"Hu","sequence":"first","affiliation":[]},{"given":"Hsin-Min","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,1,6]]},"reference":[{"key":"9015_CR1","volume-title":"Talking machines: theories, models and designs","year":"1992","unstructured":"Bailly, G., Benoit, C., & Sawallis, T. (Eds.) (1992). Talking machines: theories, models and designs. Amsterdam: North Holland, Elsevier."},{"issue":"9","key":"9015_CR2","doi-asserted-by":"crossref","first-page":"1317","DOI":"10.1109\/26.61370","volume":"38","author":"S. H. Chen","year":"1990","unstructured":"Chen, S. H., & Wang, Y. R. (1990). Vector quantization of pitch information in Mandarin speech. IEEE Transactions on Communications, 38(9), 1317\u20131320.","journal-title":"IEEE Transactions on Communications"},{"issue":"3","key":"9015_CR3","doi-asserted-by":"crossref","first-page":"226","DOI":"10.1109\/89.668817","volume":"6","author":"S. H. Chen","year":"1998","unstructured":"Chen, S. H., Hwang, S. H., & Wang, Y. R. (1998). An RNN-based prosodic information synthesizer for Mandarin text-to-speech. IEEE Transactions on Speech and Audio Processing, 6(3), 226\u2013239.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9015_CR4","doi-asserted-by":"crossref","unstructured":"Chiang, C. Y., Chen, S. H., & Wang, Y. R. (2005). On the inter-syllable coarticulation effect of pitch modeling for Mandarin speech. In Proceeding of interspeech (pp. 3269\u20133272).","DOI":"10.21437\/Interspeech.2005-569"},{"issue":"4","key":"9015_CR5","doi-asserted-by":"crossref","first-page":"2026","DOI":"10.1121\/1.411319","volume":"96","author":"D. G. Childers","year":"1994","unstructured":"Childers, D. G., & Hu, H. T. (1994). Speech synthesis by glottal excited linear prediction. Journal of the Acoustical Society of America, 96(4), 2026\u20132036.","journal-title":"Journal of the Acoustical Society of America"},{"key":"9015_CR6","unstructured":"Choi, J., Hon, H. W., Lebrun, J. L., Lee, S. P., Loudon, G., Phan, V. H., & Yogananthan, S. (1994). Yanhui, a\u00a0software based high performance Mandarin text-to-speech system. In Proc. ROCLING XII (pp. 35\u201350)."},{"issue":"7","key":"9015_CR7","doi-asserted-by":"crossref","first-page":"481","DOI":"10.1109\/TSA.2002.803437","volume":"10","author":"F. C. Chou","year":"2002","unstructured":"Chou, F. C., Tseng, C. Y., & Lee, L. S. (2002). A set of corpus-based text-to-speech synthesis technologies for Mandarin Chinese. IEEE Transactions on Speech and Audio Processing, 10(7), 481\u2013494.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"2","key":"9015_CR8","first-page":"143","volume":"17","author":"M. Chu","year":"1998","unstructured":"Chu, M., Tang, D., Si, H., Tian, X., & Lu, S. (1998). Research on perception of juncture between syllables in Chinese. Chinese Journal of Acoustics, 17(2), 143\u2013152.","journal-title":"Chinese Journal of Acoustics"},{"key":"9015_CR9","doi-asserted-by":"crossref","unstructured":"Cohen, G., & Malah, D. (1995). Speech analysis and synthesis using a glottal excited AR model with DTW-based glottal determination. In 18th Convention of electrical and Electronics Engineers, 3.2.3 (pp. 1\u20135).","DOI":"10.1109\/EEIS.1995.513822"},{"issue":"4","key":"9015_CR10","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1250\/ast.5.233","volume":"5","author":"H. Fujisaki","year":"1984","unstructured":"Fujisaki, H., & Hirose, K. (1984). Analysis of voice fundamental frequency contours for declarative sentences of Japanese. Journal of the Acoustical Society of Japan (E), 5(4), 233\u2013241.","journal-title":"Journal of the Acoustical Society of Japan (E)"},{"issue":"8","key":"9015_CR11","first-page":"1654","volume":"E83-D","author":"H. T. Hu","year":"2000","unstructured":"Hu, H. T., Kuo, F. J., & Wang, H. J. (2000). A pseudo glottal excitation model for the linear prediction vocoder with speech signals coded at 1.6 kbps. IEICE Transactions on Information and Systems, E83-D(8), 1654\u20131661.","journal-title":"IEICE Transactions on Information and Systems"},{"key":"9015_CR12","unstructured":"Hund, A. (1993). Software dreams and talking machines. Available at http:\/\/us.geocities.com\/tim_hobbs.geo\/sw2.htm ."},{"issue":"8","key":"9015_CR13","doi-asserted-by":"crossref","first-page":"720","DOI":"10.1049\/el:19920456","volume":"28","author":"S. H. Hwang","year":"1992","unstructured":"Hwang, S. H., & Chen, S. H. (1992). Neural network synthesizer of pause duration for Mandarin text-to-speech. Electronics Letters, 28(8), 720\u2013721.","journal-title":"Electronics Letters"},{"key":"9015_CR14","doi-asserted-by":"crossref","unstructured":"Hwang, S. H., Chen, S. H., & Wang, Y. R. (1996). A Mandarin text-to-speech system. In Proc. 4th int. conf. spoken language (Vol.\u00a03, pp. 1421\u20131424).","DOI":"10.21437\/ICSLP.1996-363"},{"key":"9015_CR15","unstructured":"Klatt, D. H. (1982). The Klattalk text-to-speech system. In Proc. IEEE int. conf. acoust. speech signal process (Vol.\u00a07, pp. 1589\u20131592)."},{"key":"9015_CR16","doi-asserted-by":"crossref","unstructured":"Laroche, J., Stylianou, Y., & Moulines, E. (1993). HNS: Speech modification based on a harmonic\u2009+\u2009noise model. In Proc. IEEE int. conf. acoust. speech signal process (Vol.\u00a02, pp. 550\u2013553).","DOI":"10.1109\/ICASSP.1993.319365"},{"issue":"9","key":"9015_CR17","doi-asserted-by":"crossref","first-page":"1309","DOI":"10.1109\/29.31286","volume":"37","author":"L. S. Lee","year":"1989","unstructured":"Lee, L. S., Tseng, C. Y., & Ouh-Young, M. (1989). The synthesis rules in a Chinese text-to-speech system. IEEE Transactions on Acoustics, Speech, and Signal Processing, 37(9), 1309\u20131320.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"issue":"3","key":"9015_CR18","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1109\/89.232612","volume":"1","author":"L. S. Lee","year":"1993","unstructured":"Lee, L. S., Tseng, C. Y., & Hsieh, C. J. (1993). Improved tone concatenation rules in a formant-based Chinese text-to-speech system. IEEE Transactions on Speech and Audio Processing, 1(3), 287\u2013294.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"6","key":"9015_CR19","first-page":"545","volume":"E81-D","author":"Y. J. Lin","year":"1998","unstructured":"Lin, Y. J., & Yu, M. S. (1998). An efficient Mandarin text-to-speech system on time domain. IEICE Transactions on Information and Systems, E81-D(6), 545\u2013555.","journal-title":"IEICE Transactions on Information and Systems"},{"key":"9015_CR20","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1109\/TCOM.1980.1094577","volume":"COM-208","author":"Y. Linde","year":"1980","unstructured":"Linde, Y., Buzo, A., & Gray, R. M. (1980). An algorithm for vector quantizer design. IEEE Transactions on Communications, COM-208, 84\u201395.","journal-title":"IEEE Transactions on Communications"},{"key":"9015_CR21","unstructured":"Liu, C. S., Ju, G. H., Wang, W. J., Wang, H. C., & Lai, W. H. (1991). A new speech synthesizer for text-to-speech system using multipulse excitation with pitch predictor. In Proc. IEEE int. conf. computer process. Chinese and oriental languages (pp. 205\u2013209)."},{"issue":"4","key":"9015_CR22","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1109\/89.397089","volume":"3","author":"A. V. McCree","year":"1995","unstructured":"McCree, A. V., & Barnwell III, T. P. (1995). A mixed excitation LPC vocoder model for low bit rate speech coding. IEEE Transactions on Speech and Audio Processing, 3(4), 242\u2013250.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"5\/6","key":"9015_CR23","doi-asserted-by":"crossref","first-page":"453","DOI":"10.1016\/0167-6393(90)90021-Z","volume":"9","author":"E. Moulines","year":"1990","unstructured":"Moulines, E., & Charpentier, F. (1990). Pitch-synchronous waveform processing techniques for text-to-speech synthesis using diphones. Speech Communication, 9(5\/6), 453\u2013467.","journal-title":"Speech Communication"},{"key":"9015_CR24","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1016\/0167-6393(94)00054-E","volume":"16","author":"E. Moulines","year":"1995","unstructured":"Moulines, E., & Laroche, J. (1995). Non-parametric techniques for pitch-scale and time-scale modification of speech. Speech Communication, 16, 175\u2013205.","journal-title":"Speech Communication"},{"issue":"1","key":"9015_CR25","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1109\/89.221363","volume":"1","author":"K. K. Paliwal","year":"1993","unstructured":"Paliwal, K. K., & Atal, B. S. (1993). Efficient vector quantization of LPC parameters at 24 bits\/frame. IEEE Transactions on Speech and Audio Processing, 1(1), 3\u201314.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9015_CR26","doi-asserted-by":"crossref","unstructured":"Silva, S. S., & Netto, S. L. (2004). Closed-form estimation of the amplitude commands in the automatic extraction of the Fujisaki\u2019s model. In Proc. IEEE int. conf. acoust. speech signal process (Vol.\u00a01, pp. 621\u2013624).","DOI":"10.1109\/ICASSP.2004.1326062"},{"issue":"1","key":"9015_CR27","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1109\/89.221364","volume":"1","author":"F. K. Soong","year":"1993","unstructured":"Soong, F. K., & Juang, B. H. (1993). Optimal quantization of LSP parameters. IEEE Transactions on Speech and Audio Processing, 1(1), 15\u201324.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9015_CR28","doi-asserted-by":"crossref","unstructured":"Supplee, L. M., Cohn, R. P., & Collura, J. S. (1997). MELP: the new federal standard at 2400 bps. In Proc. IEEE int. conf. acoust. speech signal process (Vol.\u00a02, pp. 1591\u20131594).","DOI":"10.1109\/ICASSP.1997.596257"},{"key":"9015_CR29","unstructured":"Taylor, P., Black, A. W., & Caley, R. (1998). The architecture of the festival speech synthesis system. In Proceedings of the third ESCA workshop in speech synthesis (pp. 147\u2013151). Available at http:\/\/www.cstr.ed.ac.uk\/projects\/festival\/ ."},{"key":"9015_CR30","doi-asserted-by":"crossref","first-page":"284","DOI":"10.1016\/j.specom.2005.03.015","volume":"46","author":"C. Y. Tseng","year":"2005","unstructured":"Tseng, C. Y., Pin, S. H., Lee, Y., Wang, H. M., & Chen, Y. C. (2005). Fluent speech prosody: Framework and modeling. Speech Communications, 46, 284\u2013309.","journal-title":"Speech Communications"},{"issue":"4","key":"9015_CR31","doi-asserted-by":"crossref","first-page":"586","DOI":"10.1109\/TASSP.1987.1165151","volume":"35","author":"A. Varga","year":"1987","unstructured":"Varga, A., & Fallside, F. (1987). A technique for using multipulse linear predictive speech synthesis in text-to-speech type systems. IEEE Transactions on Acoustics, Speech, and Signal Processing, 35(4), 586\u2013587.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9015_CR32","unstructured":"Wu, C. H., Chen, C. H., & Juang, S. C. (1995). An CELP-based prosodic information modification and generation of Mandarin text-to-speech. In Proc. ROCLING XIII (pp. 233\u2013251)."},{"issue":"3","key":"9015_CR33","doi-asserted-by":"crossref","first-page":"731","DOI":"10.1109\/TCE.2003.1233811","volume":"49","author":"C. Yu","year":"2003","unstructured":"Yu, C., & Hu, H. T. (2003). Design and implementation of an ASIC architecture for 1.6 kbps speech synthesis. IEEE Transactions on Consumer Electronics, 49(3), 731\u2013736.","journal-title":"IEEE Transactions on Consumer Electronics"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-008-9015-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-008-9015-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-008-9015-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,23]],"date-time":"2023-05-23T11:03:20Z","timestamp":1684839800000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-008-9015-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,3]]},"references-count":33,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2007,3]]}},"alternative-id":["9015"],"URL":"https:\/\/doi.org\/10.1007\/s10772-008-9015-3","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2007,3]]}}}