{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:51:56Z","timestamp":1740099116568,"version":"3.37.3"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319995786"},{"type":"electronic","value":"9783319995793"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-99579-3_39","type":"book-chapter","created":{"date-parts":[[2018,8,24]],"date-time":"2018-08-24T07:36:09Z","timestamp":1535096169000},"page":"367-376","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Improving Intelligibility of Black-Box Speech Synthesizers in Noise"],"prefix":"10.1007","author":[{"given":"Thomas","family":"Manzini","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alan","family":"Black","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,25]]},"reference":[{"key":"39_CR1","unstructured":"Black, A.W., Lenzo, K.A.: Flite: a small fast run-time synthesis engine. In: 4th ISCA Tutorial and Research Workshop (ITRW) on Speech Synthesis (2001)"},{"issue":"3","key":"39_CR2","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1121\/1.2166600","volume":"119","author":"M Cooke","year":"2006","unstructured":"Cooke, M.: A glimpsing model of speech perception in noise. J. Acoust. Soc. Am. 119(3), 1562\u20131573 (2006)","journal-title":"J. Acoust. Soc. Am."},{"issue":"6","key":"39_CR3","doi-asserted-by":"publisher","first-page":"3615","DOI":"10.1121\/1.414959","volume":"99","author":"T Dau","year":"1996","unstructured":"Dau, T., P\u00fcschel, D., Kohlrausch, A.: A quantitative model of the \u201ceffective\u201d signal processing in the auditory system. i. model structure. J. Acoust. Soc. Am. 99(6), 3615\u20133622 (1996)","journal-title":"J. Acoust. Soc. Am."},{"key":"39_CR4","unstructured":"Davies, M.: The corpus of contemporary American English (Coca): 450 million words, 1990\u20132012. Brigham Young University (2002)"},{"key":"39_CR5","unstructured":"Duddington, J.: eSpeak text to speech (2012)"},{"key":"39_CR6","unstructured":"Durette, P.N.: gTTS: a python interface for google\u2019s text to speech api (2017). https:\/\/github.com\/pndurette\/gTTS . Accessed 15 Apr 2018"},{"issue":"3","key":"39_CR7","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1145\/1226736.1226763","volume":"50","author":"F Fiedrich","year":"2007","unstructured":"Fiedrich, F., Burghardt, P.: Agent-based systems for disaster management. Commun. ACM 50(3), 41\u201342 (2007)","journal-title":"Commun. ACM"},{"key":"39_CR8","doi-asserted-by":"crossref","unstructured":"Imran, M., Castillo, C., Lucas, J., Meier, P., Vieweg, S.: AIDR: Artificial intelligence for disaster response. In: Proceedings of the 23rd International Conference on World Wide Web, pp. 159\u2013162. ACM (2014)","DOI":"10.1145\/2567948.2577034"},{"key":"39_CR9","doi-asserted-by":"crossref","unstructured":"Kamath, S., Loizou, P.: A multi-band spectral subtraction method for enhancing speech corrupted by colored noise. In: ICASSP, vol. 4, pp. 44164\u201344164. Citeseer (2002)","DOI":"10.1109\/ICASSP.2002.5745591"},{"issue":"4","key":"39_CR10","doi-asserted-by":"publisher","first-page":"2395","DOI":"10.1121\/1.1784440","volume":"116","author":"MC Killion","year":"2004","unstructured":"Killion, M.C., Niquette, P.A., Gudmundsen, G.I., Revit, L.J., Banerjee, S.: Development of a quick speech-in-noise test for measuring signal-to-noise ratio loss in normal-hearing and hearing-impaired listeners. J. Acoust. Soc. Am. 116(4), 2395\u20132405 (2004)","journal-title":"J. Acoust. Soc. Am."},{"issue":"2","key":"39_CR11","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1109\/TASSP.1980.1163394","volume":"28","author":"R McAulay","year":"1980","unstructured":"McAulay, R., Malpass, M.: Speech enhancement using a soft-decision noise suppression filter. IEEE Trans. Acoust. Speech Signal Process. 28(2), 137\u2013145 (1980)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"39_CR12","doi-asserted-by":"crossref","unstructured":"Park, Y., Patwardhan, S., Visweswariah, K., Gates, S.C.: An empirical analysis of word error rate and keyword error rate. In: Ninth Annual Conference of the International Speech Communication Association (2008)","DOI":"10.21437\/Interspeech.2008-537"},{"issue":"1","key":"39_CR13","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1121\/1.412282","volume":"97","author":"MK Pichora-Fuller","year":"1995","unstructured":"Pichora-Fuller, M.K., Schneider, B.A., Daneman, M.: How young and old adults listen to and remember speech in noise. J. Acoust. Soc. Am. 97(1), 593\u2013608 (1995)","journal-title":"J. Acoust. Soc. Am."},{"key":"39_CR14","doi-asserted-by":"crossref","unstructured":"Ravichander, A., Manzini, T., Grabmair, M., Neubig, G., Francis, J., Nyberg, E.: How would you say it? eliciting lexically diverse dialogue for supervised semantic parsing. In: Proceedings of the 18th Annual SIGdial Meeting on Discourse and Dialogue, pp. 374\u2013383 (2017)","DOI":"10.18653\/v1\/W17-5545"},{"key":"39_CR15","doi-asserted-by":"crossref","unstructured":"Schmidt-Nielsen, A.: Intelligibility and acceptability testing for speech technology. Technical report, Naval Research Lab, Washington DC (1992)","DOI":"10.21236\/ADA252015"},{"key":"39_CR16","doi-asserted-by":"crossref","unstructured":"Valentini-Botinhao, C., Yamagishi, J., King, S.: Can objective measures predict the intelligibility of modified hmm-based synthetic speech in noise? In: Twelfth Annual Conference of the International Speech Communication Association (2011)","DOI":"10.1109\/ICASSP.2011.5947507"},{"key":"39_CR17","doi-asserted-by":"crossref","unstructured":"Valentini-Botinhao, C., Yamagishi, J., King, S.: Evaluation of objective measures for intelligibility prediction of hmm-based synthetic speech in noise. In: 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5112\u20135115. IEEE (2011)","DOI":"10.1109\/ICASSP.2011.5947507"},{"issue":"3","key":"39_CR18","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1016\/0167-6393(93)90095-3","volume":"12","author":"A Varga","year":"1993","unstructured":"Varga, A., Steeneken, H.J.: Assessment for automatic speech recognition: Ii. noisex-92: a database and an experiment to study the effect of additive noise on speech recognition systems. Speech Commun. 12(3), 247\u2013251 (1993)","journal-title":"Speech Commun."},{"key":"39_CR19","unstructured":"Wang, Y.Y., Acero, A., Chelba, C.: Is word error rate a good indicator for spoken language understanding accuracy. In: 2003 IEEE Workshop on Automatic Speech Recognition and Understanding, ASRU 2003, pp. 577\u2013582. IEEE (2003)"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99579-3_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T04:58:57Z","timestamp":1661835537000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319995786","9783319995793"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99579-3_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}