{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T21:44:15Z","timestamp":1771105455090,"version":"3.50.1"},"reference-count":76,"publisher":"Elsevier BV","issue":"1-3","license":[{"start":{"date-parts":[[1998,8,1]],"date-time":"1998-08-01T00:00:00Z","timestamp":901929600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Speech Communication"],"published-print":{"date-parts":[[1998,8]]},"DOI":"10.1016\/s0167-6393(98)00027-2","type":"journal-article","created":{"date-parts":[[2003,4,5]],"date-time":"2003-04-05T03:57:58Z","timestamp":1049515078000},"page":"3-27","source":"Crossref","is-referenced-by-count":100,"title":["Should recognizers have ears?"],"prefix":"10.1016","volume":"25","author":[{"given":"Hynek","family":"Hermansky","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/S0167-6393(98)00027-2_BIB1","doi-asserted-by":"crossref","unstructured":"Aikawa, K., Singer, H., Kawahara, H., Tohkura, Y., 1993. A dynamic cepstrum incorporating time-frequency masking and its application to continuous speech recognition. In: Proceedings of the International Conference on Acoust. Speech and Signal Processing, Minneapolis, MN, pp. II-668\u2013671","DOI":"10.1109\/ICASSP.1993.319399"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB2","doi-asserted-by":"crossref","unstructured":"Allen, J.B., 1994. How do humans process and recognize speech?. IEEE Trans. Speech Audio Process. 2 (4), 567\u2013577","DOI":"10.1109\/89.326615"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB3","doi-asserted-by":"crossref","unstructured":"Arai, T.M., Pavel, H.H., Avendano, C., 1996. Intelligibility of speech with filtered time trajectories of spectral envelopes. In: Proceedings of the International Conference on Spoken Language Processing, Philadelphia, pp. 2490\u20132493","DOI":"10.1109\/ICSLP.1996.607318"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB4","unstructured":"Attias, H., Schreiner, C.E., 1998. Coding of naturalistic stimuli by auditory midbrain neurons. In: Advances in Neural Information Processing Systems, Vol. 10. Morgan Kaufmann, Los Altos, CA"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB5","doi-asserted-by":"crossref","unstructured":"Avendano, C. van Vuuren, S., Hermansky, H., 1996. Data based filter design for RASTA-like channel normalization in ASR. In: Proceedings of the International Conference on Spoken Language Processing, Philadelphia","DOI":"10.1109\/ICSLP.1996.607213"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB6","doi-asserted-by":"crossref","unstructured":"Avendano, C., Hermansky, H., 1997. On the properties of temporal processing for speech in adverse environments. In: Proceedings of 1997 Workshop on Applications of Signal Processing to Audio and Acoustics, Mohonk Mountain House, New Paltz, New York","DOI":"10.1109\/ASPAA.1997.625589"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB7","doi-asserted-by":"crossref","unstructured":"Bladon, A., 1983. Two-formant models of vowel perception: Shortcomings and enhancements. Speech Communication 2, 305\u2013313","DOI":"10.1016\/0167-6393(83)90047-X"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB8","doi-asserted-by":"crossref","unstructured":"Bourlard, H., Morgan, N., 1994. Connectionist Speech Recognition \u2013 A Hybrid Approach. Kluwer Academic Publishers, Dordrecht","DOI":"10.1007\/978-1-4615-3210-1"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB9","unstructured":"Bourlard, H., Hermansky, H., Morgan, N., 1996. Copernicus and ASR challenge: Waiting for Kepler. In: Proceedings of the ARPA ASR Workshop Spring 1996, Arden House, NY, pp. 157\u2013162"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB10","doi-asserted-by":"crossref","unstructured":"Bourlard, H., Dupont, S., 1996. A new ASR approach based on independent processing and re-combination of partial frequency bands. In: Proceedings of the International Conference on Spoken Language Processing, Philadelphia, pp. 426\u2013429","DOI":"10.21437\/ICSLP.1996-90"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB11","unstructured":"Bridle, J.S., Brown, M.D., 1974. An experimental automatic word recognition system. JSRU Report No. 1003, Joint Speech Research Unit, Ruislip, England"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB12","doi-asserted-by":"crossref","unstructured":"Broad, D., Hermansky, H., 1989. The front cavity\/F2' hypothesis tested by data on tongue movements. J. Acoust. Soc. Amer. 86 (Suppl. 1), S13\u2013S14","DOI":"10.1121\/1.2027307"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB13","doi-asserted-by":"crossref","unstructured":"Brown, P., 1987. The acoustic-modeling problem in automatic speech recognition. Ph.D. Thesis, Computer Science Department, Carnegie Mellon University","DOI":"10.21236\/ADA188529"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB14","doi-asserted-by":"crossref","unstructured":"Cohen, J.R., 1989. Application of an auditory model to speech recognition. J. Acoust. Soc. Amer. 85 (6), 2623\u20132629","DOI":"10.1121\/1.397756"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB15","doi-asserted-by":"crossref","unstructured":"Cooper, F.S., Delattre, P.C., Liberman, A.M., Borst, J.M., Gerstman, L.J., 1952. Some experiments on the perception of synthetic speech sounds. J. Acoust. Soc. Amer. 24, 579\u2013606","DOI":"10.1121\/1.1906940"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB16","doi-asserted-by":"crossref","unstructured":"Cook, G.D., Christie, J.D., Clarkson, P.R., Hochberg, M.M., Logan, B.T., Robinson, A.J., 1996. Real-time recognition of broadcast radio speech. In: Proceedings of the International Conference on Acoust. Speech and Signal Processing, pp. 141\u2013144","DOI":"10.1109\/ICASSP.1996.540310"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB17","doi-asserted-by":"crossref","unstructured":"Chistovich, L.A., 1985. Central auditory processing of peripheral vowel spectra. J. Acoust. Soc. Amer. 77, 789\u2013805","DOI":"10.1121\/1.392049"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB18","doi-asserted-by":"crossref","unstructured":"Davis, S.B., Mermelstein, P., 1980. Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoust. Speech Signal Process. 28 (4), 357\u2013366","DOI":"10.1109\/TASSP.1980.1163420"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB19","unstructured":"deCharms, C.R., Blake, D., Merzenich, M.M., 1997. Sound feature decomposition by the primary auditory cortex. In: 1997 Workshop on Advances in Neural Information Processing, Breckenridge, Colorado (submitted to Science, also unpublished technical memo)"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB20","doi-asserted-by":"crossref","unstructured":"Drullman, R., Festen, J.M., Plomp, R.,1994. Effect of temporal envelope smearing on speech reception. J. Acoust. Soc. Amer. 95, 1053\u20131064","DOI":"10.1121\/1.408467"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB21","doi-asserted-by":"crossref","unstructured":"Drullman, R., Festen, J.M., Plomp, R., 1994. Effect of reducing slow temporal modulations on speech reception. J. Acoust. Soc. Amer. 95, 2670\u20132680","DOI":"10.1121\/1.409836"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB22","unstructured":"Fant, G., Risberg, A., 1962. Auditory matching of vowels with two formant synthetic sounds. Quarterly Progress and Status Report 4, Speech Transmission Laboratory, Royal Institute of Technology, Stockholm"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB23","unstructured":"Fant, G., 1965. Acoustic description and classification of phonetic units. Ericsson Technics, No. 1, reprinted in: Fant, G., 1973. Speech Sounds and Features. MIT Press, Cambridge, MA"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB24","doi-asserted-by":"crossref","unstructured":"Flanagan, J.L., 1972. Speech Analysis Synthesis and Perception, second edition. Springer, Berlin","DOI":"10.1007\/978-3-662-01562-9"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB25","unstructured":"Fletcher, H., 1953. Speech and Hearing in Communication. Krieger, New York"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB26","doi-asserted-by":"crossref","unstructured":"Fujimura, O., 1964. On the second spectral peak of front vowels: A perceptual study of the role of the second and third formants. Language and Speech 10, 181\u2013193","DOI":"10.1177\/002383096701000304"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB27","doi-asserted-by":"crossref","unstructured":"Furui, S., 1981. Cepstral analysis technique for automatic speaker verification. IEEE Trans. Acoust. Speech Signal Process. 29, 254\u2013272","DOI":"10.1109\/TASSP.1981.1163530"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB28","doi-asserted-by":"crossref","unstructured":"Geman, S., Bienenstock, E., Doursat, R., 1992. Neural networks and the bias\/variance dilemma. Neural Computation 4 (1), 1\u201358","DOI":"10.1162\/neco.1992.4.1.1"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB29","doi-asserted-by":"crossref","unstructured":"Green, P.D., Cooke, M.P., Crawford, M.D., 1995. Auditory scene analysis and hidden Markov model recognition of speech in noise. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Detroit, MI, pp. 401\u2013404","DOI":"10.1109\/ICASSP.1995.479606"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB30","unstructured":"Greenberg, S., 1997. On the origins of speech intelligibility in the real world. In: Proceedings of ESCA-NATO Tutorial and Research Workshop on Robust speech recognition for unknown communication channels, Pont-a-Mousson, France, pp. 23\u201332"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB31","doi-asserted-by":"crossref","unstructured":"Hanson, B.A., Applebaum, T.H., Junqua, J.C., 1996. Spectral dynamics for speech recognition under adverse conditions. In: Lee, C.H., Soong, F.K., Paliwal, K.K. (Eds.), Automatic Speech and Speaker Recognition. Kluwer Academic Publishers, Dordrecht","DOI":"10.1007\/978-1-4613-1367-0_14"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB32","doi-asserted-by":"crossref","unstructured":"Hanson, B., Wong, D., 1984. The harmonic magnitude suppression (HMS) technique for intelligibility enhancement in the presence in interfering speech. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, pp. 18.A.5.1\u201318.A.5.4","DOI":"10.1109\/ICASSP.1984.1172548"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB33","unstructured":"Helmholtz, H., 1954. On the Sensation of Tone. Dover, New York"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB34","doi-asserted-by":"crossref","unstructured":"Hermansky, H., 1990. Perceptual linear predictive (PLP) analysis of speech. J. Acoust. Soc. Amer. 87 (4), 1738\u20131752","DOI":"10.1121\/1.399423"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB35","unstructured":"Hermansky, H., 1995. Exploring temporal domain for robustness in speech recognition. In: Proceedings of the 15th International Congress on Acoustics, Vol. II, Trondheim, Norway, pp. 61\u201364"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB36","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Tibrewala, S., Pavel, M., 1996. Towards ASR on partially corrupted speech. In: Proceedings of International Conference on Spoken Language Processing, Philadelphia, PA, pp. 462\u2013465","DOI":"10.21437\/ICSLP.1996-123"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB37","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Fujisaki, H., Sato, Y., 1983. Analysis and synthesis of speech based on spectral transform linear predictive method. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Boston, MA, pp. 777\u2013780","DOI":"10.1109\/ICASSP.1983.1172025"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB38","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Morgan, N., 1994. RASTA processing of speech. IEEE Trans. Speech Audio Process. 2 (4), 578\u2013589","DOI":"10.1109\/89.326616"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB39","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Wan, E., Avendano, C., 1995. Speech enhancement based on temporal processing. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Detroit, MI, pp. 405\u2013408","DOI":"10.1109\/ICASSP.1995.479607"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB40","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Broad, D., 1989. The effective second formant F2' and the vocal tract front cavity. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Glasgow, Scotland, pp. 480\u2013483","DOI":"10.1109\/ICASSP.1989.266468"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB41","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Morgan, N., Bayya, A., Kohn, P., 1991. Compensation for the effect of the communication channel in auditory-like analysis of speech (RASTA-PLP). In: Proceedings of Eurospeech'91, Genova, Italy, pp. 1367\u20131371","DOI":"10.1109\/ACSSC.1991.186557"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB42","unstructured":"Hermansky, H., Pavel, M., 1995. Psychophysics of speech engineering systems. Invited paper, 13th International Congress on Phonetic Sciences, Stockholm, Sweden, pp. 42\u201349"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB43","unstructured":"Hermansky, H., 1988. Modulation spectrum in speech processing. In: Prochazka, A., Uhlir, J., Rayner, P.J.W., Kingsbury, N.G. (Eds.), Signal Analysis and Prediction. Birkhauser, Boston"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB44","doi-asserted-by":"crossref","unstructured":"Hirsch, H.G., Meyer, P., Ruehl, H., 1991. Improved speech recognition using high-pass filtering of subband envelopes. In: Proceedings of Eurospeech'91, Genova, Italy, pp. 413\u2013416","DOI":"10.21437\/Eurospeech.1991-105"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB45","doi-asserted-by":"crossref","unstructured":"Houtgast, T., Steeneken, H.J.M., 1985. A review of the MTF concept in room acoustics and its use for estimating speech intelligibility in auditoria. J. Acoust. Soc. Amer. 77 (3), 1069\u20131077","DOI":"10.1121\/1.392224"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB46","doi-asserted-by":"crossref","unstructured":"Hunt, M.J., 1979. A statistical approach to metrics for word and syllable recognition. J. Acoust. Soc. Amer. 66 (S1), S35(A)","DOI":"10.1121\/1.2017735"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB47","doi-asserted-by":"crossref","unstructured":"Hunt, M., Lefebvre, C., 1989. A comparison of several acoustic representations for speech recognition with degraded and undegraded speech. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Glasgow, Scotland, pp. 262\u2013265","DOI":"10.1109\/ICASSP.1989.266415"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB48","doi-asserted-by":"crossref","unstructured":"Itahashi, S., Yokoyama, S., 1976. Automatic formant extraction utilizing mel scale and equal loudness contour. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Philadelphia, PA, pp. 310\u2013313","DOI":"10.1109\/ICASSP.1976.1170074"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB49","doi-asserted-by":"crossref","unstructured":"Janseen, R.D.T., Fanty, M., Cole, R.A., 1991. Speaker independent phonetic classification in continuous English letters. In: Proceedings of International Joint Conference on Neural Networks, Seattle, WA, pp. II-801\u2013808","DOI":"10.1109\/IJCNN.1991.155437"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB50","doi-asserted-by":"crossref","unstructured":"Jestead, W., Bacon, S.P., Lehman, J.R., 1982. Forward masking as a function of frequency, masker level, and signal delay. J. Acoust. Soc. Amer. 950\u2013962","DOI":"10.1121\/1.387576"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB51","doi-asserted-by":"crossref","unstructured":"Kanedera, N., Arai, T., Hermansky, H., Pavel, M., 1997. On the importance of various modulation frequencies for speech recognition. In: Proceedings of Eurospeech'97, Rhodos, Greece, pp. 1079\u20131082","DOI":"10.21437\/Eurospeech.1997-104"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB52","unstructured":"Kanedera, N., Arai, T., Hermansky, H., Pavel, M., 1997. On the relative importance of various components of the modulation spectrum for automatic speech recognition. Submitted to Speech Communication"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB53","unstructured":"Klatt, D.H., 1982. Speech processing strategies based on auditory models. In: Carlson, R., Granstrom, B. (Eds.), The Representation of Speech in The Peripheral Auditory System. Elsevier Biomedical Press, New York, pp. 181\u2013202"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB54","unstructured":"Kozhevnikov, V.A., Chistovich, L.A., 1967. Speech: Articulation and Perception. Translated from Russian by US Department of Commerce, p. 250, 251"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB55","unstructured":"Ladefoged, P., 1967. Three Areas of Experimental Phonetics. Oxford Univ. Press, Oxford, p. 65"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB56","doi-asserted-by":"crossref","unstructured":"Lim, J.S., 1979. Spectral root homomorphic deconvolution system. IEEE Trans. Acoust. Speech Signal Process. 27 (3), 223\u2013233","DOI":"10.1109\/TASSP.1979.1163234"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB57","doi-asserted-by":"crossref","unstructured":"Lippmann, R.P., 1995. Accurate consonant perception without mid-frequency speech energy. IEEE Trans. Speech and Audio 4 (1), 66\u201369","DOI":"10.1109\/TSA.1996.481454"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB58","doi-asserted-by":"crossref","unstructured":"Makino, S., Kawabata, T., Kido, K., 1983. Recognition of consonant based on the perceptron model. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Boston, MA, pp. 738\u2013741","DOI":"10.1109\/ICASSP.1983.1172080"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB59","doi-asserted-by":"crossref","unstructured":"Malayath, N., Hermansky, H., Kain, A., 1997. Towards decomposing the sources of variability in speech. In: Proceedings of Eurospeech'97, Rhodos, Greece","DOI":"10.21437\/Eurospeech.1997-167"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB60","unstructured":"Marr, D., 1982. Vision. Freeman, San Francisco, CA"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB61","unstructured":"Mermelstein, P., 1976. Distance measures for speech recognition, psychological and instrumental. In: Chen, R.C.H. (Ed.), Pattern Recognition and Artificial Intelligence. Academic Press, New York, pp. 374\u2013388"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB62","doi-asserted-by":"crossref","unstructured":"Neumayer, L., Weintraub, M., 1994. Probabilistic optimum filtering for robust speech recognition. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, Adelaide, Australia, pp. I-417\u2013420","DOI":"10.1109\/ICASSP.1994.389267"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB63","unstructured":"Pavel, M., 1980. Homogeneity in complete and partial masking. Ph.D. Thesis, New York University"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB64","doi-asserted-by":"crossref","unstructured":"Pavel, M., Hermansky, H., 1994. Temporal masking in automatic speech recognition. J. Acoust. Soc. Amer. A 95, 2876","DOI":"10.1121\/1.409409"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB65","doi-asserted-by":"crossref","unstructured":"Pols, L.C.W., 1971. Real-time recognition of spoken words. IEEE Trans. Comput. 20 (C) 972\u2013978","DOI":"10.1109\/T-C.1971.223391"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB66","doi-asserted-by":"crossref","unstructured":"Rosenberg, A.E., Lee, C., Soong, F.K., 1994. Cepstral channel normalization techniques for HMM-based speaker verification. In: Proceedings of International Conference on Spoken Language Processing, Yokohama, Japan, pp. 1835\u20131838","DOI":"10.21437\/ICSLP.1994-461"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB67","doi-asserted-by":"crossref","unstructured":"Seneff, S., 1985. A joint synchrony\/mean-rate model of auditory speech processing. J. Phonetics 16 (1), 55\u201376","DOI":"10.1016\/S0095-4470(19)30466-8"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB68","doi-asserted-by":"crossref","unstructured":"Stevens, J.C., Hall, J.W., 1966. Brightness and loudness as functions of stimulus duration. Perception and Psychophysics 1, 319\u2013327","DOI":"10.3758\/BF03215796"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB69","doi-asserted-by":"crossref","unstructured":"Stevens, K.N., 1996. Applying phonetic knowledge to lexical access. In: Proceedings of Eurospeech'95, Madrid, Spain, p. 3","DOI":"10.1016\/B978-044481607-8\/50068-2"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB70","doi-asserted-by":"crossref","unstructured":"Tibrewala, S., Hermansky, H., 1997. Multi-band and adaptation approaches to robust speech recognition. In: Proceedings of Eurospeech'97, Rhodos, Greece, pp. 2619\u20132622","DOI":"10.21437\/Eurospeech.1997-411"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB71","doi-asserted-by":"crossref","unstructured":"van Vuuren, S., Hermansky, H., 1997. Data-driven design of RASTA-like filters. In: Proceedings of Eurospeech'97, Rhodos, Greece, pp. 409\u2013412","DOI":"10.21437\/Eurospeech.1997-151"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB72","unstructured":"Waibel, A., Hanazawa, T., Hinton, G., Shikano, K., Lang, K., 1988. Phoneme recognition using time-delay neural networks, Proceedings of International Conference on Acoust. Speech and Signal Processing, New York, pp. 107\u2013110"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB73","doi-asserted-by":"crossref","unstructured":"Wang, K., Shamma, S.S., 1995. Spectral shape analysis in the central auditory system. IEEE Trans. Speech Audio Process. 3 (5), 382\u2013394","DOI":"10.1109\/89.466657"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB74","doi-asserted-by":"crossref","unstructured":"Watkins, A.J., Makin, S.J., 1997. Some effects of filtered context on the perception of vowels and fricatives. J. Acoust. Soc. Amer. 99 (1), 588\u2013594","DOI":"10.1121\/1.414515"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB75","doi-asserted-by":"crossref","unstructured":"Woodland, P.C., Gales, M.J.F., Pye, D., 1996. Improving environmental robustness in large vocabulary speech recognition. In: Proceedings of International Conference on Acoust. Speech and Signal Processing, pp. 65\u201368","DOI":"10.1109\/ICASSP.1996.540291"},{"key":"10.1016\/S0167-6393(98)00027-2_BIB76","doi-asserted-by":"crossref","unstructured":"Zwicker, E., 1975. Scaling. In: Keidel O., Neff W. (Eds.), Handbook of Sensory Physiology, Vol. V.3. Springer, Berlin, pp. 401\u2013448","DOI":"10.1007\/978-3-642-65995-9_9"}],"container-title":["Speech Communication"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639398000272?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639398000272?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,4,15]],"date-time":"2023-04-15T22:01:49Z","timestamp":1681596109000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167639398000272"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1998,8]]},"references-count":76,"journal-issue":{"issue":"1-3","published-print":{"date-parts":[[1998,8]]}},"alternative-id":["S0167639398000272"],"URL":"https:\/\/doi.org\/10.1016\/s0167-6393(98)00027-2","relation":{},"ISSN":["0167-6393"],"issn-type":[{"value":"0167-6393","type":"print"}],"subject":[],"published":{"date-parts":[[1998,8]]}}}