{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T12:26:00Z","timestamp":1769689560181,"version":"3.49.0"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2012,11,1]],"date-time":"2012-11-01T00:00:00Z","timestamp":1351728000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Cogn Comput"],"published-print":{"date-parts":[[2013,12]]},"DOI":"10.1007\/s12559-012-9196-6","type":"journal-article","created":{"date-parts":[[2012,10,31]],"date-time":"2012-10-31T07:22:21Z","timestamp":1351668141000},"page":"426-441","source":"Crossref","is-referenced-by-count":10,"title":["Auditory-Inspired Morphological Processing of Speech Spectrograms: Applications in Automatic Speech Recognition and Speech Enhancement"],"prefix":"10.1007","volume":"5","author":[{"given":"Joyner","family":"Cadore","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Francisco J.","family":"Valverde-Albacete","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ascensi\u00f3n","family":"Gallardo-Antol\u00edn","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Carmen","family":"Pel\u00e1ez-Moreno","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2012,11,1]]},"reference":[{"issue":"1","key":"9196_CR1","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1109\/TASSP.1975.1162650","volume":"23","author":"J Baker","year":"1975","unstructured":"Baker J. The Dragon system\u2014an overview. IEEE Trans Acoust Speech Signal Process. 1975;23(1):24\u201329","journal-title":"IEEE Trans Acoust Speech Signal Process."},{"issue":"10","key":"9196_CR2","first-page":"765","volume":"50","author":"J Beerends","year":"2002","unstructured":"Beerends J, Hekstra A, Rix A, Hollier M. Perceptual evaluation of speech quality (PESQ) the new ITU standard for end-to-end speech quality assessment. Part II: psychoacoustic model. J Audio Eng Soc. 2002;50(10):765\u201378","journal-title":"J Audio Eng Soc."},{"key":"9196_CR3","doi-asserted-by":"crossref","unstructured":"Berouti M, Schwartz R, Makhoul J Enhancement of speech corrupted by acoustic noise. IEEE Int Conf Acoust Speech Signal Process. 1979;4:208\u2013211. IEEE.","DOI":"10.1109\/ICASSP.1979.1170788"},{"key":"9196_CR4","doi-asserted-by":"crossref","unstructured":"Bourlard H, Morgan N. Hybrid HMM\/ANN systems for speech recognition: overview and new research directions. Adapt Process Seq Data Struct. 1998;389\u2013417.","DOI":"10.1007\/BFb0054006"},{"key":"9196_CR5","unstructured":"Cole R, Muthusamy Y, Fanty M. The isolet spoken letter database. 2011. http:\/\/www.cslu.ogi.edu\/corpora\/isolet ."},{"issue":"4","key":"9196_CR6","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis S, Mermelstein P. Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans Acoust Speech Signal Process. 1980;28(4):357\u201366","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"9196_CR7","doi-asserted-by":"crossref","unstructured":"Dougherty ER, Lotufo RA. Hands-on morphological image processing. Tutorial texts in optical engineering, vol. TT59. SPIE press 2003.","DOI":"10.1117\/3.501104"},{"issue":"6","key":"9196_CR8","doi-asserted-by":"crossref","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Ephraim Y, Malah D. Speech enhancement using a minimum mean-square error short-time spectral amplitude estimator. IEEE Trans Acoust Speech Signal Process. 1984;32(6):1109\u201321","journal-title":"IEEE Trans Acoust Speech Signal Process."},{"key":"9196_CR9","unstructured":"Evans N, Mason J, Roach M, et\u00a0al. Noise compensation using spectrogram morphological filtering. In: Proceedings of the 4th IASTED International Conference on Signal and Image Processing. 2002. pp. 157\u201361."},{"key":"9196_CR10","doi-asserted-by":"crossref","unstructured":"Ezeiza A, L\u00f3pez de Ipi\u00f1a K, Hern\u00e1ndez C, Barroso N. Enhancing the feature extraction process for automatic speech recognition with fractal dimensions. Cogn Comput. 2012. pp. 1\u20136.","DOI":"10.1007\/s12559-012-9165-0"},{"key":"9196_CR11","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-540-68888-4","volume-title":"Psycho-acoustics: facts and models","author":"H Fastl","year":"2007","unstructured":"Fastl H, Zwicker E. Psycho-acoustics: facts and models, 3rd edn. New York: Springer; 2007.","edition":"3"},{"key":"9196_CR12","doi-asserted-by":"crossref","unstructured":"Faundez-Zanuy M, Hussain A, Mekyska J, Sesa-Nogueras E, Monte-Moreno E, Esposito A, Chetouani M, Garre-Olmo J, Abel A, Smekal Z, L\u00f3pez de Ipi\u00f1a K. Biometric applications related to human beings: there is life beyond security. Cogn Comput. 2012; 1\u201316.","DOI":"10.1007\/s12559-012-9169-9"},{"issue":"1","key":"9196_CR13","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1121\/1.396964","volume":"84","author":"M Florentine","year":"1988","unstructured":"Florentine M, Fastl H, Buus S. Temporal integration in normal hearing, cochlear impairment, and impairment simulated by masking. J Acoust Soc Am. 1998; 84(1):195\u2013203.","journal-title":"J Acoust Soc Am"},{"key":"9196_CR14","unstructured":"Gelbart D, Hemmert W, Holmberg M, Morgan N. Noisy ISOLET and ISOLET testbeds. database. 2011. http:\/\/www.icsi.berkeley.edu\/Speech\/papers\/eurospeech05-onset\/isolet\/ ."},{"issue":"1\u20132","key":"9196_CR15","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1016\/0378-5955(90)90170-T","volume":"47","author":"B Glasberg","year":"1990","unstructured":"Glasberg B, Moore B. Derivation of auditory filter shapes from notched-noise data. Hear Res. 1990;47(1\u20132):103\u201338","journal-title":"Hear Res"},{"key":"9196_CR16","volume-title":"Digital image processing","author":"R Gonzalez","year":"1993","unstructured":"Gonzalez R, Woods R Digital image processing. Boston: Addison-Wesley; 1993."},{"key":"9196_CR17","doi-asserted-by":"crossref","unstructured":"Greenberg S. The integration of phonetic knowledge in speech technology, Text, Speech and Language Technology vol. 25, chap. From here to utility. New York: Springer; 2005. pp. 107\u2013132.","DOI":"10.1007\/1-4020-2637-4_7"},{"key":"9196_CR18","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1016\/j.specom.2009.12.006","volume":"52","author":"TS Gunawan","year":"2010","unstructured":"Gunawan TS, Ambikairajah E, Epps J. Perceptual speech enhancement exploiting temporal masking properties of human auditory system. Speech Commun. 2010;52:381\u201393","journal-title":"Speech Commun."},{"key":"9196_CR19","doi-asserted-by":"crossref","unstructured":"Hansen J, Pellom B. An effective quality evaluation protocol for speech enhancement algorithms. In: International Conference on Spoken Language Processing. Sydney, Australia; 1998. pp. 2819\u201322.","DOI":"10.21437\/ICSLP.1998-350"},{"key":"9196_CR20","doi-asserted-by":"crossref","unstructured":"Heckmann M, Domont X, Joublin F, Goerick C A hierarchical framework for spectro-temporal feature extraction. Speech Commun. 2010; (53):736\u201352.","DOI":"10.1016\/j.specom.2010.08.006"},{"key":"9196_CR21","doi-asserted-by":"crossref","unstructured":"Hirsch H, Pearce D. The AURORA experimental framework for the performance evaluation of speech recognition systems under noisy conditions. In: ASR2000-Automatic Speech Recognition: Challenges for the new Millenium ISCA Tutorial and Research Workshop (ITRW) 2000.","DOI":"10.21437\/ICSLP.2000-743"},{"issue":"1","key":"9196_CR22","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1109\/TASL.2007.911054","volume":"16","author":"Y Hu","year":"2008","unstructured":"Hu Y, Loizou P. Evaluation of objective quality measures for speech enhancement. IEEE Trans Audio Speech Lang Process. 2008;16(1):229\u201338.","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"9196_CR23","doi-asserted-by":"crossref","unstructured":"Hu Y, Loizou P. Evaluation of objective measures for speech enhancement. In: Proceedings of the Interspeech. 2006; pp. 1447\u201350 .","DOI":"10.21437\/Interspeech.2006-84"},{"key":"9196_CR24","doi-asserted-by":"crossref","unstructured":"Hurmalainen A, Virtanen T Modelling spectro-temporal dynamics in factorisation-based noise-robust automatic speech recognition. In: International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE 2012; pp. 4113\u201316.","DOI":"10.1109\/ICASSP.2012.6288823"},{"issue":"1","key":"9196_CR25","doi-asserted-by":"crossref","first-page":"412","DOI":"10.1121\/1.417975","volume":"101","author":"T Irino","year":"1997","unstructured":"Irino T, Patterson R A time-domain, level-dependent auditory filter: The gammachirp. J Acoust Soc Am 1997;101(1):412\u201319.","journal-title":"J Acoust Soc Am"},{"issue":"6","key":"9196_CR26","doi-asserted-by":"crossref","first-page":"2222","DOI":"10.1109\/TASL.2006.874669","volume":"14","author":"T Irino","year":"2006","unstructured":"Irino T, Patterson R. A dynamic compressive gammachirp auditory filterbank. IEEE Trans Audio Speech Lang Process. 2006;14(6):2222\u201332","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"3","key":"9196_CR27","doi-asserted-by":"crossref","first-page":"250","DOI":"10.1109\/TIT.1975.1055384","volume":"21","author":"F Jelinek","year":"1975","unstructured":"Jelinek F, Bahl L, Mercer R. Design of a linguistic statistical decoder for the recognition of continuous speech. IEEE Trans Inf Theory. 1975;21(3):250\u201356","journal-title":"IEEE Trans Inf Theory"},{"issue":"4","key":"9196_CR28","doi-asserted-by":"crossref","first-page":"950","DOI":"10.1121\/1.387576","volume":"71","author":"W Jesteadt","year":"1982","unstructured":"Jesteadt W, Bacon SP, Lehman JR. Forward masking as a function of frequency, masker level, and signal delay. J Acoust Soc Am. 1982;71(4):950\u201362","journal-title":"J Acoust Soc Am"},{"key":"9196_CR29","unstructured":"Klatt D. Prediction of perceived phonetic distance from critical-band spectra: a first step. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, vol. 7, 1982. pp. 1278\u201381."},{"key":"9196_CR30","unstructured":"Loizou P. Matlab software. 2011. http:\/\/www.utdallas.edu\/loizou\/speech\/software.htm ."},{"key":"9196_CR31","doi-asserted-by":"crossref","first-page":"336","DOI":"10.1016\/j.csl.2012.02.002","volume":"26","author":"C Mart\u00ednez","year":"2012","unstructured":"Mart\u00ednez C, Goddard J, Milone D, Rufiner H. Bioinspired sparse spectro-temporal representation of speech for robust classification. Comput Speech Lang. 2012;26:336\u201348.","journal-title":"Comput Speech Lang"},{"key":"9196_CR32","unstructured":"Matheron G, Serra J. The birth of mathematical morphology. In: Proceedings of the 6th International Symposium on Mathematical Morphology. Sydney, Australia; 2002. pp. 1\u201316."},{"issue":"3","key":"9196_CR33","doi-asserted-by":"crossref","first-page":"702","DOI":"10.1121\/1.393460","volume":"79","author":"R Meddis","year":"1986","unstructured":"Meddis R. Simulation of mechanical to neural transduction in the auditory receptor. J Acoust Soc Am. 1986;79(3):702\u201311","journal-title":"J Acoust Soc Am."},{"issue":"3","key":"9196_CR34","doi-asserted-by":"crossref","first-page":"1056","DOI":"10.1121\/1.396050","volume":"83","author":"R Meddis","year":"1988","unstructured":"Meddis R. Simulation of auditory-neural transduction: further studies. J Acoust Soc Am. 1988;83(3):1056\u201363","journal-title":"J Acoust Soc Am"},{"key":"9196_CR35","doi-asserted-by":"crossref","unstructured":"Meyer B, Kollmeier B. Robustness of spectro-temporal features against intrinsic and extrinsic variations in automatic speech recognition. Speech Commun. 2010;53:753\u201367","DOI":"10.1016\/j.specom.2010.07.002"},{"key":"9196_CR36","doi-asserted-by":"crossref","first-page":"750","DOI":"10.1121\/1.389861","volume":"74","author":"B Moore","year":"1983","unstructured":"Moore B, Glasberg B. Suggested formulae for calculating auditory-filter bandwidths and excitation patterns. J Acoust Soc Am. 1983;74:750.","journal-title":"J Acoust Soc Am."},{"issue":"1\u20132","key":"9196_CR37","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1016\/S0378-5955(03)00347-2","volume":"188","author":"B Moore","year":"2004","unstructured":"Moore B, Glasberg B. A revised model of loudness perception applied to cochlear hearing loss. Hear Res. 2004;188(1\u20132):70\u201388","journal-title":"Hear Res"},{"key":"9196_CR38","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1016\/B978-0-08-041847-6.50054-X","volume":"83","author":"R Patterson","year":"1992","unstructured":"Patterson R, Robinson K, Holdsworth J, McKeown D, Zhang C, Allerhand M. Complex sounds and auditory images. Aud Physiol Percept 1992;83:429\u201346","journal-title":"Aud Physiol Percept"},{"issue":"3","key":"9196_CR39","doi-asserted-by":"crossref","first-page":"1377","DOI":"10.1121\/1.3458854","volume":"128","author":"C Pel\u00e1ez-Moreno","year":"2010","unstructured":"Pel\u00e1ez-Moreno C, Garc\u00eda-Moral A, Valverde-Albacete F. Analyzing phonetic confusions using formal concept analysis. J Acoust Soc Am. 2010;128(3):1377\u201390","journal-title":"J Acoust Soc Am."},{"key":"9196_CR40","volume-title":"Objective measures of speech quality","author":"S Quackenbush","year":"1988","unstructured":"Quackenbush S, Barnwell T, Clements M. Objective measures of speech quality. Upper Saddle River: Prentice Hall Englewood Cliffs; 1988."},{"key":"9196_CR41","volume-title":"Discrete-time speech signal processing. Signal Processing","author":"TF Quatieri","year":"2002","unstructured":"Quatieri TF (2002) Discrete-time speech signal processing. Principles and Practice. Signal Processing. Upper Saddle River: Prentice Hall; 2002."},{"key":"9196_CR42","volume-title":"Fundamentals of speech recognition","author":"L Rabiner","year":"1993","unstructured":"Rabiner L, Juang BH. Fundamentals of speech recognition. Signal Processing. Upper Saddle River: Prentice Hall; 1993."},{"issue":"10","key":"9196_CR43","first-page":"755","volume":"50","author":"A Rix","year":"2002","unstructured":"Rix A, Hollier M, Hekstra A, Beerends J. Perceptual evaluation of speech quality (PESQ), the new ITU standard for end-to-end speech quality assessment. Part I: Time-delay compensation. J Acoust Soc Am. 2002;50(10):755\u201364","journal-title":"J Acoust Soc Am"},{"key":"9196_CR44","doi-asserted-by":"crossref","unstructured":"Scalart P, Filho J. Speech enhancement based on a priori signal to noise estimation. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, IEEE 1986. pp. 629\u201332.","DOI":"10.1109\/ICASSP.1996.543199"},{"key":"9196_CR45","doi-asserted-by":"crossref","unstructured":"Serra J, Soille P (eds). Mathematical morphology and its application to image processing. Computational imaging and vision. Kluwer Academic 1994.","DOI":"10.1007\/978-94-011-1040-2"},{"key":"9196_CR46","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1121\/1.1915893","volume":"8","author":"SS Stevens","year":"1937","unstructured":"Stevens SS, Volkmann J, Newman EB. A scale for the measurement of the psychological magnitude of pitch. J Acoust Soc Am. 1937;8:185\u201390.","journal-title":"J Acoust Soc Am"},{"key":"9196_CR47","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1098\/rstb.1992.0069","volume":"336","author":"Q Summerfield","year":"1992","unstructured":"Summerfield Q, Culling J. Auditory segregation of competing voices: absence of effectes of FM or AM coherence. Philos Trans R Soc Lond. 1992;336:357\u201366","journal-title":"Philos Trans R Soc Lond"},{"issue":"5","key":"9196_CR48","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1016\/j.specom.2007.03.001","volume":"49","author":"L ten Bosch","year":"2007","unstructured":"ten Bosch L, Kirchhoff K. Editorial note: Bridging the gap between human and automatic speech recognition. Speech Commun. 2007;49(5):331\u20135","journal-title":"Speech Commun"},{"key":"9196_CR49","unstructured":"Weiss NA, Hasset MJ. Introductory statistics. Addison- Wesley, Reading; 1993. pp. 407\u201308."},{"key":"9196_CR50","doi-asserted-by":"crossref","unstructured":"Yeh J, Chen C. Auditory front-ends for noise-robust automatic speech recognition. In: 7th International Symposium on Chinese Spoken Language Process (ISCSLP), IEEE 2010. pp. 205\u201308.","DOI":"10.1109\/ISCSLP.2010.5684910"},{"key":"9196_CR51","doi-asserted-by":"crossref","unstructured":"Yin H, Hohmann V, Nadeu C. Acoustic features for speech recognition based on gammatone filterbank and instantaneous frequency. Speech Commun. 2010;53:707\u201315.","DOI":"10.1016\/j.specom.2010.04.008"},{"key":"9196_CR52","volume-title":"The ear as a communication receiver","author":"E Zwicker","year":"1999","unstructured":"Zwicker E, Feldtkeller R. The ear as a communication receiver. Woodbury: Acoustical Society of America; 1999."},{"issue":"6","key":"9196_CR53","doi-asserted-by":"crossref","first-page":"1508","DOI":"10.1121\/1.387850","volume":"71","author":"E Zwicker","year":"1982","unstructured":"Zwicker E, Jaroszewski A. Inverse frequency dependence of simultaneous tone-on-tone masking patterns at low levels. J Acoust Soc Am. 1982;71(6):1508\u201312.","journal-title":"J Acoust Soc Am"},{"key":"9196_CR54","doi-asserted-by":"crossref","first-page":"1523","DOI":"10.1121\/1.385079","volume":"68","author":"E Zwicker","year":"1980","unstructured":"Zwicker E, Terhardt E. Analytical expressions for critical-band rate and critical bandwidth as a function of frequency. J Acoust Soc Am. 1980;68:1523","journal-title":"J Acoust Soc Am"}],"container-title":["Cognitive Computation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-012-9196-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12559-012-9196-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-012-9196-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,20]],"date-time":"2025-04-20T00:23:40Z","timestamp":1745108620000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12559-012-9196-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,11,1]]},"references-count":54,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2013,12]]}},"alternative-id":["9196"],"URL":"https:\/\/doi.org\/10.1007\/s12559-012-9196-6","relation":{},"ISSN":["1866-9956","1866-9964"],"issn-type":[{"value":"1866-9956","type":"print"},{"value":"1866-9964","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,11,1]]}}}