{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,14]],"date-time":"2024-09-14T12:40:38Z","timestamp":1726317638258},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Speech &amp; Language"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1016\/j.csl.2024.101713","type":"journal-article","created":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T05:06:31Z","timestamp":1724389591000},"page":"101713","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Significance of chirp MFCC as a feature in speech and audio applications"],"prefix":"10.1016","volume":"89","author":[{"ORCID":"http:\/\/orcid.org\/0000-0003-3312-2015","authenticated-orcid":false,"given":"S. Johanan","family":"Joysingh","sequence":"first","affiliation":[]},{"given":"P.","family":"Vijayalakshmi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nagarajan","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.csl.2024.101713_b1","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1016\/j.neunet.2021.03.004","article-title":"Speaker recognition based on deep learning: An overview","volume":"140","author":"Bai","year":"2021","journal-title":"Neural Netw."},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b2","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1155\/S1110865702000720","article-title":"Audio classification in speech and music: a comparison between a statistical and a neural approach","volume":"2002","author":"Bugatti","year":"2002","journal-title":"EURASIP J. Adv. Signal Process."},{"issue":"12","key":"10.1016\/j.csl.2024.101713_b3","doi-asserted-by":"crossref","first-page":"1993","DOI":"10.1109\/TASLP.2014.2359159","article-title":"A feature study for classification-based speech separation at low signal-to-noise ratios","volume":"22","author":"Chen","year":"2014","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b4","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","article-title":"Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences","volume":"28","author":"Davis","year":"1980","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"year":"2018","series-title":"A neural attention model for speech command recognition","author":"de Andrade","key":"10.1016\/j.csl.2024.101713_b5"},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b6","doi-asserted-by":"crossref","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","article-title":"Front-end factor analysis for speaker verification","volume":"19","author":"Dehak","year":"2010","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"10.1016\/j.csl.2024.101713_b7","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"4237","article-title":"Support vector machines and joint factor analysis for speaker verification","author":"Dehak","year":"2009"},{"issue":"1","key":"10.1016\/j.csl.2024.101713_b8","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1109\/TASL.2006.876858","article-title":"Significance of the modified group delay feature in speech recognition","volume":"15","author":"Hegde","year":"2007","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"10.1016\/j.csl.2024.101713_b9","series-title":"International Symposium on Multimedia","first-page":"135","article-title":"Speech\/music classification of short audio segments","author":"Hirvonen","year":"2014"},{"issue":"10","key":"10.1016\/j.csl.2024.101713_b10","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1109\/97.789604","article-title":"Teager energy based feature parameters for speech recognition in car noise","volume":"6","author":"Jabloun","year":"1999","journal-title":"IEEE Signal Process. Lett."},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b11","doi-asserted-by":"crossref","first-page":"1435","DOI":"10.1109\/TASL.2006.881693","article-title":"Joint factor analysis versus eigenchannels in speaker recognition","volume":"15","author":"Kenny","year":"2007","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"10.1016\/j.csl.2024.101713_b12","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1016\/j.dsp.2015.09.005","article-title":"Speech\/music classification using speech-specific features","volume":"48","author":"Khonglah","year":"2016","journal-title":"Digit. Signal Process."},{"key":"10.1016\/j.csl.2024.101713_b13","series-title":"INTERSPEECH","article-title":"Feature extraction for robust speech recognition using a power-law nonlinearity and power-bias subtraction","volume":"Vol. 10","author":"Kim","year":"2009"},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b14","doi-asserted-by":"crossref","first-page":"670","DOI":"10.1109\/TMM.2009.2017635","article-title":"Automatic music genre classification based on modulation spectral analysis of spectral and cepstral features","volume":"11","author":"Lee","year":"2009","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.csl.2024.101713_b15","series-title":"International Conference on Multimedia and Expo","first-page":"204","article-title":"Automatic music genre classification using modulation spectral contrast feature","author":"Lee","year":"2007"},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b16","doi-asserted-by":"crossref","first-page":"744","DOI":"10.1109\/TASSP.1986.1164910","article-title":"Speech analysis\/synthesis based on a sinusoidal representation","volume":"34","author":"McAulay","year":"1986","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b17","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1109\/79.317925","article-title":"Automatic language identification: A review\/tutorial","volume":"11","author":"Muthusamy","year":"1994","journal-title":"IEEE Signal Process. Mag."},{"key":"10.1016\/j.csl.2024.101713_b18","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"1061","article-title":"Bias estimation and correction in a classifier using product of likelihood-gaussians","volume":"Vol. 3","author":"Nagarajan","year":"2007"},{"article-title":"Voxceleb: a large-scale speaker identification dataset","year":"2017","series-title":"INTERSPEECH","author":"Nagrani","key":"10.1016\/j.csl.2024.101713_b19"},{"issue":"4","key":"10.1016\/j.csl.2024.101713_b20","doi-asserted-by":"crossref","first-page":"1085","DOI":"10.1109\/TASL.2011.2172422","article-title":"Speaker identification and verification by combining MFCC and phase information","volume":"20","author":"Nakagawa","year":"2011","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"1","key":"10.1016\/j.csl.2024.101713_b21","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1109\/TMM.2004.840604","article-title":"A speech\/music discriminator based on RMS and zero-crossings","volume":"7","author":"Panagiotakis","year":"2005","journal-title":"IEEE Trans. Multimedia"},{"issue":"2","key":"10.1016\/j.csl.2024.101713_b22","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1109\/MSP.2004.1276120","article-title":"The chirp z-transform algorithm-a lesson in serendipity","volume":"21","author":"Rabiner","year":"2004","journal-title":"IEEE Signal Process. Mag."},{"issue":"1","key":"10.1016\/j.csl.2024.101713_b23","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1109\/MASSP.1986.1165342","article-title":"An introduction to hidden Markov models","volume":"3","author":"Rabiner","year":"1986","journal-title":"IEEE ASSP Mag."},{"issue":"2","key":"10.1016\/j.csl.2024.101713_b24","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1109\/TAU.1969.1162034","article-title":"The chirp z-transform algorithm","volume":"17","author":"Rabiner","year":"1969","journal-title":"IEEE Trans. Audio Electroacoust."},{"issue":"1\u20133","key":"10.1016\/j.csl.2024.101713_b25","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1006\/dspr.1999.0361","article-title":"Speaker verification using adapted Gaussian mixture models","volume":"10","author":"Reynolds","year":"2000","journal-title":"Digit. Signal Process."},{"key":"10.1016\/j.csl.2024.101713_b26","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"129","article-title":"A hidden Markov model based keyword recognition system","author":"Rose","year":"1990"},{"key":"10.1016\/j.csl.2024.101713_b27","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"993","article-title":"Real-time discrimination of broadcast speech\/music","volume":"Vol. 2","author":"Saunders","year":"1996"},{"key":"10.1016\/j.csl.2024.101713_b28","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"1331","article-title":"Construction and evaluation of a robust multifeature speech\/music discriminator","volume":"Vol. 2","author":"Scheirer","year":"1997"},{"key":"10.1016\/j.csl.2024.101713_b29","doi-asserted-by":"crossref","unstructured":"Seck, M., Bimbot, F., Zugaj, D., Delyon, B., 1999. Two-class signal segmentation for speech\/music detection in audio tracks. In: European Conference on Speech Communication and Technology. pp. 1\u20134.","DOI":"10.21437\/Eurospeech.1999-701"},{"key":"10.1016\/j.csl.2024.101713_b30","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"2489","article-title":"Music tonality features for speech\/music discrimination","author":"Sell","year":"2014"},{"issue":"2","key":"10.1016\/j.csl.2024.101713_b31","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1007\/s11042-009-0416-3","article-title":"Improvement to speech-music discrimination using sinusoidal model based features","volume":"50","author":"Shirazi","year":"2010","journal-title":"Multimedia Tools Appl."},{"key":"10.1016\/j.csl.2024.101713_b32","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"425","article-title":"A speech-music discriminator using HILN model based features","volume":"Vol. 5","author":"Thoshkahna","year":"2006"},{"key":"10.1016\/j.csl.2024.101713_b33","series-title":"IEEE Southeast Con","first-page":"116","article-title":"Feature extraction using discrete wavelet transform for speech recognition","author":"Tufekci","year":"2000"},{"year":"2018","series-title":"Speech commands: A dataset for limited-vocabulary speech recognition","author":"Warden","key":"10.1016\/j.csl.2024.101713_b34"},{"key":"10.1016\/j.csl.2024.101713_b35","series-title":"European Conference on Speech Communication and Technology","first-page":"1","article-title":"Speech\/music discrimination based on posterior probability features","volume":"Vol. 6","author":"Williams","year":"1999"},{"key":"10.1016\/j.csl.2024.101713_b36","doi-asserted-by":"crossref","unstructured":"Yapanel, U.H., Hansen, J.H.L., 2003. A new perspective on feature extraction for robust in-vehicle speech recognition. In: Eighth European Conference on Speech Communication and Technology. pp. 1281\u20131284.","DOI":"10.21437\/Eurospeech.2003-407"},{"year":"2017","series-title":"Hello edge: Keyword spotting on microcontrollers","author":"Zhang","key":"10.1016\/j.csl.2024.101713_b37"},{"key":"10.1016\/j.csl.2024.101713_b38","series-title":"International Conference on Acoustics, Speech and Signal Processing","first-page":"7204","article-title":"Analyzing noise robustness of MFCC and GFCC features in speaker identification","author":"Zhao","year":"2013"},{"key":"10.1016\/j.csl.2024.101713_b39","doi-asserted-by":"crossref","first-page":"582","DOI":"10.1007\/BF02943243","article-title":"Comparison of different implementations of MFCC","volume":"16","author":"Zheng","year":"2001","journal-title":"J. Comput. Sci. Technol."},{"issue":"1","key":"10.1016\/j.csl.2024.101713_b40","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1109\/TSA.1996.481450","article-title":"Comparison of four approaches to automatic language identification of telephone speech","volume":"4","author":"Zissman","year":"1996","journal-title":"IEEE Trans. Speech Audio Process."}],"container-title":["Computer Speech &amp; Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230824000962?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230824000962?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,9,14]],"date-time":"2024-09-14T11:53:10Z","timestamp":1726314790000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0885230824000962"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":40,"alternative-id":["S0885230824000962"],"URL":"http:\/\/dx.doi.org\/10.1016\/j.csl.2024.101713","relation":{},"ISSN":["0885-2308"],"issn-type":[{"type":"print","value":"0885-2308"}],"subject":[],"published":{"date-parts":[[2025,1]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Significance of chirp MFCC as a feature in speech and audio applications","name":"articletitle","label":"Article Title"},{"value":"Computer Speech & Language","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.csl.2024.101713","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"101713"}}