{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:38:11Z","timestamp":1740148691633,"version":"3.37.3"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2020,3,11]],"date-time":"2020-03-11T00:00:00Z","timestamp":1583884800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,3,11]],"date-time":"2020-03-11T00:00:00Z","timestamp":1583884800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1007\/s11265-020-01517-2","type":"journal-article","created":{"date-parts":[[2020,3,11]],"date-time":"2020-03-11T02:02:57Z","timestamp":1583892177000},"page":"679-692","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Improved Speech-Signal Based Frequency Warping Scale for Cepstral Feature in Robust Speaker Verification System"],"prefix":"10.1007","volume":"92","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0488-0856","authenticated-orcid":false,"given":"Susanta Kumar","family":"Sarangi","sequence":"first","affiliation":[]},{"given":"Goutam","family":"Saha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,3,11]]},"reference":[{"issue":"9","key":"1517_CR1","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1109\/5.628714","volume":"85","author":"JP Campbell","year":"1997","unstructured":"Campbell, J.P. (1997). Speaker recognition: a tutorial. Proceedings of the IEEE, 85(9), 1437\u20131462.","journal-title":"Proceedings of the IEEE"},{"issue":"1","key":"1517_CR2","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1016\/j.specom.2009.08.009","volume":"52","author":"T Kinnunen","year":"2010","unstructured":"Kinnunen, T., & Li, H. (2010). An overview of text-independent speaker recognition: from features to supervectors. Speech Communication, 52(1), 12\u201340.","journal-title":"Speech Communication"},{"issue":"4","key":"1517_CR3","first-page":"357","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Audio, Speech, and Language Processing, 28(4), 357\u2013366.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"1517_CR4","unstructured":"Ganchev, T. (2005). Speaker recognition PhD thesis. University of Patras, Greece."},{"key":"1517_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.dsp.2015.10.011","volume":"50","author":"M Sahidullah","year":"2016","unstructured":"Sahidullah, M., & Kinnunen, T. (2016). Local spectral variability features for speaker verification. Digital Signal Processing, 50, 1\u201311.","journal-title":"Digital Signal Processing"},{"issue":"4","key":"1517_CR6","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1109\/LSP.2009.2014096","volume":"16","author":"K Paliwal","year":"2009","unstructured":"Paliwal, K., Shannon, B., Lyons, J., W\u00f3jcicki, K. (2009). Speech-signal-based frequency warping. IEEE Signal Processing Letters, 16(4), 319\u2013322.","journal-title":"IEEE Signal Processing Letters"},{"issue":"4","key":"1517_CR7","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1016\/j.specom.2011.11.004","volume":"54","author":"M Sahidullah","year":"2012","unstructured":"Sahidullah, M., & Saha, G. (2012). Design, analysis and experimental evaluation of block based transformation in MFCC computation for speaker recognition. Speech Communication, 54(4), 543\u2013565.","journal-title":"Speech Communication"},{"issue":"2","key":"1517_CR8","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1109\/89.902277","volume":"9","author":"A Biem","year":"2001","unstructured":"Biem, A., Katagiri, S., McDermott, E., Juang, B.H. (2001). An application of discriminative feature extraction to filter-bank-based speech recognition. IEEE Transactions on Speech and Audio Processing, 9(2), 96\u2013110.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"1517_CR9","doi-asserted-by":"crossref","unstructured":"Aradilla, G, Vepa, J, Bourlard, H. (2005). Improving speech recognition using a data-driven approach. In Proc. Interspeech, EPFL-CONF-83186.","DOI":"10.21437\/Interspeech.2005-856"},{"key":"1517_CR10","unstructured":"Chollet, G., McTait, K., Petrovska-Delacr\u00e9taz, D. (2005). Data driven approaches to speech and language processing. In Proc. nonlinear speech modeling and applications (pp. 164\u2013198): Springer."},{"issue":"7","key":"1517_CR11","doi-asserted-by":"publisher","first-page":"1355","DOI":"10.1109\/TASL.2013.2250959","volume":"21","author":"J Ming","year":"2013","unstructured":"Ming, J., Srinivasan, R., Crookes, D., Jafari, A. (2013). CLOSE - a data-driven approach to speech separation. IEEE Transactions on Audio, Speech, and Language Processing, 21(7), 1355\u2013 1368.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"1517_CR12","unstructured":"Umesh, S., Cohen, L., Marinovic, N., Nelson, D. (1996). Frequency-warping in speech. In Proc. ICSLP, (Vol. 1 pp. 414\u2013417): IEEE."},{"issue":"3","key":"1517_CR13","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1109\/97.995829","volume":"9","author":"S Umesh","year":"2002","unstructured":"Umesh, S., Cohen, L., Nelson, D. (2002). Frequency warping and the mel scale. IEEE Signal Processing Letters, 9(3), 104\u2013107.","journal-title":"IEEE Signal Processing Letters"},{"key":"1517_CR14","doi-asserted-by":"crossref","unstructured":"Sarangi, S.K., & Saha, G. (2012). A novel approach in feature level for robust text-independent speaker identification system. In Proc. Fourth international conference on intelligent human computer interaction (IHCI) (pp. 1\u20135).","DOI":"10.1109\/IHCI.2012.6481824"},{"issue":"4","key":"1517_CR15","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1109\/JSTSP.2017.2684705","volume":"11","author":"D Paul","year":"2017","unstructured":"Paul, D., Pal, M., Saha, G. (2017). Spectral features for synthetic speech detection. IEEE Journal of Selected Topics in Signal Processing, 11(4), 605\u2013617.","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"key":"1517_CR16","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1155\/2015\/325703","volume":"2015","author":"D Sengupta","year":"2015","unstructured":"Sengupta, D., & Saha, G. (2015). Study on similarity among indian languages using language verification framework. Advances in Artificial Intelligence, 2015, 2.","journal-title":"Advances in Artificial Intelligence"},{"issue":"2","key":"1517_CR17","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/s11265-015-1019-z","volume":"82","author":"M Li","year":"2016","unstructured":"Li, M., Liu, L., Cai, W., Liu, W. (2016). Generalized i-vector representation with phonetic tokenizations and tandem features for both text independent and text dependent speaker verification. Journal of Signal Processing Systems, 82(2), 207\u2013215.","journal-title":"Journal of Signal Processing Systems"},{"issue":"3","key":"1517_CR18","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1121\/1.1915893","volume":"8","author":"S Stevens","year":"1937","unstructured":"Stevens, S., Volkmann, J., Newman, E. (1937). A scale for the measurement of the psychological magnitude pitch. The Journal of the Acoustical Society of America, 8(3), 185\u2013190.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"1517_CR19","unstructured":"Toh, A.M., Togneri, R., Nordholm, S. (2005). Spectral entropy as speech features for speech recognition. Proceedings of PEECS, 1."},{"key":"1517_CR20","unstructured":"Misra, H, Ikbal, S, Bourlard, H, Hermansky, H. (2004). Spectral entropy based feature for robust asr. In Proc. ICASSP, (Vol. 1 pp. I\u2013193): IEEE."},{"issue":"4","key":"1517_CR21","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1007\/s10772-010-9079-8","volume":"13","author":"G Sarkar","year":"2010","unstructured":"Sarkar, G., & Saha, G. (2010). Spectral entropy and spectral shape based pre-quantization for real time speaker identification system. International Journal of Speech Technology, 13(4), 189\u2013199.","journal-title":"International Journal of Speech Technology"},{"issue":"5","key":"1517_CR22","doi-asserted-by":"publisher","first-page":"762","DOI":"10.1109\/TSA.2005.851909","volume":"13","author":"BF Wu","year":"2005","unstructured":"Wu, B.F., & Wang, K.C. (2005). Robust endpoint detection algorithm based on the adaptive band-partitioning spectral entropy in adverse environments. IEEE Transactions on Speech and Audio Processing, 13(5), 762\u2013775.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"1517_CR23","doi-asserted-by":"crossref","unstructured":"Hayakawa, S, & Itakura, F. (1994). Text-dependent speaker recognition using the information in the higher frequency band. In Proc. ICASSP, (Vol. 1 pp. I\u2013137\u2013I\u2013140).","DOI":"10.1109\/ICASSP.1994.389336"},{"issue":"2","key":"1517_CR24","first-page":"114","volume":"4","author":"S Chakroborty","year":"2007","unstructured":"Chakroborty, S., Roy, A., Saha, G. (2007). Improved closed set text-independent speaker identification by combining mfcc with evidence from flipped filter banks. International Journal of Signal Processing, 4(2), 114\u2013121.","journal-title":"International Journal of Signal Processing"},{"key":"1517_CR25","unstructured":"Quatieri, T.F. (2006). Discrete-time speech signal processing: principles and practice. Pearson Education India."},{"issue":"1\u20133","key":"1517_CR26","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds, D.A., Quatieri, T.F., Dunn, R.B. (2000). Speaker verification using adapted Gaussian mixture models. Digital Signal Processing, 10(1\u20133), 19\u201341.","journal-title":"Digital Signal Processing"},{"issue":"4","key":"1517_CR27","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"Dehak, N., Kenny, P., Dehak, R., Dumouchel, P., Ouellet, P. (2011). Front-end factor analysis for speaker verification. IEEE Transactions on Audio, Speech, and Language Processing, 19(4), 788\u2013798.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"1517_CR28","unstructured":"Kenny, P. (2010). Bayesian speaker verification with heavy-tailed priors. In Proc. Odyssey: the speaker and language recognition workshop, ISCA, (p. 14)."},{"key":"1517_CR29","unstructured":"Poddar, A., Sahidullah, M., Saha, G. (2017). Improved i-vector extraction technique for speaker verification with short utterances. International Journal of Speech Technology, 1\u201316."},{"key":"1517_CR30","unstructured":"Poddar, A., Sahidullah, M., Saha, G. (2015). Performance comparison of speaker recognition systems in presence of duration variability. In Proc. Annual IEEE India conference INDICON (pp. 1\u20136): IEEE."},{"key":"1517_CR31","doi-asserted-by":"crossref","unstructured":"Martin, A., Doddington, G., Kamm, T., Ordowski, M., Przybocki, M. (1997). The DET curve in assessment of detection task performance. Tech. rep., DTIC Document.","DOI":"10.21437\/Eurospeech.1997-504"},{"issue":"5","key":"1517_CR32","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1109\/MAES.2005.1432568","volume":"20","author":"M Faundez-Zanuy","year":"2005","unstructured":"Faundez-Zanuy, M., & Monte-Moreno, E. (2005). State-of-the-art in speaker recognition. IEEE Aerospace and Electronic Systems Magazine, 20(5), 7\u201312.","journal-title":"IEEE Aerospace and Electronic Systems Magazine"},{"issue":"2\u20133","key":"1517_CR33","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1016\/S0167-6393(99)00082-5","volume":"31","author":"J Hennebert","year":"2000","unstructured":"Hennebert, J., Melin, H., Petrovska, D., Genoud, D. (2000). POLYCOST: a telephone-speech database for speaker recognition. Speech Communication, 31(2\u20133), 265\u2013270.","journal-title":"Speech Communication"},{"key":"1517_CR34","unstructured":"Sahidullah, M, & Saha, G. (2012). Comparison of speech activity detection techniques for speaker recognition. arXiv:12100297."},{"issue":"6B","key":"1517_CR35","doi-asserted-by":"publisher","first-page":"2044","DOI":"10.1121\/1.1913065","volume":"51","author":"JJ Wolf","year":"1972","unstructured":"Wolf, J.J. (1972). Efficient acoustic parameters for speaker recognition. The Journal of the Acoustical Society of America, 51(6B), 2044\u20132056.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"4","key":"1517_CR36","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.specom.2007.10.005","volume":"50","author":"X Lu","year":"2008","unstructured":"Lu, X., & Dang, J. (2008). An investigation of dependencies between frequency components and speaker characteristics for text-independent speaker identification. Speech Communication, 50(4), 312\u2013322.","journal-title":"Speech Communication"},{"key":"1517_CR37","unstructured":"Br\u00fcmmer, N. (2007). FoCal multi-class: toolkit for evaluation, fusion and calibration of multi-class recognition scores-tutorial and user manual. Software available at http:\/\/sitesgooglecom\/site\/nikobrummer\/focalmulticlass33:39."},{"issue":"8","key":"1517_CR38","doi-asserted-by":"publisher","first-page":"1622","DOI":"10.1109\/TASL.2013.2256895","volume":"21","author":"V Hautamaki","year":"2013","unstructured":"Hautamaki, V., Kinnunen, T., Sedl\u00e1k, F, Lee, K.A., Ma, B., Li, H. (2013). Sparse classifier fusion for speaker verification. IEEE Transactions on Audio, Speech, and Language Processing, 21(8), 1622\u20131631.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-020-01517-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11265-020-01517-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-020-01517-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,18]],"date-time":"2022-10-18T07:30:13Z","timestamp":1666078213000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11265-020-01517-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,11]]},"references-count":38,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2020,7]]}},"alternative-id":["1517"],"URL":"https:\/\/doi.org\/10.1007\/s11265-020-01517-2","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"type":"print","value":"1939-8018"},{"type":"electronic","value":"1939-8115"}],"subject":[],"published":{"date-parts":[[2020,3,11]]},"assertion":[{"value":"23 September 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 December 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 March 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}