{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T23:23:53Z","timestamp":1768001033123,"version":"3.49.0"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,2,19]],"date-time":"2023-02-19T00:00:00Z","timestamp":1676764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,19]],"date-time":"2023-02-19T00:00:00Z","timestamp":1676764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s10772-023-10024-1","type":"journal-article","created":{"date-parts":[[2023,2,23]],"date-time":"2023-02-23T16:37:09Z","timestamp":1677170229000},"page":"211-220","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["An empirical study on analysis window functions for text-independent speaker recognition"],"prefix":"10.1007","volume":"26","author":[{"given":"Bidhan","family":"Barai","sequence":"first","affiliation":[]},{"given":"Nibaran","family":"Das","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1780-0461","authenticated-orcid":false,"given":"Subhadip","family":"Basu","sequence":"additional","affiliation":[]},{"given":"Mita","family":"Nasipuri","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,19]]},"reference":[{"issue":"3","key":"10024_CR1","doi-asserted-by":"publisher","first-page":"851","DOI":"10.1007\/s10772-019-09630-9","volume":"22","author":"MT Al-Kaltakchi","year":"2019","unstructured":"Al-Kaltakchi, M. T., Al-Nima, R. R. O., Abdullah, M. A., & Abdullah, H. N. (2019). Thorough evaluation of timit database speaker identification performance under noise with and without the g. 712 type handset. International Journal of Speech Technology, 22(3), 851\u2013863.","journal-title":"International Journal of Speech Technology"},{"issue":"3","key":"10024_CR2","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1044\/jslhr.4103.549","volume":"41","author":"SP Bacon","year":"1998","unstructured":"Bacon, S. P., Opie, J. M., & Montoya, D. Y. (1998). The effects of hearing loss and noise masking on the masking release for speech in temporally complex backgrounds. Journal of Speech, Language, and Hearing Research, 41(3), 549\u2013563.","journal-title":"Journal of Speech, Language, and Hearing Research"},{"key":"10024_CR3","doi-asserted-by":"crossref","unstructured":"Barai, B., Das, D., Das, N., Basu, S., & Nasipuri, M. (2017, December). An ASR system using MFCC and VQ\/GMM with emphasis on environmental dependency. In: 2017 IEEE Calcutta conference (CALCON) (pp. 362\u2013366).","DOI":"10.1109\/CALCON.2017.8280756"},{"key":"10024_CR4","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/978-981-10-7566-7_33","volume-title":"Intelligent engineering informatics","author":"B Barai","year":"2018","unstructured":"Barai, B., Das, D., Das, N., Basu, S., & Nasipuri, M. (2018). Closed-set text-independent automatic speaker recognition system using vq\/gmm. In V. Bhateja, C. A. Coello Coello, S. C. Satapathy, & P. K. Pattnaik (Eds.), Intelligent engineering informatics (pp. 337\u2013346). Springer."},{"key":"10024_CR5","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-981-13-3702-4_8","volume-title":"Advanced computing and systems for security","author":"B Barai","year":"2019","unstructured":"Barai, B., Das, D., Das, N., Basu, S., & Nasipuri, M. (2019). Vq\/gmm-based speaker identification with emphasis on language dependency. In R. Chaki, A. Cortesi, K. Saeed, & N. Chaki (Eds.), Advanced computing and systems for security,\u00a0Vol. eight, (pp. 125\u2013141). Springer. https:\/\/doi.org\/10.1007\/978-981-13-3702-4_8"},{"key":"10024_CR6","first-page":"1","volume":"25","author":"B Barai","year":"2021","unstructured":"Barai, B., Das, D., Das, N., Basu, S., & Nasipuri, M. (2021). Closed-set speaker identification using vq and gmm based models. International Journal of Speech Technology, 25, 1\u201324.","journal-title":"International Journal of Speech Technology"},{"issue":"4","key":"10024_CR7","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1016\/S0020-7373(72)80037-3","volume":"4","author":"DJ Broad","year":"1972","unstructured":"Broad, D. J. (1972). Formants in automatic speech recognition. International Journal of Man-Machine Studies, 4(4), 411\u2013424.","journal-title":"International Journal of Man-Machine Studies"},{"issue":"9","key":"10024_CR8","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1109\/5.628714","volume":"85","author":"JP Campbell","year":"1997","unstructured":"Campbell, J. P. (1997). Speaker recognition: A tutorial. Proceedings of the IEEE, 85(9), 1437\u20131462.","journal-title":"Proceedings of the IEEE"},{"issue":"3","key":"10024_CR9","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/j.csl.2006.08.001","volume":"21","author":"C Cerisara","year":"2007","unstructured":"Cerisara, C., Demange, S., & Haton, J.-P. (2007). On noise masking for automatic missing data speech recognition: A survey and discussion. Computer Speech & Language, 21(3), 443\u2013457.","journal-title":"Computer Speech & Language"},{"key":"10024_CR10","doi-asserted-by":"crossref","unstructured":"Chauhan, N., Isshiki, T., & Li, D. (2020). Speaker recognition using fusion of features with feedforward artificial neural network and support vector machine. In: 2020 international conference on intelligent engineering and management (ICIEM) (pp. 170\u2013176).","DOI":"10.1109\/ICIEM48762.2020.9160269"},{"issue":"4","key":"10024_CR11","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics, Speech, and Signal Processing, 28(4), 357\u2013366.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"10024_CR12","doi-asserted-by":"publisher","first-page":"1432","DOI":"10.1109\/LSP.2022.3183538","volume":"29","author":"J Deng","year":"2022","unstructured":"Deng, J., Dong, L., Wang, R., Yang, R., & Yan, D. (2022). Decision-based attack to speaker recognition system via local low-frequency perturbation. IEEE Signal Processing Letters, 29, 1432\u20131436.","journal-title":"IEEE Signal Processing Letters"},{"key":"10024_CR13","series-title":"Informatikog Matematisk modelling","volume-title":"A new database for speaker recognition","author":"L Feng","year":"2005","unstructured":"Feng, L., & Hansen, L. K. (2005). A new database for speaker recognition. Informatikog Matematisk modellingDTU:IMM."},{"key":"10024_CR14","doi-asserted-by":"crossref","unstructured":"Firmansyah, M. R., Hidayat, R., & Bejo, A. (2021). Comparison of windowing function on feature extraction using mfcc for speaker identification. In: 2021 international conference on intelligent cybernetics technology & applications (ICICYTA) (pp. 1\u20135).","DOI":"10.1109\/ICICyTA53712.2021.9689160"},{"issue":"4","key":"10024_CR15","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1007\/s10772-012-9140-x","volume":"15","author":"BC Haris","year":"2012","unstructured":"Haris, B. C., Pradhan, G., Misra, A., Prasanna, S. R., Das, R. K., & Sinha, R. (2012). Multivariability speaker recognition database in Indian scenario. International Journal of Speech Technology, 15(4), 441\u2013453.","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"10024_CR16","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1109\/PROC.1978.10837","volume":"66","author":"FJ Harris","year":"1978","unstructured":"Harris, F. J. (1978). On the use of windows for harmonic analysis with the discrete Fourier transform. Proceedings of the IEEE, 66(1), 51\u201383.","journal-title":"Proceedings of the IEEE"},{"key":"10024_CR17","volume-title":"Adaptive filter theory","author":"SS Haykin","year":"2005","unstructured":"Haykin, S. S. (2005). Adaptive filter theory. Pearson Education India."},{"issue":"1","key":"10024_CR18","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TASL.2006.876858","volume":"15","author":"RM Hegde","year":"2007","unstructured":"Hegde, R. M., Murthy, H. A., & Gadde, V. R. R. (2007). Significance of the modified group delay feature in speech recognition. IEEE Transactions on Audio, Speech, and Language Processing, 15(1), 190\u2013202.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"4","key":"10024_CR19","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hermansky","year":"1994","unstructured":"Hermansky, H., & Morgan, N. (1994). Rasta processing of speech. IEEE Transactions on Speech and Audio Processing, 2(4), 578\u2013589.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"10024_CR20","unstructured":"Johnston, J. D. (1988). Estimation of perceptual entropy using noise masking criteria. In: Icassp-88., international conference on acoustics, speech, and signal processing (pp. 2524\u20132527)."},{"key":"10024_CR21","doi-asserted-by":"publisher","first-page":"1586","DOI":"10.1109\/TASLP.2022.3169977","volume":"30","author":"L Li","year":"2022","unstructured":"Li, L., Tong, F., & Hong, Q. (2022). When speaker recognition meets noisy labels: Optimizations for front-ends and back-ends. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 30, 1586\u20131599.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10024_CR22","volume-title":"Understanding digital signal processing","author":"RG Lyons","year":"2004","unstructured":"Lyons, R. G. (2004). Understanding digital signal processing (3rd ed.). Pearson Education India.","edition":"3"},{"issue":"4","key":"10024_CR23","doi-asserted-by":"publisher","first-page":"1085","DOI":"10.1109\/TASL.2011.2172422","volume":"20","author":"S Nakagawa","year":"2012","unstructured":"Nakagawa, S., Wang, L., & Ohtsuka, S. (2012). Speaker identification and verification by combining MFCC and phase information. IEEE Transactions on Audio, Speech and Language Processing, 20(4), 1085\u20131095.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"issue":"8","key":"10024_CR24","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1109\/TSMC.1977.4309789","volume":"7","author":"SK Pal","year":"1977","unstructured":"Pal, S. K., & Majumder, D. D. (1977). Fuzzy sets and decision making approaches in vowel and speaker recognition. IEEE Transactions on Systems, Man, and Cybernetics, 7(8), 625\u2013629.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"issue":"2","key":"10024_CR25","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1007\/s10772-018-9515-8","volume":"21","author":"R Ram","year":"2018","unstructured":"Ram, R., & Mohanty, M. N. (2018). Performance analysis of adaptive variational mode decomposition approach for speech enhancement. International Journal of Speech Technology, 21(2), 369\u2013381.","journal-title":"International Journal of Speech Technology"},{"key":"10024_CR26","doi-asserted-by":"crossref","unstructured":"Ravanelli, M., & Bengio, Y. (2018). Speaker recognition from raw waveform with sincnet. In: 2018 IEEE spoken language technology workshop (SLT) (pp. 1021\u20131028).","DOI":"10.1109\/SLT.2018.8639585"},{"key":"10024_CR27","doi-asserted-by":"crossref","unstructured":"Reda, A., Panjwani, S., & Cutrell, E. (2011). Hyke: A low-cost remote attendance tracking system for developing regions. In: Proceedings of the 5th acm workshop on networked systems for developing regions (pp. 15\u201320). ACM.","DOI":"10.1145\/1999927.1999933"},{"issue":"2","key":"10024_CR28","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1007\/s10772-021-09796-1","volume":"24","author":"FJ Reyes-D\u00edaz","year":"2021","unstructured":"Reyes-D\u00edaz, F. J., Hern\u00e1ndez-Sierra, G., & de Lara, J. R. C. (2021). Dnn and i-vector combined method for speaker recognition on multi-variability environments. International Journal of Speech Technology, 24(2), 409\u2013418.","journal-title":"International Journal of Speech Technology"},{"issue":"1\u20132","key":"10024_CR29","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/0167-6393(95)00009-D","volume":"17","author":"DA Reynolds","year":"1995","unstructured":"Reynolds, D. A. (1995). Speaker identification and verification using Gaussian mixture speaker models. Speech Communication, 17(1\u20132), 91\u2013108.","journal-title":"Speech Communication"},{"issue":"10","key":"10024_CR30","doi-asserted-by":"publisher","first-page":"1671","DOI":"10.1109\/LSP.2015.2420092","volume":"22","author":"F Richardson","year":"2015","unstructured":"Richardson, F., Reynolds, D., & Dehak, N. (2015). Deep neural network approaches to speaker and language recognition. IEEE Signal Processing Letters, 22(10), 1671\u20131675.","journal-title":"IEEE Signal Processing Letters"},{"key":"10024_CR31","doi-asserted-by":"crossref","unstructured":"Sadjadi, S. O., Greenberg, C., Singer, E., Mason, L., & Reynolds, D. (2022). The 2021 nist speaker recognition evaluation. arXiv:2204.10242.","DOI":"10.21437\/Odyssey.2022-45"},{"key":"10024_CR32","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1007\/978-3-642-17641-8_18","volume":"13","author":"Z Saquib","year":"2010","unstructured":"Saquib, Z., Salam, N., Nair, R. P., Pandey, N., & Joshi, A. (2010). A survey on automatic speaker recognition systems. Signal Processing and Multimedia, 13, 134\u2013145.","journal-title":"Signal Processing and Multimedia"},{"issue":"1","key":"10024_CR33","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10772-016-9385-x","volume":"20","author":"M Soleymanpour","year":"2017","unstructured":"Soleymanpour, M., & Marvi, H. (2017). Text-independent speaker identification based on selection of the most similar feature vectors. International Journal of Speech Technology, 20(1), 99\u2013108.","journal-title":"International Journal of Speech Technology"},{"issue":"2","key":"10024_CR34","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/MCAS.2011.941079","volume":"11","author":"R Togneri","year":"2011","unstructured":"Togneri, R., & Pullella, D. (2011). An overview of speaker identification: Accuracy and robustness issues. IEEE Circuits and Systems Magazine, 11(2), 23\u201361.","journal-title":"IEEE Circuits and Systems Magazine"},{"key":"10024_CR35","unstructured":"Tyagi, V. (2006). Novel speech processing techniques for robust automatic speech recognition (Tech. Rep.). EPFL."},{"issue":"2","key":"10024_CR36","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/j.joto.2020.12.001","volume":"16","author":"X Wang","year":"2021","unstructured":"Wang, X., & Xu, L. (2021). Speech perception in noise: Masking and unmasking. Journal of Otology, 16(2), 109\u2013119.","journal-title":"Journal of Otology"},{"issue":"6B","key":"10024_CR37","doi-asserted-by":"publisher","first-page":"2044","DOI":"10.1121\/1.1913065","volume":"51","author":"JJ Wolf","year":"1972","unstructured":"Wolf, J. J. (1972). Efficient acoustic parameters for speaker recognition. The Journal of the Acoustical Society of America, 51(6B), 2044\u20132056.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"10024_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10772-022-09973-w","volume":"25","author":"H Wu","year":"2022","unstructured":"Wu, H., Luo, L., Peng, H., & Wen, W. (2022). A method of multi-models fusion for speaker recognition. International Journal of Speech Technology, 25, 1\u20136.","journal-title":"International Journal of Speech Technology"},{"key":"10024_CR39","unstructured":"Yang, W., Dixon, M., & Yantorno, R. (1997). A modified bark spectral distortion measure which uses noise masking threshold. In: 1997 IEEE workshop on speech coding for telecommunications proceedings. back to basics: Attacking fundamental problems in speech coding (pp. 55\u201356)."},{"issue":"1","key":"10024_CR40","first-page":"49","volume":"16","author":"CH You","year":"2008","unstructured":"You, C. H., Lee, K. A., & Li, H. (2008). An svm kernel with gmm-supervector based on the Bhattacharyya distance for speaker recognition. IEEE Signal processing letters, 16(1), 49\u201352.","journal-title":"IEEE Signal processing letters"},{"key":"10024_CR41","doi-asserted-by":"crossref","unstructured":"Zhao, X., & Wang, D. (2013). Analyzing noise robustness of mfcc and gfcc features in speaker identification. In: 2013 IEEE international conference on acoustics, speech and signal processing (pp. 7204\u20137208).","DOI":"10.1109\/ICASSP.2013.6639061"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10024-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-023-10024-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10024-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,27]],"date-time":"2023-03-27T11:16:48Z","timestamp":1679915808000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-023-10024-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,19]]},"references-count":41,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["10024"],"URL":"https:\/\/doi.org\/10.1007\/s10772-023-10024-1","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2,19]]},"assertion":[{"value":"18 April 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}