{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T16:15:54Z","timestamp":1772727354777,"version":"3.50.1"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T00:00:00Z","timestamp":1728086400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T00:00:00Z","timestamp":1728086400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s10772-024-10149-x","type":"journal-article","created":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T11:02:08Z","timestamp":1728126128000},"page":"987-995","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Integrated noise suppression techniques for enhancing voice activity detection in degraded environments"],"prefix":"10.1007","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8930-4235","authenticated-orcid":false,"given":"M. R.","family":"Prasad","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1423-584X","authenticated-orcid":false,"given":"Sharana Basavana","family":"Gowda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9721-9540","authenticated-orcid":false,"given":"Manjunath B.","family":"Talawar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1298-0152","authenticated-orcid":false,"given":"N.","family":"Jagadisha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,5]]},"reference":[{"key":"10149_CR1","unstructured":"Cheng, M., & Li, M. (2024). Multi-input multi-output target-speaker voice activity detection for unified, flexible, and robust audio-visual speaker diarization. arXiv preprint arXiv:2401.08052"},{"issue":"2","key":"10149_CR2","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1109\/LSP.2015.2495219","volume":"23","author":"T Drugman","year":"2015","unstructured":"Drugman, T., Stylianou, Y., Kida, Y., & Akamine, M. (2015). Voice activity detection: Merging source and filter-based information. IEEE Signal Processing Letters, 23(2), 252\u2013256.","journal-title":"IEEE Signal Processing Letters"},{"issue":"3","key":"10149_CR3","doi-asserted-by":"publisher","first-page":"646","DOI":"10.1109\/TASLP.2017.2769220","volume":"26","author":"G Gelly","year":"2017","unstructured":"Gelly, G., & Gauvain, J.-L. (2017). Optimization of rnn-based speech activity detection. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 26(3), 646\u2013656.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"1\u20132","key":"10149_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1504\/IJSISE.2020.113552","volume":"12","author":"SJ Jainar","year":"2020","unstructured":"Jainar, S. J., Sale, P. L., & Nagaraja, B. G. (2020). VAD, feature extraction and modelling techniques for speaker recognition: A review. International Journal of Signal and Imaging Systems Engineering, 12(1\u20132), 1\u201318.","journal-title":"International Journal of Signal and Imaging Systems Engineering"},{"key":"10149_CR5","doi-asserted-by":"crossref","unstructured":"Kinnunen, T., & Rajan, P. (2013). A practical, self-adaptive voice activity detector for speaker verification with noisy telephone and microphone data. In 2013 IEEE international conference on acoustics, speech and signal processing (pp. 7229\u20137233). IEEE.","DOI":"10.1109\/ICASSP.2013.6639066"},{"key":"10149_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2022.104408","volume":"80","author":"Y Korkmaz","year":"2023","unstructured":"Korkmaz, Y., & Boyac\u0131, A. (2023). Hybrid voice activity detection system based on lstm and auditory speech features. Biomedical Signal Processing and Control, 80, 104408.","journal-title":"Biomedical Signal Processing and Control"},{"issue":"02","key":"10149_CR7","doi-asserted-by":"publisher","first-page":"2150017","DOI":"10.1142\/S0219477521500176","volume":"20","author":"B Kumar","year":"2021","unstructured":"Kumar, B. (2021). Comparative performance evaluation of greedy algorithms for speech enhancement system. Fluctuation and Noise Letters, 20(02), 2150017.","journal-title":"Fluctuation and Noise Letters"},{"key":"10149_CR8","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2024.101639","volume":"87","author":"T Lavanya","year":"2024","unstructured":"Lavanya, T., Vijayalakshmi, P., Mrinalini, K., & Nagarajan, T. (2024). Higher order statistics-driven magnitude and phase spectrum estimation for speech enhancement. Computer Speech & Language, 87, 101639.","journal-title":"Computer Speech & Language"},{"issue":"1","key":"10149_CR9","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1109\/TASL.2010.2045180","volume":"19","author":"PC Loizou","year":"2010","unstructured":"Loizou, P. C., & Kim, G. (2010). Reasons why current speech-enhancement algorithms do not improve speech intelligibility and suggested solutions. IEEE Transactions on Audio, Speech, and Language Processing, 19(1), 47\u201356.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"5","key":"10149_CR10","doi-asserted-by":"publisher","first-page":"3387","DOI":"10.1121\/1.3097493","volume":"125","author":"J Ma","year":"2009","unstructured":"Ma, J., Yi, H., & Loizou, P. C. (2009). Objective measures for predicting speech intelligibility in noisy conditions based on new band-importance functions. The Journal of the Acoustical Society of America, 125(5), 3387\u20133405.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"10149_CR11","doi-asserted-by":"publisher","unstructured":"Mitra, V., Franco, H., Stern, R. M., Van Hout, J., Ferrer, L., Graciarena, M., Wang, W., Vergyri, D., Alwan, A., & Hansen, J. H. (2017). Robust features in deep-learning-based speech recognition. In New era for robust speech recognition: Exploiting deep learning, (pp. 187\u2013217).  https:\/\/doi.org\/10.1007\/978-3-319-64680-0_8","DOI":"10.1007\/978-3-319-64680-0_8"},{"issue":"3","key":"10149_CR12","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1515\/jisys-2013-0038","volume":"22","author":"BG Nagaraja","year":"2013","unstructured":"Nagaraja, B. G., & Jayanna, H. S. (2013). Multilingual speaker identification by combining evidence from LPR and multitaper MFCC. Journal of Intelligent Systems, 22(3), 241\u2013251.","journal-title":"Journal of Intelligent Systems"},{"key":"10149_CR13","doi-asserted-by":"crossref","unstructured":"Nagaraja, B. G., & Jayanna, H. S. (2013). Combination of features for crosslingual speaker identification with the constraint of limited data. In Proceedings of the 4th international conference on signal and image processing 2012 (ICSIP 2012) (Vol. 1, pp. 143\u2013148). Springer.","DOI":"10.1007\/978-81-322-0997-3_13"},{"key":"10149_CR14","doi-asserted-by":"publisher","unstructured":"Nagaraja, B. G., Yadava, G. T., Kabballi, P., & Patil, C. M. (2024). VAD system under uncontrolled environment: A solution for strengthening the noise robustness using MMSE-SPZC. International Journal of Speech Technology, Advance online publication. https:\/\/doi.org\/10.1007\/s10772-024-10104-w","DOI":"10.1007\/s10772-024-10104-w"},{"issue":"9","key":"10149_CR15","doi-asserted-by":"publisher","first-page":"14","DOI":"10.5815\/ijigsp.2013.09.03","volume":"5","author":"BG Nagaraja","year":"2013","unstructured":"Nagaraja, B. G., & Jayanna, H. S. (2013). Kannada language parameters for speaker identification with the constraint of limited data. International Journal of Image, Graphics and Signal Processing, 5(9), 14.","journal-title":"International Journal of Image, Graphics and Signal Processing"},{"key":"10149_CR16","unstructured":"Opochinsky, R., Moradi, M., & Gannot, S. (2024). Single-microphone speaker separation and voice activity detection in noisy and reverberant environments. arXiv preprint arXiv:2401.03448."},{"key":"10149_CR17","doi-asserted-by":"publisher","unstructured":"Ozturk, M. Z., Wu, C., Wang, B., Wu, M., & Liu, K. R. (2024). Radiovad: mmwave-based noise and interference-resilient voice activity detection. IEEE Internet of Things Journal. https:\/\/doi.org\/10.1109\/JIOT.2024.3394353","DOI":"10.1109\/JIOT.2024.3394353"},{"key":"10149_CR18","unstructured":"Pritam, L. S., Jainar, S. J., & Nagaraja, B. G. (2018). A comparison of features for multilingual speaker identification\u2014A review and some experimental results. International Journal of Recent Technology and Engineering, 7(4s2), 299\u2013304. December 2019."},{"key":"10149_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.iswa.2023.200310","volume":"21","author":"GP Raghudathesh","year":"2024","unstructured":"Raghudathesh, G. P., Chandrakala, C. B., Rao, D., & Yadava, T. (2024). Noise estimation based on optimal smoothing and minimum controlled through recursive averaging for speech enhancement. Intelligent Systems with Applications, 21, 200310.","journal-title":"Intelligent Systems with Applications"},{"key":"10149_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2019.06.005","volume":"59","author":"Z-H Tan","year":"2020","unstructured":"Tan, Z.-H., Dehak, N., et al. (2020). rVAD: An unsupervised segment-based robust voice activity detection method. Computer Speech & Language, 59, 1\u201321.","journal-title":"Computer Speech & Language"},{"issue":"7","key":"10149_CR21","doi-asserted-by":"publisher","first-page":"4041","DOI":"10.1007\/s00034-022-01973-0","volume":"41","author":"G Thimmaraja Yadava","year":"2022","unstructured":"Thimmaraja Yadava, G., Nagaraja, B. G., & Jayanna, H. S. (2022). Enhancements in continuous Kannada ASR system by background noise elimination. Circuits, Systems, and Signal Processing, 41(7), 4041\u20134067.","journal-title":"Circuits, Systems, and Signal Processing"},{"issue":"1","key":"10149_CR22","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/s10772-020-09786-9","volume":"24","author":"YG Thimmaraja","year":"2021","unstructured":"Thimmaraja, Y. G., Nagaraja, B. G., & Jayanna, H. S. (2021). Speech enhancement and encoding by combining SS-VAD and LPC. International Journal of Speech Technology, 24(1), 165\u2013172.","journal-title":"International Journal of Speech Technology"},{"key":"10149_CR23","doi-asserted-by":"crossref","unstructured":"Wang, D., Xiao, X., Kanda, N., Yousefi, M., Yoshioka, T., & Wu, J. (2024). Profile-error-tolerant target-speaker voice activity detection. In 2024 IEEE international conference on acoustics, speech and signal processing (ICASSP 2024) (pp. 11906\u201311910). IEEE.","DOI":"10.1109\/ICASSP48485.2024.10446475"},{"key":"10149_CR24","doi-asserted-by":"publisher","first-page":"31238","DOI":"10.1109\/ACCESS.2023.3262518","volume":"11","author":"M Wenpeng","year":"2023","unstructured":"Wenpeng, M., & Liu, B. (2023). Voice activity detection optimized by adaptive attention span transformer. IEEE Access, 11, 31238\u201331243.","journal-title":"IEEE Access"},{"key":"10149_CR25","doi-asserted-by":"crossref","unstructured":"Yadava, G. T., Nagaraja, B. G., & Jayanna, H. S. (2022). Performance evaluation of spectral subtraction with vad and time\u2013frequency filtering for speech enhancement. In Emerging research in computing, information, communication and applications: Proceedings of ERCICA 2022 (pp. 407\u2013414). Springer.","DOI":"10.1007\/978-981-19-5482-5_35"},{"issue":"5","key":"10149_CR26","first-page":"1123","volume":"19","author":"L Yang","year":"2010","unstructured":"Yang, L., & Loizou, P. C. (2010). Estimators of the magnitude-squared spectrum and methods for incorporating snr uncertainty. IEEE Transactions on Audio, Speech, and Language Processing, 19(5), 1123\u20131137.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"10149_CR27","first-page":"229","volume":"16","author":"H Yi","year":"2007","unstructured":"Yi, H., & Loizou, P. C. (2007). Evaluation of objective quality measures for speech enhancement. IEEE Transactions on Audio, Speech, and Language Processing, 16(1), 229\u2013238.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"2","key":"10149_CR28","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1080\/17445302.2022.2159288","volume":"19","author":"W Zhang","year":"2024","unstructured":"Zhang, W., Liu, X., Han, D., Zhang, Q., & Yang, J. (2024). Voice activity detection for audio signal of voyage data recorder using residue network and attention mechanism. Ships and Offshore Structures, 19(2), 243\u2013251.","journal-title":"Ships and Offshore Structures"},{"issue":"1","key":"10149_CR29","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1186\/s13636-022-00260-9","volume":"2022","author":"X-L Zhang","year":"2022","unstructured":"Zhang, X.-L., & Menglong, X. (2022). AUC optimization for deep learning-based voice activity detection. EURASIP Journal on Audio, Speech, and Music Processing, 2022(1), 27.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10149-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-024-10149-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10149-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T10:06:49Z","timestamp":1734343609000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-024-10149-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,5]]},"references-count":29,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["10149"],"URL":"https:\/\/doi.org\/10.1007\/s10772-024-10149-x","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,5]]},"assertion":[{"value":"29 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 October 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors do not have Conflict of interest on the manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}