{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T03:53:54Z","timestamp":1760586834982},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2017,4,11]],"date-time":"2017-04-11T00:00:00Z","timestamp":1491868800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Ambient Intell Human Comput"],"published-print":{"date-parts":[[2017,11]]},"DOI":"10.1007\/s12652-017-0482-8","type":"journal-article","created":{"date-parts":[[2017,4,11]],"date-time":"2017-04-11T07:13:20Z","timestamp":1491894800000},"page":"845-859","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Noise robust voice activity detection using joint phase and magnitude based feature enhancement"],"prefix":"10.1007","volume":"8","author":[{"given":"Khomdet","family":"Phapatanaburi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zeyan","family":"Oo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weifeng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seiichi","family":"Nakagawa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masahiro","family":"Iwahashi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,4,11]]},"reference":[{"issue":"9","key":"482_CR1","doi-asserted-by":"crossref","first-page":"64","DOI":"10.1109\/35.620527","volume":"35","author":"A Benyassine","year":"1997","unstructured":"Benyassine A, Shlomot E, Su H-Y, Massaloux D, Lamblin C, Petit J-P (1997) Itu-t recommendation g. 729 annex b: a silence compression scheme for use with g. 729 optimized for v. 70 digital simultaneous voice and data applications. IEEE Commun Mag 35(9):64\u201373","journal-title":"IEEE Commun Mag"},{"issue":"6","key":"482_CR2","doi-asserted-by":"crossref","first-page":"1965","DOI":"10.1109\/TSP.2006.874403","volume":"54","author":"J-H Chang","year":"2006","unstructured":"Chang J-H, Kim NS, Mitra SK (2006) Voice activity detection based on multiple statistical models. IEEE Trans Signal Process 54(6):1965\u20131976","journal-title":"IEEE Trans Signal Process"},{"key":"482_CR3","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"SB Davis","year":"1980","unstructured":"Davis SB, Mermelstein P (1980) Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans Acoust Speech Signal Process 28:357\u2013366","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"482_CR4","doi-asserted-by":"crossref","unstructured":"Enqing D, Heming Z, YongLi L (2002) Low bit and variable rate speech coding using local cosine transform. In: TENCON\u201902. Proceedings. 2002 IEEE Region 10 Conference on Computers, Communications, Control and Power Engineering, pp 423\u2013426","DOI":"10.1109\/TENCON.2002.1181304"},{"key":"482_CR5","first-page":"1871","volume":"9","author":"RE Fan","year":"2008","unstructured":"Fan RE, Chang KW, Hsieh CJ (2008) LIBLINEAR: a library for large linear classification. J Mach Learn Res 9:1871\u20131874","journal-title":"J Mach Learn Res"},{"key":"482_CR6","doi-asserted-by":"crossref","unstructured":"Freeman D, Cosier G (1989) The voice activity detector for the Pan-European digital cellular mobile telephone service. In: 1989 international conference on acoustics, speech, and signal processing, 1989. ICASSP-89. pp 369\u2013372","DOI":"10.1109\/ICASSP.1989.266442"},{"key":"482_CR7","doi-asserted-by":"crossref","unstructured":"Hendriks RC (2010) MMSE based noise PSD tracking with low complexity. 2010 IEEE international conference on acoustics speech and signal processing (ICASSP), pp 4266\u20134269","DOI":"10.1109\/ICASSP.2010.5495680"},{"key":"482_CR8","doi-asserted-by":"crossref","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"G Hinton","year":"2006","unstructured":"Hinton G, Osindero S, Teh Y (2006) A fast learning algorithm for deep belief nets. Neural Comput 18:1527\u20131554","journal-title":"Neural Comput"},{"key":"482_CR9","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Salakhutdinov RR (2006) Reducing the dimensionality of data with neural networks. Science 313:504\u2013507","journal-title":"Science"},{"key":"482_CR10","doi-asserted-by":"crossref","unstructured":"Junqua J, Reaves B, Mak B (1991) A study of endpoint detection algorithms in adverse conditions: incidence on a DTW and HMM recognizer. In: Second European conference on speech communication and technology","DOI":"10.21437\/Eurospeech.1991-313"},{"key":"482_CR11","doi-asserted-by":"crossref","unstructured":"Kim C, Stern RM (2012) Power-normalized cepstral coefficients (PNCC) for robust speech recognition. In: 2012 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 4101\u20134104","DOI":"10.1109\/ICASSP.2012.6288820"},{"key":"482_CR12","unstructured":"Kinnunen T, Chernenko E (2007) Voice activity detection using MFCC features and support vector machine. In: Conf. on Speech and Computer (SPECOM07), Moscow, Russia, pp 556\u2013561"},{"key":"482_CR13","doi-asserted-by":"crossref","first-page":"363","DOI":"10.1250\/ast.30.363","volume":"30","author":"N Kitaoka","year":"2009","unstructured":"Kitaoka N, Yamada T, Tsuge S (2009) CENSREC-1-C: an evaluation framework for voice activity detection under noisy environments. Acoust Sci Technol 30:363\u2013371","journal-title":"Acoust Sci Technol"},{"key":"482_CR14","doi-asserted-by":"crossref","unstructured":"Lu X, Tsao Y, Matsuda S, Hori C (2013) Speech enhancement based on deep denoising autoencoder. In: INTERSPEECH, pp 436\u2013440","DOI":"10.21437\/Interspeech.2013-130"},{"key":"482_CR15","doi-asserted-by":"crossref","unstructured":"Malah D, Cox RV, Accardi AJ (1999) Tracking speech-presence uncertainty to improve speech enhancement in non-stationary noise environments. In: 1999 IEEE international conference on acoustics, speech, and signal processing, 1999. Proceedings, pp 789\u2013792","DOI":"10.1109\/ICASSP.1999.759789"},{"key":"482_CR16","doi-asserted-by":"crossref","first-page":"2026","DOI":"10.1109\/TASL.2011.2109379","volume":"19","author":"I McCowan","year":"2011","unstructured":"McCowan I, Dean D (2011) The delta-phase spectrum with application to voice activity detection and speaker recognition. IEEE Trans Audio Speech Lang Process 19:2026\u20132038","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"482_CR17","doi-asserted-by":"crossref","first-page":"1085","DOI":"10.1109\/TASL.2011.2172422","volume":"20","author":"S Nakagawa","year":"2012","unstructured":"Nakagawa S, Wang L, Ohtsuka S (2012) Speaker identification and verification by combining MFCC and phase information. IEEE Trans Audio Speech Lang Process 20:1085\u20131095","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"482_CR18","unstructured":"Povey D, Ghoshal A (2011) The Kaldi speech recognition toolkit. In: IEEE 2011 workshop on automatic speech recognition and understanding (No. EPFL-CONF-192584). IEEE Signal Processing Society"},{"issue":"9","key":"482_CR19","doi-asserted-by":"crossref","first-page":"5093","DOI":"10.1007\/s11042-015-2849-1","volume":"75","author":"B Ren","year":"2016","unstructured":"Ren B, Wang L, Lu L, Ueda Y, Kai A (2016) Combination of bottleneck feature extraction and dereverberation for distant-talking speech recognition. Multimed Tools Appl 75(9):5093\u20135108","journal-title":"Multimed Tools Appl"},{"key":"482_CR20","doi-asserted-by":"crossref","unstructured":"Ryant N, Liberman M, Yuan J (2013) Speech activity detection on youtube using deep neural networks. In: INTERSPEECH, pp 728\u2013731","DOI":"10.21437\/Interspeech.2013-203"},{"key":"482_CR21","doi-asserted-by":"crossref","unstructured":"Seltzer ML, Yu D, Wang Y (2013) An investigation of deep neural networks for noise robust speech recognition. In: 2013 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 7398\u20137402","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"482_CR22","doi-asserted-by":"crossref","unstructured":"Tanrikulu O (1997) Residual echo signal in critically sampled subband acoustic echo cancellers based on IIR and FIR filter banks. IEEE Trans Signal Process 45:901\u2013912","DOI":"10.1109\/78.564178"},{"key":"482_CR23","doi-asserted-by":"crossref","unstructured":"Tong R, Ma B, Lee KA, You C (2006) The IIR NIST 2006 Speaker Recognition System: Fusion of Acoustic and Tokenization Features. In: Presentation in 5th Int. Symp. on Chinese Spoken Language Processing, ISCSLP","DOI":"10.1007\/11939993_59"},{"key":"482_CR24","doi-asserted-by":"crossref","first-page":"377","DOI":"10.1049\/ip-i-2.1992.0052","volume":"I","author":"R Tucker","year":"1992","unstructured":"Tucker R (1992) Voice activity detection using a periodicity measure. IEE Proc Commu Speech Vis I:377\u2013380","journal-title":"IEE Proc Commu Speech Vis"},{"issue":"92","key":"482_CR25","first-page":"1","volume":"2015","author":"Y Ueda","year":"2015","unstructured":"Ueda Y, Wang L, Kai A, Ren B (2015) Environment-dependent denoising autoencoder for distant-talking speech recognition. EURASIP J Adv Signal Process 2015(92):1\u201311","journal-title":"EURASIP J Adv Signal Process"},{"key":"482_CR26","doi-asserted-by":"crossref","unstructured":"Wang L, Minami K, Yamamoto K, Nakagawa S (2010) Speaker identification by combining MFCC and phase information in noisy environments. In: 2010 IEEE international conference on acoustics speech and signal processing (ICASSP), pp 4502\u20134505","DOI":"10.1109\/ICASSP.2010.5495586"},{"key":"482_CR27","doi-asserted-by":"crossref","unstructured":"Wang L, Ren B, Ueda Y (2014) Denoising autoencoder and environment adaptation for distant-talking speech recognition with asynchronous speech recording. In: asia-pacific signal and information processing association, 2014 annual summit and conference (APSIPA), pp 1\u20135","DOI":"10.1109\/APSIPA.2014.7041548"},{"key":"482_CR28","doi-asserted-by":"crossref","unstructured":"Wang L, Yoshida Y, Kawakami Y, Nakagawa S (2015) Relative phase information for detecting human speech and spoofed speech. In: INTERSPEECH, pp 2092\u20132096","DOI":"10.21437\/Interspeech.2015-473"},{"key":"482_CR29","doi-asserted-by":"crossref","unstructured":"Williamson DS, Wang Y, Wang D (2016a) Complex ratio masking for joint enhancement of magnitude and phase. In: 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 5220\u20135224","DOI":"10.1109\/ICASSP.2016.7472673"},{"issue":"3","key":"482_CR30","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1109\/TASLP.2015.2512042","volume":"24","author":"DS Williamson","year":"2016","unstructured":"Williamson DS, Wang Y, Wang D (2016b) Complex ratio masking for monaural speech separation. IEEE\/ACM Trans Audio Speech Lang Process 24(3):483\u2013492","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"482_CR31","doi-asserted-by":"crossref","first-page":"466","DOI":"10.1109\/LSP.2011.2159374","volume":"18","author":"J Wu","year":"2011","unstructured":"Wu J, Zhang X (2011) Efficient multiple kernel support vector machine based voice activity detection. IEEE Signal Process Lett 18:466\u2013469","journal-title":"IEEE Signal Process Lett"},{"key":"482_CR32","doi-asserted-by":"crossref","unstructured":"Xia B, Bao C (2013) Speech enhancement with weighted denoising auto-encoder. In: INTERSPEECH, pp 3444\u20133448","DOI":"10.21437\/Interspeech.2013-754"},{"key":"482_CR33","unstructured":"Xiao, X. (2016). SignalGraph. https:\/\/github.com\/singaxiong\/SignalGraph"},{"key":"482_CR34","unstructured":"Xiao X, Zhao S, Nguyen DHH (2014) The NTU-ADSC systems for reverberation challenge 2014. In: Proc, REVERB challenge workshop"},{"key":"482_CR35","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1109\/TASLP.2014.2364452","volume":"23","author":"Y Xu","year":"2015","unstructured":"Xu Y, Du J, Dai L, Lee C (2015) A regression approach to speech enhancement based on deep neural networks. IEEE\/ACM Trans Audio Speech Lang Process 23:7\u201319","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"482_CR36","doi-asserted-by":"crossref","unstructured":"Xu Y, Du J, Dai LR, Lee CH (2014) Dynamic noise aware training for speech enhancement based on deep neural networks. In: INTERSPEECH, pp 2670\u20132674","DOI":"10.21437\/Interspeech.2014-571"},{"issue":"8","key":"482_CR37","doi-asserted-by":"crossref","first-page":"2624","DOI":"10.1109\/TASL.2011.2125953","volume":"19","author":"D Ying","year":"2011","unstructured":"Ying D, Yan Y, Dang J, Soong FK (2011) Voice activity detection based on an unsupervised learning framework. IEEE Trans Audio Speech Lang Process 19(8):2624\u20132633","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"2","key":"482_CR38","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1109\/TASLP.2015.2505415","volume":"24","author":"X-L Zhang","year":"2016","unstructured":"Zhang X-L, Wang D (2016) Boosting contextual information for deep neural network based voice activity detection. IEEE\/ACM Trans Audio Speech Lang Process 24(2):252\u2013264","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"482_CR39","doi-asserted-by":"crossref","first-page":"697","DOI":"10.1109\/TASL.2012.2229986","volume":"21","author":"XL Zhang","year":"2013","unstructured":"Zhang XL, Wu J (2013a) Deep belief networks based voice activity detection. IEEE Trans Audio Speech Lang Process 21:697\u2013710","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"482_CR40","doi-asserted-by":"crossref","unstructured":"Zhang XL, Wu J (2013b) Denoising deep neural networks based voice activity detection. In: 2013 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 853\u2013857","DOI":"10.1109\/ICASSP.2013.6637769"},{"key":"482_CR41","doi-asserted-by":"crossref","unstructured":"Zou YX, Zheng WQ, Shi W, Liu H (2014) Improved voice activity detection based on support vector machine with high separable speech feature vectors. In: 2014 19th international conference on digital signal processing (DSP), pp 763\u2013767","DOI":"10.1109\/ICDSP.2014.6900767"}],"container-title":["Journal of Ambient Intelligence and Humanized Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12652-017-0482-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-017-0482-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-017-0482-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,23]],"date-time":"2023-08-23T02:46:53Z","timestamp":1692758813000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12652-017-0482-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,4,11]]},"references-count":41,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2017,11]]}},"alternative-id":["482"],"URL":"https:\/\/doi.org\/10.1007\/s12652-017-0482-8","relation":{},"ISSN":["1868-5137","1868-5145"],"issn-type":[{"value":"1868-5137","type":"print"},{"value":"1868-5145","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,4,11]]}}}