{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T17:51:47Z","timestamp":1740160307492,"version":"3.37.3"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2018,8,1]],"date-time":"2018-08-01T00:00:00Z","timestamp":1533081600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2019,7]]},"DOI":"10.1007\/s13042-018-0856-z","type":"journal-article","created":{"date-parts":[[2018,8,1]],"date-time":"2018-08-01T07:24:28Z","timestamp":1533108268000},"page":"1791-1803","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A voice activity detection algorithm in spectro-temporal domain using sparse representation"],"prefix":"10.1007","volume":"10","author":[{"given":"Mohadese","family":"Eshaghi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4970-8117","authenticated-orcid":false,"given":"Farbod","family":"Razzazi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alireza","family":"Behrad","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,1]]},"reference":[{"key":"856_CR1","doi-asserted-by":"crossref","unstructured":"Freeman DK, Cosier G, Southcott CB, Boyd I (1989) The voice activity detector for the pan European digital cellular mobile telephone service. In: International conference on acoustics, speech, and signal processing, Glascow, May 1989, pp\u00a0369\u2013372","DOI":"10.1109\/ICASSP.1989.266442"},{"key":"856_CR2","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TASL.2010.2052803","volume":"19","author":"PK Ghosh","year":"2011","unstructured":"Ghosh PK, Tsiartas A, Narayanan S (2011) Robust voice activity detection using long-term signal variability. IEEE Trans Audio Speech Lang Process 19:600\u2013613","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"856_CR3","unstructured":"Datao Y, Jiqing H, Guibin Z, Tieran Z (2012) Sparse power spectrum based robust voice activity detector. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), March 2012, pp 289\u2013292"},{"key":"856_CR4","doi-asserted-by":"crossref","unstructured":"Hongzhi W, Yuchao X, Meijing L (2011) Study on the MFCC similarity-based voice activity detection algorithm. In: International conference on artificial intelligence, management science and electronic commerce (AIMSEC), August 2011, pp\u00a04391\u20134394","DOI":"10.1109\/AIMSEC.2011.6009945"},{"key":"856_CR5","first-page":"709","volume-title":"All for one: feature combination for highly channel-degraded speech activity detection","author":"G Martin","year":"2013","unstructured":"Martin G, Abeer A, Dan E et al (2013) All for one: feature combination for highly channel-degraded speech activity detection. INTERSPEECH, Lyon, pp\u00a0709\u2013713"},{"issue":"1","key":"856_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/97.736233","volume":"6","author":"J Sohn","year":"1999","unstructured":"J. Sohn, N. S. Kim, and W. Sung (1999) A statistical model-based voice activity detection. IEEE Signal Process Lett 6(1):1\u20133","journal-title":"IEEE Signal Process Lett"},{"issue":"10","key":"856_CR7","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1109\/97.957270","volume":"8","author":"YD Cho","year":"2001","unstructured":"Cho YD, Kondoz A (2001) Analysis and improvement of a statistical model-based voice activity detector. IEEE Signal Process Lett 8(10):276\u2013278","journal-title":"IEEE Signal Process Lett"},{"issue":"3","key":"856_CR8","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1109\/97.995824","volume":"9","author":"F Beritelli","year":"2002","unstructured":"Beritelli F, Casale S, Ruggeri G, Serrano S (2002) Performance evaluation and comparison of G.729\/AMR\/fuzzy voice activity detectors. IEEE Signal Process Lett 9(3):85\u201388","journal-title":"IEEE Signal Process Lett"},{"issue":"3","key":"856_CR9","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1109\/89.905996","volume":"9","author":"E Nemer","year":"2001","unstructured":"Nemer E, Goubran R, Mahmoud S (2001) Robust voice activity detection using higher-order statistics in the LPC residual domain. IEEE Trans Speech Audio Process Lett 9(3):217\u2013231","journal-title":"IEEE Trans Speech Audio Process Lett"},{"issue":"9","key":"856_CR10","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1109\/35.620527","volume":"35","author":"AE Benyassine","year":"1997","unstructured":"Benyassine AE, Shlomot HY, Su D, Massaloux C, Lamblin, Petit JP (1997) ITU-T Recommendation G.729 Annex B: a silence compression scheme for use with G.729 optimized for V.70 digital simultaneous voice and data applications. IEEE Commun Mag Lett 35(9):64\u201373","journal-title":"IEEE Commun Mag Lett"},{"issue":"4","key":"856_CR11","doi-asserted-by":"publisher","first-page":"1102","DOI":"10.1016\/j.dsp.2009.11.008","volume":"20","author":"M Eshaghi","year":"2010","unstructured":"Eshaghi M, Karami MR, Mollaei (2010) Voice activity detection based on using wavelet packet. Digital Signal Process Lett 20(4):1102\u20131115","journal-title":"Digital Signal Process Lett"},{"issue":"7","key":"856_CR12","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/34.192463","volume":"11","author":"SG Mallat","year":"1989","unstructured":"Mallat SG (1989) A theory for multiresolution signal decomposition: the wavelet representation. IEEE Trans Pattern Anal Mach Intell Lett 11(7):674\u2013693","journal-title":"IEEE Trans Pattern Anal Mach Intell Lett"},{"key":"856_CR13","first-page":"042357","volume":"3","author":"N Mesgarani","year":"2007","unstructured":"Mesgarani N, Shamma S (2007) Denoising in the domain of spectro-temporal modulations. EURASIP J Audio Speech Music Process 2007(3):042357","journal-title":"EURASIP J Audio Speech Music Process"},{"key":"856_CR14","doi-asserted-by":"publisher","first-page":"681","DOI":"10.1109\/LSP.2013.2245894","volume":"20","author":"W Li","year":"2013","unstructured":"Li W, Zhou Y, Poh N, Zhou F, Liao Q (2013) Feature denoising using joint sparse representation for in-car speech recognition. IEEE Signal Process Lett 20:681\u2013684","journal-title":"IEEE Signal Process Lett"},{"key":"856_CR15","doi-asserted-by":"crossref","unstructured":"Mesgarani N, David S, Shamma SA (2007) Representation of phoneme in primary auditory cortex: how the brain analyzes speech. In: IEEE international conference on acoustic, speech and signal processing (ICASSP), vol 4. April 2007, Hawai, pp 765\u2013768","DOI":"10.1109\/ICASSP.2007.367025"},{"key":"856_CR16","unstructured":"Mirbagheri M, Mesgarani N, Shamma S (2010) Nonlinear filtering of spectrotemporal modulation in speech enhancement. In: IEEE international conference on acoustic, speech and signal processing (ICASSP), vol\u00a06. March 2010, pp 5478\u20135481"},{"key":"856_CR17","doi-asserted-by":"crossref","unstructured":"Kim C, Kumar K, Stern RM (2011) Binaural sound source separation motivated by auditory processing. In: IEEE international conference on acoustic, speech and signal processing (ICASSP), vol 5. May 2011, Prague, pp 5072\u20135075","DOI":"10.1109\/ICASSP.2011.5947497"},{"key":"856_CR18","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1016\/j.csl.2012.02.002","volume":"26","author":"C Mart\u00ednez","year":"2012","unstructured":"Mart\u00ednez C, Goddardb J, Milone D, Rufiner H (2012) Bio inspired sparse spectro-temporal representation of speech for robust classification. Comput Speech Lang 26:336\u2013348","journal-title":"Comput Speech Lang"},{"key":"856_CR19","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1109\/JSTSP.2009.2039171","volume":"4","author":"JF Gemmeke","year":"2010","unstructured":"Gemmeke JF, Van Hamme H, Cranen B, Boves L (2010) Compressive sensing for missing data imputation in noise robust speech recognition. IEEE J Sel Topics Signal Process 4:273\u2013282","journal-title":"IEEE J Sel Topics Signal Process"},{"key":"856_CR20","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1137\/S0097539792240406","volume":"24","author":"BK Natarajan","year":"1995","unstructured":"Natarajan BK (1995) Sparse approximate solutions to linear systems. SIAM J Comput 24:227\u2013234","journal-title":"SIAM J Comput"},{"key":"856_CR21","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1016\/j.csl.2010.06.004","volume":"25","author":"J Gemmeke","year":"2011","unstructured":"Gemmeke J, Cranen B, Remes U (2011) Sparse imputation for large vocabulary noise robust ASR. Comput Speech Lang 25:462\u2013479","journal-title":"Comput Speech Lang"},{"key":"856_CR22","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1109\/TSP.2008.2007606","volume":"57","author":"GH Mohimani","year":"2009","unstructured":"Mohimani GH, Babaie-Zadeh M, Jutten C (2009) A fast approach for overcomplete sparse decomposition based on smoothed L0 norm. IEEE Trans Signal Process 57:289\u2013301","journal-title":"IEEE Trans Signal Process"},{"key":"856_CR23","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1162\/089976603762552951","volume":"15","author":"K Kreutz-Delgado","year":"2003","unstructured":"Kreutz-Delgado K, Murray JF, Rao BD, Engan K, Lee T, Sejnowski TJ (2003) Dictionary learning algorithms for sparse representation. Neural Comput 15:349\u2013396","journal-title":"Neural Comput"},{"key":"856_CR24","doi-asserted-by":"publisher","first-page":"4311","DOI":"10.1109\/TSP.2006.881199","volume":"54","author":"M Aharon","year":"2006","unstructured":"Aharon M, Elad M, Bruckstein A (2006) K-svd: a algorithm for designing over complete dictionaries for sparse representation. IEEE Trans Signal Process 54:4311\u20134322","journal-title":"IEEE Trans Signal Process"},{"key":"856_CR25","doi-asserted-by":"publisher","first-page":"2309","DOI":"10.1016\/j.neucom.2007.01.013","volume":"71","author":"R Zdunek","year":"2007","unstructured":"Zdunek R, Cichocki A (2007) Non-negative matrix factorization with quadratic programming. Neurocomputing 71:2309\u20132320","journal-title":"Neurocomputing"},{"key":"856_CR26","unstructured":"Fisher WM, Doddington GR, Goudie M, Kathleen M (1986) The DARPA speech recognition research database: specifications and status. In: Proceedings of DARPA workshop on speech recognition, February 1986,\u00a0Palo. AJeo, pp 93\u201399"},{"issue":"3","key":"856_CR27","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1016\/0167-6393(93)90095-3","volume":"12","author":"A Varga","year":"1993","unstructured":"Varga A, Steeneken HJM (1993) Assessment for automatic speech recognition: II. NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems. Speech Commun 12(3):247\u2013251","journal-title":"Speech Commun"},{"key":"856_CR28","doi-asserted-by":"crossref","unstructured":"Raj B, Virtanen T, Chaudhure S, Singh R (2010) Non-negative matrix factorization based compensation of music for automatic speech recognition. In: Proceedings of international conference on speech and language processing, Makuhari, pp 717\u2013720","DOI":"10.21437\/Interspeech.2010-268"},{"issue":"1","key":"856_CR29","first-page":"601","volume":"4","author":"N Mesgarani","year":"2004","unstructured":"Mesgarani N, Shamma S, Slaney M (2004) Speech discrimination based on multiscale spectro-temporal modulations. Proc IEEE Int Conf Acoust Speech Signal Process 4(1):601\u2013604","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"issue":"9","key":"856_CR30","doi-asserted-by":"publisher","first-page":"1424","DOI":"10.1109\/TASLP.2014.2335055","volume":"22","author":"IV McLoughlin","year":"2014","unstructured":"McLoughlin IV (2014) Super-audible voice activity detection. IEEE Trans Speech Audio Process Lett 22(9):1424\u20131433","journal-title":"IEEE Trans Speech Audio Process Lett"},{"key":"856_CR31","doi-asserted-by":"crossref","unstructured":"Tan LN, Borgstrom BJ, Alwan A (2010) Voice activity detection using harmonic frequency components in likelihood ratio test. In: IEEE international conference on acoustics, speech, and signal processing (ICASSP), March 2010, Dallas, pp\u00a04466\u20134469","DOI":"10.1109\/ICASSP.2010.5495611"},{"key":"856_CR32","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1016\/j.specom.2003.10.002","volume":"42","author":"J Ramirez","year":"2004","unstructured":"Ramirez J, Segura JC, Benitez C, de la Torre A, Rubio A (2004) Efficient voice activity detection algorithms using long-term speech information. Speech Commun 42:271\u2013287","journal-title":"Speech Commun"},{"key":"856_CR33","doi-asserted-by":"publisher","DOI":"10.1186\/1687-4722-2013-21","author":"M Yanna","year":"2013","unstructured":"Yanna M, Nishihara A (2013) Efficient voice activity detection algorithm using long-term spectral flatness measure. EURASIP J Audio Speech Music Process. https:\/\/doi.org\/10.1186\/1687-4722-2013-21","journal-title":"EURASIP J Audio Speech Music Process"},{"key":"856_CR34","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-016-0092-y","author":"XK Yang","year":"2016","unstructured":"Yang XK, He L, Qu D, Zhang WQ (2016) Voice activity detection algorithm based on long-term pitch information. EURASIP J Audio Speech Music Process. https:\/\/doi.org\/10.1186\/s13636-016-0092-y","journal-title":"EURASIP J Audio Speech Music Process"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-018-0856-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13042-018-0856-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-018-0856-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,28]],"date-time":"2022-08-28T15:09:32Z","timestamp":1661699372000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13042-018-0856-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,1]]},"references-count":34,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2019,7]]}},"alternative-id":["856"],"URL":"https:\/\/doi.org\/10.1007\/s13042-018-0856-z","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"type":"print","value":"1868-8071"},{"type":"electronic","value":"1868-808X"}],"subject":[],"published":{"date-parts":[[2018,8,1]]},"assertion":[{"value":"28 November 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 August 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}