{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T20:15:26Z","timestamp":1779912926275,"version":"3.53.1"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1109\/apsipaasc63619.2025.10848584","type":"proceedings-article","created":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T18:37:05Z","timestamp":1738003025000},"page":"1-6","source":"Crossref","is-referenced-by-count":1,"title":["Scale-invariant Online Voice Activity Detection under Various Environments"],"prefix":"10.1109","author":[{"given":"Ryu","family":"Takeda","sequence":"first","affiliation":[{"name":"Osaka University,SANKEN,Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kazunori","family":"Komatani","sequence":"additional","affiliation":[{"name":"Osaka University,SANKEN,Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tassp.1976.1162800"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1887"},{"issue":"8","key":"ref3","first-page":"512","article-title":"Spoken dialogue system development at the dialogue robot competition","volume":"77","author":"Higashinaka","year":"2021","journal-title":"The Journal of The Acoustic Society of Japan"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3390\/app10041522"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.sigdial-1.9"},{"key":"ref6","volume-title":"Silero VAD: Pre-trained enterprise-grade voice activity detector (VAD), number detector and language classifier","author":"Team","year":"2021"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052974"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-560"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21236\/ada164453"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/BF00344251"},{"key":"ref12","first-page":"396","article-title":"Handwritten digit recognition with a back-propagation network","volume-title":"Proc. of Advances in Neural Information Processing Systems (NeurIPS)","volume":"2","author":"LeCun"},{"key":"ref13","volume-title":"Pattern Recognition and Machine Learning","author":"Bishop","year":"2006"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/cesa.2006.4281886"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/eurospeech.1991-313"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ispacs.2006.364896"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2016-268"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-2466"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-598"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2505415"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2019.8902703"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11216"},{"key":"ref23","volume-title":"Adaptive Filter Theory","author":"Haykin","year":"1991"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/89.260359"},{"key":"ref25","first-page":"24","article-title":"Feature engineering in context-dependent deep neural networks for conversational speech transaction","volume-title":"Proc. of IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)","author":"Seide"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref28","author":"Ba","year":"2016","journal-title":"Layer normalization"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1613\/jair.953"},{"key":"ref30","article-title":"Corpus of spontaneous Japanese: Its design and evaluation","volume-title":"Proc. of ISCA & IEEE Workshop on Spontaneous Speech Processing and Recognition","author":"Maekawa"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806390"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2821"},{"key":"ref33","author":"Takamichi","year":"2019","journal-title":"JVS corpus: Free Japanese multi-speaker voice corpus"},{"key":"ref34","author":"Snyder","year":"2015","journal-title":"MUSAN: A Music, Speech, and Noise Corpus"},{"key":"ref35","article-title":"Acoustical sound database in real environments for sound scene understanding and hands-free speech recognition","volume-title":"Proc. of the Second International Conference on Language Resources and Evaluation (LREC)","author":"Nakamura"},{"key":"ref36","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. of Advances in Neural Information Processing Systems 32 (NeurIPS)","author":"Paszke"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.5555\/3042817.3043083"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.3269"},{"key":"ref39","article-title":"Averaging weights leads to wider optima and better generalization","volume-title":"Proc. of Conference on Uncertainty in Artificial Intelligence","author":"Izmailov"},{"key":"ref40","volume-title":"Confidence intervals for evaluation in machine learning","author":"Ferrer"}],"event":{"name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","location":"Macau, Macao","start":{"date-parts":[[2024,12,3]]},"end":{"date-parts":[[2024,12,6]]}},"container-title":["2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10848542\/10848533\/10848584.pdf?arnumber=10848584","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,28]],"date-time":"2025-01-28T06:10:38Z","timestamp":1738044638000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10848584\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc63619.2025.10848584","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]}}}