{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:29:16Z","timestamp":1775230156222,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1109\/icassp.2018.8461921","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T18:24:48Z","timestamp":1537554288000},"page":"5549-5553","source":"Crossref","is-referenced-by-count":57,"title":["Temporal Modeling Using Dilated Convolution and Gating for Voice-Activity-Detection"],"prefix":"10.1109","author":[{"given":"Shuo-Yiin","family":"Chang","sequence":"first","affiliation":[]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[]},{"given":"Gabor","family":"Simko","sequence":"additional","affiliation":[]},{"given":"Tara N.","family":"Sainath","sequence":"additional","affiliation":[]},{"given":"Anshuman","family":"Tripathi","sequence":"additional","affiliation":[]},{"given":"Aaron","family":"van den Oord","sequence":"additional","affiliation":[]},{"given":"Oriol","family":"Vinyals","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/29.21701"},{"key":"ref11","article-title":"Improve-ments to deep convolutional neural networks for LVCSR","author":"sainath","year":"2013","journal-title":"Proc ASRU"},{"key":"ref12","year":"0","journal-title":"Type less talk more"},{"key":"ref13","author":"simonyan","year":"2014","journal-title":"Very Deep Convolutional Networks for Large-scale Image Recognition"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/0893-6080(90)90044-L"},{"key":"ref15","first-page":"289","article-title":"Wavelets, time-frequency methods and phase space","author":"holschneider","year":"1989","journal-title":"A Real-time Algorithm for Signal Analysis with the help of the Wavelet Transform"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-75988-8_29"},{"key":"ref17","author":"chen","year":"2016","journal-title":"Deeplab Semantic image segmentation with deep convolutional nets atrous convolution and fully connected crfs"},{"key":"ref18","author":"yu","year":"2015","journal-title":"Multi-scale context aggregation by dilated convolutions"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref4","article-title":"All for one: feature combination for highly channel-degraded speech activity detection","author":"graciarena","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref3","article-title":"Improvements to the IBM speech activity detection system for the darpa rats program","author":"thomas","year":"2015","journal-title":"Proc ICASSP"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-284","article-title":"Endpoint detection using grid long short-term memory networks for streaming speech recognition","author":"chang","year":"2017","journal-title":"InterSpeech"},{"key":"ref5","article-title":"Real-life voice activity detection with LSTM recurrent neural networks and an application to hollywood movies","author":"eyben","year":"2013","journal-title":"Proc ICA SSP"},{"key":"ref8","author":"peddinti","year":"2015","journal-title":"A time delay neural network architecture for efficient modeling of long temporal contexts"},{"key":"ref7","article-title":"Improved end-of-query detection for streaming speech recognition","author":"simko","year":"2017","journal-title":"InterSpeech"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-268"},{"key":"ref9","article-title":"Wavenet: A generative model for raw audio","author":"oord","year":"2016"},{"key":"ref1","article-title":"A comparative study of speech detection methods","volume":"97","author":"xie","year":"1997","journal-title":"Eurospeech"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2582924"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-429","article-title":"Highway-lstrn and recurrent highway networks for speech recognition","author":"pundak","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref24","article-title":"Large Scale Distributed Deep Networks","author":"dean","year":"2012","journal-title":"Proc NIPS"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1510","article-title":"Generated of large-scale simulated utterances in virtual rooms to train deep-neural networks for far-field speech recognition in google home","author":"kim","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref26","article-title":"Speech recognition for medical conversation","author":"chou","year":"2018","journal-title":"Submitted to ICASSP"},{"key":"ref25","article-title":"Understanding the Difficulty of Training Deep Feedforward Neural Networks","author":"glorot","year":"2014","journal-title":"Proc AIS-TATS"}],"event":{"name":"ICASSP 2018 - 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Calgary, AB","start":{"date-parts":[[2018,4,15]]},"end":{"date-parts":[[2018,4,20]]}},"container-title":["2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8450881\/8461260\/08461921.pdf?arnumber=8461921","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,23]],"date-time":"2020-08-23T21:03:42Z","timestamp":1598216622000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8461921\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/icassp.2018.8461921","relation":{},"subject":[],"published":{"date-parts":[[2018,4]]}}}