{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T09:54:41Z","timestamp":1729677281770,"version":"3.28.0"},"reference-count":37,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,10]]},"DOI":"10.1109\/iros.2018.8593925","type":"proceedings-article","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T02:33:30Z","timestamp":1548297210000},"page":"2503-2510","source":"Crossref","is-referenced-by-count":2,"title":["Multi-timescale Feature-extraction Architecture of Deep Neural Networks for Acoustic Model Training from Raw Speech Signal"],"prefix":"10.1109","author":[{"given":"Ryu","family":"Takeda","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kazuhiro","family":"Nakadai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kazunori","family":"Komatani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"Proceedings of IEEE Workshop on Automatic Speech Recognition and Understanding"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-268"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.20965\/jrm.2017.p0016"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7354250"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2004.1326009"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404852"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-76336-9_3"},{"key":"ref34","article-title":"Corpus of spontaneous Japanese: Its design and evaluation","author":"maekawa","year":"2003","journal-title":"Proceedings of ISCA & IEEE Workshop on Spontaneous Speech Processing and Recognition"},{"key":"ref10","first-page":"26","article-title":"Convolutional neural networks for acoustic modeling of raw time signalin LVCSR","author":"golik","year":"2015","journal-title":"InterSpeech"},{"key":"ref11","article-title":"Analysis of CNN-based speech recognition system using raw speech as input","author":"palaz","year":"2015","journal-title":"IDIAP tech report"},{"key":"ref12","first-page":"3214","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"peddinti","year":"2015","journal-title":"Proc INTERSPEECH"},{"journal-title":"Kaldi csj egs","year":"0","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1049\/el:19940400"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1459"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471706"},{"key":"ref17","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"duchi","year":"2011","journal-title":"The Journal of Machine Learning Research"},{"key":"ref18","first-page":"338","article-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling","author":"sak","year":"2014","journal-title":"Proceedings of Interspeech"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"ref28","first-page":"111","article-title":"Design and implementation of a robot audition system for automatic speech recognition of simultaneous speech","author":"yamamoto","year":"2007","journal-title":"Proc of ASRU"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"437","DOI":"10.21437\/Interspeech.2011-169","article-title":"Conversational speech transcription using context-dependent deep neural network","author":"seide","year":"2011","journal-title":"Proceedings of Interspeech"},{"key":"ref27","first-page":"1076","article-title":"Maximum a posteriori adaptation of network parameters in deep models","author":"huang","year":"2015","journal-title":"Proceedings of Interspeech"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1109\/ASRU.2011.6163899","article-title":"Feature engineering in context-dependent deep neural networks for conversational speech transaction","author":"seide","year":"2011","journal-title":"Proceedings of the IEEE Workshop on Automatic Speech Recognition and Understanding"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1495"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2007.900612"},{"key":"ref5","first-page":"6","article-title":"Architectures for deep neural network based acoustic models defined over windowed speech waveforms","author":"bhargava","year":"2015","journal-title":"Proceeding of Interspeech"},{"key":"ref8","first-page":"890","article-title":"Acoustic modeling with deep neural networks using raw time signal for LVCSR","author":"t\u00fcske","year":"2014","journal-title":"Proceedings of Interspeech"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707746"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178847"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1996.0013"},{"journal-title":"Network in Network","year":"2013","author":"lin","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707705"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854825"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2006.11.005"},{"key":"ref25","first-page":"2877","article-title":"Robust i-Vector based adaptation of DNN acoustic model for speech recognition","author":"garimella","year":"2015","journal-title":"Proc INTERSPEECH"}],"event":{"name":"2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2018,10,1]]},"location":"Madrid","end":{"date-parts":[[2018,10,5]]}},"container-title":["2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8574473\/8593358\/08593925.pdf?arnumber=8593925","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,14]],"date-time":"2024-07-14T08:32:55Z","timestamp":1720945975000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8593925\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/iros.2018.8593925","relation":{},"subject":[],"published":{"date-parts":[[2018,10]]}}}