{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T04:07:35Z","timestamp":1750910855371,"version":"3.41.0"},"reference-count":21,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,7]]},"DOI":"10.1109\/icme.2017.8019385","type":"proceedings-article","created":{"date-parts":[[2017,9,7]],"date-time":"2017-09-07T01:03:50Z","timestamp":1504746230000},"page":"1093-1098","source":"Crossref","is-referenced-by-count":0,"title":["Multi-scale feature based convolutional neural networks for large vocabulary speech recognition"],"prefix":"10.1109","author":[{"given":"Tong","family":"Fu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xihong","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607300"},{"key":"ref11","first-page":"3833","article-title":"Wavelet transform to hybrid support vector machine and hidden markov model for speech recognition","volume":"4","author":"shao","year":"2005","journal-title":"IEEE International Symposium on Circuits and Systems"},{"key":"ref12","first-page":"4580","article-title":"Con-volutional, long short-term memory, fully connected deep neural networks","author":"sainath","year":"2015","journal-title":"ICASSP 2015-2015 IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref13","first-page":"1","article-title":"Very deep multilingual convolutional neural networks for lvcsr","author":"sercu","year":"2015","journal-title":"Computer Science"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"2947","DOI":"10.21437\/Interspeech.2009-746","article-title":"In-vestigation into bottle-neck features for meeting speech recognition","author":"grezl","year":"2009","journal-title":"InterSpeech"},{"key":"ref15","first-page":"3288","author":"sermanet","year":"2012","journal-title":"Convolutional neural networks applied to house numbers digit classification"},{"key":"ref16","article-title":"Stochastic pooling for regularization of deep convolutional neural network","author":"zeiler","year":"2013","journal-title":"Computer Science"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/29.46546"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1990.2.4.490"},{"key":"ref19","first-page":"528","article-title":"Asyn-chronous peer-to-peer data mining with stochastic gradient descent","author":"ormndi","year":"2011","journal-title":"Euro-Par 2011 Parallel Processing-International Conference Euro-Par 2011"},{"key":"ref4","article-title":"Deep speech: Scaling up end-to-end speech recognition","author":"hannun","year":"2014","journal-title":"Computer Science"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"437","DOI":"10.21437\/Interspeech.2011-169","article-title":"Conversational speech transcription using context-dependent deep neural networks","author":"seide","year":"2011","journal-title":"INTERSPEECH 2011 Conference of the InternationalSpeech Communication Association"},{"journal-title":"Convolutional Networks for Images Speech and Time Series","year":"1997","author":"lecun","key":"ref6"},{"key":"ref5","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"890","DOI":"10.21437\/Interspeech.2014-223","article-title":"Acoustic modeling with deep neural networks using raw time signal for lvcsr","author":"tske","year":"2014","journal-title":"InterSpeech"},{"key":"ref7","article-title":"Convolutional neural networks for acoustic modeling of raw time signal in lvcsr","author":"golik","year":"2015","journal-title":"InterSpeech"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref1","article-title":"Roles of pre-training and fine-tuning in context-dependent dbn-hmms for real-world speech recognition","author":"yu","year":"2010","journal-title":"Proc NIPS Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"ref9","article-title":"Wavelet transform speech recognition using vector quantization, dynamic time warping and artificial neural networks","author":"murali","year":"1994","journal-title":"Computer Aided Systems Engineering & Telecommunications & Information Science Laboratory"},{"key":"ref20","first-page":"337","article-title":"On the difficulty of training recurrent neural networks","volume":"52","author":"gustavsson","year":"2013","journal-title":"Computer Science"},{"key":"ref21","first-page":"338","article-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling","author":"hasim","year":"2014","journal-title":"TERSPEECH"}],"event":{"name":"2017 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2017,7,10]]},"location":"Hong Kong, Hong Kong","end":{"date-parts":[[2017,7,14]]}},"container-title":["2017 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8014303\/8019290\/08019385.pdf?arnumber=8019385","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T16:26:29Z","timestamp":1750868789000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8019385\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/icme.2017.8019385","relation":{},"subject":[],"published":{"date-parts":[[2017,7]]}}}