{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T05:16:06Z","timestamp":1755926166704,"version":"3.41.0"},"reference-count":42,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1109\/asru.2017.8268987","type":"proceedings-article","created":{"date-parts":[[2018,1,25]],"date-time":"2018-01-25T21:43:53Z","timestamp":1516916633000},"page":"569-576","source":"Crossref","is-referenced-by-count":28,"title":["Exploring ASR-free end-to-end modeling to improve spoken language understanding in a cloud-based dialog system"],"prefix":"10.1109","author":[{"given":"Yao","family":"Qian","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rutuja","family":"Ubale","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vikram","family":"Ramanaryanan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Patrick","family":"Lange","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Suendermann-Oeft","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keelan","family":"Evanini","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eugene","family":"Tsuprun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref38","article-title":"The kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"IEEE 2011 workshop on automatic speech recognition and understanding (No EPFL-CONF-192584) IEEE Signal Processing Society"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2015.07.006"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-42816-1_13"},{"key":"ref30","first-page":"553","article-title":"Speech utterance classification model training without manual transcriptions","volume":"1","author":"wang","year":"2006","journal-title":"Proc ICASSP"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-82"},{"key":"ref35","article-title":"Why does unsupervised pre-training help deep learning?","author":"erhan","year":"2010","journal-title":"JMLR"},{"key":"ref34","article-title":"Roles of pre-training and fine-tuning in context-dependent dbn-hmms for real-world speech recognition","author":"yu","year":"2010","journal-title":"Proc NIPS Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607771"},{"key":"ref40","first-page":"69","article-title":"The fisher corpus: a resource for the next generations of speech-to-text","volume":"4","author":"cieri","year":"2004","journal-title":"LREC"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/34.391397"},{"key":"ref12","first-page":"449","article-title":"A data-driven spoken language understanding system","volume":"17","author":"he","year":"2003","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"3771","DOI":"10.21437\/Interspeech.2013-596","article-title":"Investi-gation of recurrent-neural-network architectures and learning methods for spoken language understanding","author":"mesnil","year":"2013","journal-title":"InterSpeech"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2383614"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707709"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947636"},{"key":"ref17","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","volume":"14","author":"graves","year":"2015","journal-title":"Proc ICML"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947700"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163899"},{"key":"ref28","article-title":"Learning spoken language without transcriptions","volume":"99","author":"gorin","year":"1999","journal-title":"Proc IEEE Workshop on Automatic Speech Recognition and Understanding"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/365153.365168"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2005.06.001"},{"article-title":"Natural language input for a computer problem solving system","year":"1964","author":"bobrow","key":"ref3"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.21437\/Eurospeech.1993-481","article-title":"Cmu's robust spoken language understanding system","volume":"93","author":"issar","year":"1993","journal-title":"Proceedings of EUROSPEECH"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3115\/1073445.1073446"},{"key":"ref5","first-page":"61","article-title":"TINA: A natural language system for spoken language applications","volume":"18","author":"seneff","year":"1992","journal-title":"Computational Linguistics"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3115\/981574.981582"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3115\/1075812.1075857"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-4658"},{"key":"ref9","article-title":"Concept-based spontaneous speech understanding system","author":"levin","year":"1995","journal-title":"Fourth European Conference on Speech Communication and Technology"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/ets2.12105"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref22","first-page":"6","article-title":"Architectures for deep neural network based acoustic models defined over windowed speech waveforms","author":"bhargava","year":"2015","journal-title":"Proc Inter-speech"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-291"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-686"},{"key":"ref41","first-page":"3214","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"peddinti","year":"2015","journal-title":"Proc of Inter-speech"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472652"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953076"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472669"}],"event":{"name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2017,12,16]]},"location":"Okinawa, Japan","end":{"date-parts":[[2017,12,20]]}},"container-title":["2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8260578\/8268903\/08268987.pdf?arnumber=8268987","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:47:44Z","timestamp":1751244464000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8268987\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/asru.2017.8268987","relation":{},"subject":[],"published":{"date-parts":[[2017,12]]}}}