{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T17:05:53Z","timestamp":1772039153323,"version":"3.50.1"},"reference-count":36,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639043","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T23:36:34Z","timestamp":1550187394000},"page":"720-726","source":"Crossref","is-referenced-by-count":57,"title":["From Audio to Semantics: Approaches to End-to-End Spoken Language Understanding"],"prefix":"10.1109","author":[{"given":"Parisa","family":"Haghani","sequence":"first","affiliation":[]},{"given":"Arun","family":"Narayanan","sequence":"additional","affiliation":[]},{"given":"Michiel","family":"Bacchiani","sequence":"additional","affiliation":[]},{"given":"Galen","family":"Chuang","sequence":"additional","affiliation":[]},{"given":"Neeraj","family":"Gaur","sequence":"additional","affiliation":[]},{"given":"Pedro","family":"Moreno","sequence":"additional","affiliation":[]},{"given":"Rohit","family":"Prabhavalkar","sequence":"additional","affiliation":[]},{"given":"Zhongdi","family":"Qu","sequence":"additional","affiliation":[]},{"given":"Austin","family":"Waters","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"265","article-title":"Tensorflow: a system for large-scale machine learning","volume":"16","author":"abadi","year":"2016","journal-title":"OSDI"},{"key":"ref32","first-page":"1171","article-title":"Scheduled sampling for sequence prediction with recurrent neural networks","author":"bengio","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref31","article-title":"State-of-the-art speech recognition with sequence-to-sequence models","author":"chiu","year":"2017"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119195"},{"key":"ref35","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424196"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2005.07.005"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1583"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461598"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461718"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref17","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2014"},{"key":"ref18","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016"},{"key":"ref19","article-title":"State-of-the-art speech recognition with sequence-to-sequence models","author":"chiu","year":"2017","journal-title":"Acoustics Speech and Signal Processing (ICASSP) 2017 IEEE International Conference on"},{"key":"ref28","article-title":"Fast and accurate recurrent neural network acoustic models for speech recognition","author":"sak","year":"2015","journal-title":"Proceedings of Interspeech"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268984"},{"key":"ref27","article-title":"Minimum word error rate training for attention-based sequence-to-sequence models","author":"prabhavalkar","year":"2017"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"399","DOI":"10.21437\/Interspeech.2017-234","article-title":"Acoustic modeling for google home","author":"li","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"137","DOI":"10.21437\/Interspeech.2017-1544","article-title":"Comparing human and machine errors in conversational speech transcription","author":"stolcke","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"schmidhuber","year":"1997","journal-title":"Neural Comput"},{"key":"ref5","first-page":"715","article-title":"Multi-domain joint semantic frame parsing using bi-directional rnn-lstm","author":"hakkani-t\u00fcr","year":"2016","journal-title":"InterSpeech"},{"key":"ref8","year":"0","journal-title":"Google duplex An AI system for accomplishing real-world tasks over the phone"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"132","DOI":"10.21437\/Interspeech.2017-405","article-title":"English conversational telephone speech recognition by humans and machines","author":"saon","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1002\/9781119992691"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461785"},{"key":"ref1","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref22","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref21","first-page":"685","article-title":"Attention-based recurrent neural network models for joint intent detection and slot filling","author":"liu","year":"2016","journal-title":"Proc INTERSPEECH"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1007379606734","article-title":"Multitask learning","volume":"28","author":"caruana","year":"1997","journal-title":"Machine Learning"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref26","author":"shannon","year":"2017","journal-title":"Optimizing expected word error rate via sampling for speech recognition"},{"key":"ref25","author":"luong","year":"2015"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","location":"Athens, Greece","start":{"date-parts":[[2018,12,18]]},"end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639043.pdf?arnumber=8639043","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T05:37:40Z","timestamp":1643261860000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639043\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639043","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}