{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T01:10:08Z","timestamp":1751245808277,"version":"3.41.0"},"reference-count":31,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1109\/asru.2017.8268951","type":"proceedings-article","created":{"date-parts":[[2018,1,25]],"date-time":"2018-01-25T21:43:53Z","timestamp":1516916633000},"page":"309-315","source":"Crossref","is-referenced-by-count":12,"title":["Attention-based Wav2Text with feature transfer learning"],"prefix":"10.1109","author":[{"given":"Andros","family":"Tjandra","sequence":"first","affiliation":[]},{"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[]},{"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163903"},{"key":"ref30","article-title":"A scalable approach to using DNN-derived features in GMM-HMM based acoustic modeling for LVCSR","author":"yan","year":"2013","journal-title":"Proc INTERSPEECH"},{"journal-title":"End-to-end continuous speech recognition using attention-based recurrent nn First results","year":"2014","author":"chorowski","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"journal-title":"Wav2 letter an end-to-end convnet-based speech recognition system","year":"2016","author":"collobert","key":"ref12"},{"journal-title":"Neural machine translation by jointly learning to align and translate","year":"2014","author":"bahdanau","key":"ref13"},{"journal-title":"Effective approaches to attention-based neural machine translation[J]","year":"2015","author":"luong","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.4.541"},{"journal-title":"Network in Network","year":"2013","author":"lin","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"journal-title":"Striving for simplicity The all convolutional net","year":"2014","author":"springenberg","key":"ref18"},{"key":"ref19","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"2011 IEEE Workshop on Automatic Speech Recognition &amp; Understanding"},{"key":"ref28","first-page":"3531","article-title":"Cross-lingual transfer learning during supervised training in low resource scenarios","author":"das","year":"2015","journal-title":"InterSpeech"},{"key":"ref4","article-title":"Learning the speech front-end with raw waveform CLDNNs","volume":"2015","author":"sainath","year":"2015","journal-title":"InterSpeech"},{"key":"ref27","first-page":"3320","article-title":"How transferable are features in deep neural networks?","author":"yosinski","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178781"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3115\/1075527.1075614"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367023"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2016-1495","article-title":"Acoustic modelling from the signal domain using CNNs","volume":"2016","author":"ghahremani","year":"2016","journal-title":"InterSpeech"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"journal-title":"End-to-end phoneme sequence recognition using convolutional neural networks","year":"2013","author":"palaz","key":"ref2"},{"key":"ref9","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in English and Mandarin","author":"amodei","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1561\/2000000004"},{"journal-title":"First-pass large vocabulary continuous speech recognition using bidirectional recurrent dnns","year":"2014","author":"hannun","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2_2"},{"key":"ref21","article-title":"Rectifier nonlinearities improve neural network acoustic models","author":"maas","year":"2013","journal-title":"ICML Workshop on Deep Learning for Audio Speech and Language Processing"},{"journal-title":"Adam A method for stochastic optimization","year":"2014","author":"kingma","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075"}],"event":{"name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2017,12,16]]},"location":"Okinawa, Japan","end":{"date-parts":[[2017,12,20]]}},"container-title":["2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8260578\/8268903\/08268951.pdf?arnumber=8268951","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:47:38Z","timestamp":1751244458000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8268951\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/asru.2017.8268951","relation":{},"subject":[],"published":{"date-parts":[[2017,12]]}}}