{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T07:15:09Z","timestamp":1778397309266,"version":"3.51.4"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,1,9]]},"DOI":"10.1109\/slt54892.2023.10023291","type":"proceedings-article","created":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T18:54:03Z","timestamp":1674845643000},"page":"92-99","source":"Crossref","is-referenced-by-count":6,"title":["Conformer-Based on-Device Streaming Speech Recognition with KD Compression and Two-Pass Architecture"],"prefix":"10.1109","author":[{"given":"Jinhwan","family":"Park","sequence":"first","affiliation":[{"name":"Samsung Research,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sichen","family":"Jin","sequence":"additional","affiliation":[{"name":"Samsung Research,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junmo","family":"Park","sequence":"additional","affiliation":[{"name":"Samsung Research,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sungsoo","family":"Kim","sequence":"additional","affiliation":[{"name":"Samsung Research,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dhairya","family":"Sandhyana","sequence":"additional","affiliation":[{"name":"Samsung Research,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changheon","family":"Lee","sequence":"additional","affiliation":[{"name":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Myoungji","family":"Han","sequence":"additional","affiliation":[{"name":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jungin","family":"Lee","sequence":"additional","affiliation":[{"name":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seokyeong","family":"Jung","sequence":"additional","affiliation":[{"name":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changwoo","family":"Han","sequence":"additional","affiliation":[{"name":"Samsung Research,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chanwoo","family":"Kim","sequence":"additional","affiliation":[{"name":"Samsung Research,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2840"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004027"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2846"},{"key":"ref5","article-title":"Monotonic chunkwise attention","volume-title":"International Conference on Learning Repre-sentations","author":"Chiu"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2460"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383583"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413803"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383606"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383518"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414941"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-234"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1341"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414607"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3015"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9688121"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"key":"ref18","article-title":"On using monolingual corpora in neural machine translation","volume":"abs\/1503.03535","author":"Gulcehre","year":"2015","journal-title":"CoRR"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414858"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413899"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413423"},{"key":"ref24","first-page":"100","article-title":"Convolution augmented trans-former for semi-supervised sound event detection","volume-title":"Proc. Workshop Detection Classification Acoust. Scenes Events (DCASE)","author":"Miyazaki"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"ref26","article-title":"Searching for activation functions","author":"Ramachandran","year":"2017","journal-title":"arXiv preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1162\/089976699300016467"},{"key":"ref28","article-title":"Attention-based models for speech recognition","volume":"28","author":"Chorowski","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413905"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3174"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.1109\/ASRU46091.2019.9003913","volume-title":"Recognizing long-form speech using streaming end-to-end models","author":"Narayanan","year":"2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3227"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2545928"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref35","article-title":"Googles neural machine translation system: Bridging the gap between human and machine translation","volume":"abs\/1609.08144","author":"Wu","year":"2016","journal-title":"CoRR"},{"key":"ref36","volume-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"Abadi","year":"2015"},{"key":"ref37","volume-title":"DeepTwist: Learning model compression via occasional weight distortion","author":"Lee","year":"2018"}],"event":{"name":"2022 IEEE Spoken Language Technology Workshop (SLT)","location":"Doha, Qatar","start":{"date-parts":[[2023,1,9]]},"end":{"date-parts":[[2023,1,12]]}},"container-title":["2022 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10022052\/10022330\/10023291.pdf?arnumber=10023291","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T08:37:41Z","timestamp":1707813461000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10023291\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,9]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/slt54892.2023.10023291","relation":{},"subject":[],"published":{"date-parts":[[2023,1,9]]}}}