{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:24:56Z","timestamp":1757312696208,"version":"3.37.3"},"reference-count":73,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"FAIR, Meta Platforms Inc."}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/taslp.2023.3296019","type":"journal-article","created":{"date-parts":[[2023,7,17]],"date-time":"2023-07-17T17:51:43Z","timestamp":1689616303000},"page":"3112-3126","source":"Crossref","is-referenced-by-count":8,"title":["LegoNN: Building Modular Encoder-Decoder Models"],"prefix":"10.1109","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0437-5988","authenticated-orcid":false,"given":"Siddharth","family":"Dalmia","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"given":"Dmytro","family":"Okhonko","sequence":"additional","affiliation":[{"name":"Samaya AI., Mountain View, CA, USA"}]},{"given":"Mike","family":"Lewis","sequence":"additional","affiliation":[{"name":"Meta Platforms Inc., Menlo Park, CA, USA"}]},{"given":"Sergey","family":"Edunov","sequence":"additional","affiliation":[{"name":"Meta Platforms Inc., Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5970-8631","authenticated-orcid":false,"given":"Shinji","family":"Watanabe","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6663-8600","authenticated-orcid":false,"given":"Florian","family":"Metze","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"given":"Luke","family":"Zettlemoyer","sequence":"additional","affiliation":[{"name":"Meta Platforms Inc., Menlo Park, CA, USA"}]},{"given":"Abdelrahman","family":"Mohamed","sequence":"additional","affiliation":[{"name":"Rembrand Inc., Palo Alto, CA, USA"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.2.181"},{"key":"ref12","first-page":"8024","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"0","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref56","first-page":"79","article-title":"A statistical approach to machine translation","volume":"16","author":"brown","year":"1990","journal-title":"Comput Linguistics"},{"article-title":"The label bias problem","year":"2019","author":"hannun","key":"ref15"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-80"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-2012"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref53","first-page":"1615","article-title":"CTC alignments improve autoregressive translation","author":"yan","year":"0","journal-title":"Proc 17th Conf Eur Chapter Assoc Comput Linguistics"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3288409"},{"article-title":"JAX: Composable transformations of python NumPy programs","year":"2018","author":"bradbury","key":"ref11"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2001.0184"},{"article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","year":"2015","author":"abadi","key":"ref10"},{"journal-title":"Statistical Methods for Speech Recognition","year":"1997","author":"jelinek","key":"ref54"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1997.609370"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1336"},{"key":"ref18","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"article-title":"Discretalk: Text-to-speech as a machine translation problem","year":"2020","author":"hayashi","key":"ref51"},{"article-title":"Palm: Scaling language modeling with pathways","year":"2022","author":"chowdhery","key":"ref50"},{"year":"2022","key":"ref46","article-title":"cuDNN CTC loss"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-595"},{"key":"ref48","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","author":"radford","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1009"},{"key":"ref42","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","author":"amodei","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003750"},{"key":"ref43","first-page":"551","article-title":"Single headed attention based sequence-to-sequence model for state-of-the-art results on switchboard","author":"t\u00fcske","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref49","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"radford","year":"2019","journal-title":"OpenAIRE blog"},{"key":"ref8","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"0","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics Hum Lang Technol"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref9","first-page":"1700","article-title":"Recurrent continuous translation models","author":"kalchbrenner","year":"0","journal-title":"Proc Conf Empirical Methods Natural Lang Process"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref3","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"0","journal-title":"Proc 31st Annu Conf Neural Inf Process Syst"},{"journal-title":"Design Rules The Power of Modularity Volume 1","year":"1999","author":"baldwin","key":"ref6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.11"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6301"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1149"},{"year":"2018","key":"ref36","article-title":"Multi-bleu.perl"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054626"},{"key":"ref30","first-page":"3935","article-title":"Enhancing the TED-LIUM corpus with selected data for language modeling and more ted talks","author":"rousseau","year":"0","journal-title":"Proc 9th Int Conf Lang Resour Eval"},{"article-title":"Transformers with convolutional context for ASR","year":"2019","author":"mohamed","key":"ref33"},{"article-title":"g2pe","year":"2019","author":"park","key":"ref32"},{"key":"ref2","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"0","journal-title":"Proc 27th Annu Conf Neural Inf Process Syst"},{"key":"ref1","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.83"},{"key":"ref38","first-page":"3515","article-title":"Aligned cross entropy for non-autoregressive machine translation","author":"ghazvininejad","year":"0","journal-title":"Proc 37th Int Conf Mach Learn"},{"key":"ref71","article-title":"Neural module networks for reasoning over text","author":"gupta","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref70","article-title":"Zero-shot learning through cross-modal transfer","author":"socher","year":"0","journal-title":"Proc 27th Annu Conf Neural Inf Process Syst"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6319"},{"key":"ref72","first-page":"2295","article-title":"Learning to control self-assembling morphologies: A study of generalization via modularity","author":"pathak","year":"0","journal-title":"Proc 33rd Annu Conf Neural Inf Process Syst"},{"article-title":"Layer normalization","year":"2016","author":"ba","key":"ref24"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00369"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989250"},{"article-title":"Switchboard-1 release 2","year":"1993","author":"godfrey","key":"ref26"},{"key":"ref25","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref69","first-page":"166","article-title":"Modular multitask reinforcement learning with policy sketches","author":"andreas","year":"0","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref20","first-page":"1243","article-title":"Convolutional sequence to sequence learning","author":"gehring","year":"0","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2020.3044547"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1633"},{"key":"ref22","first-page":"2340","article-title":"Stress test evaluation for natural language inference","author":"naik","year":"0","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics Hum Lang Technol"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.12"},{"key":"ref21","first-page":"368","article-title":"The information bottleneck method","author":"tishby","year":"0","journal-title":"Proc 37th Annu Allerton Conf Commun Control Comput"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075"},{"year":"2002","key":"ref28","article-title":"2000 HUB5 english evaluation transcripts"},{"year":"2002","key":"ref27","article-title":"2000 HUB5 english evaluation speech"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"article-title":"Deep speech: Scaling up end-to-end speech recognition","year":"2014","author":"hannun","key":"ref60"},{"key":"ref62","article-title":"Non-autoregressive neural machine translation","author":"gu","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref61","first-page":"1403","article-title":"Imputer: Sequence modelling via imputation and dynamic programming","author":"chan","year":"0","journal-title":"Proc 37th Int Conf Mach Learn"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9970249\/10184317.pdf?arnumber=10184317","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T18:15:55Z","timestamp":1693851355000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10184317\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":73,"URL":"https:\/\/doi.org\/10.1109\/taslp.2023.3296019","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"type":"print","value":"2329-9290"},{"type":"electronic","value":"2329-9304"}],"subject":[],"published":{"date-parts":[[2023]]}}}