{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T12:26:37Z","timestamp":1777983997237,"version":"3.51.4"},"reference-count":54,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001824","name":"Grantov Agentura esk Republiky","doi-asserted-by":"publisher","award":["19-26934X"],"award-info":[{"award-number":["19-26934X"]}],"id":[{"id":"10.13039\/501100001824","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001823","name":"Ministerstvo kolstv Mldee a Tlovchovy","doi-asserted-by":"publisher","award":["e-Infrastructure CZ LM2018140"],"award-info":[{"award-number":["e-Infrastructure CZ LM2018140"]}],"id":[{"id":"10.13039\/501100001823","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["870930"],"award-info":[{"award-number":["870930"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/taslp.2022.3171975","type":"journal-article","created":{"date-parts":[[2022,5,3]],"date-time":"2022-05-03T16:15:18Z","timestamp":1651594518000},"page":"1902-1917","source":"Crossref","is-referenced-by-count":1,"title":["Non-Parametric Bayesian Subspace Models for Acoustic Unit Discovery"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4512-0471","authenticated-orcid":false,"given":"Lucas","family":"Ondel","sequence":"first","affiliation":[{"name":"LISN, CNRS, Universite Paris-Saclay, Orsay, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9852-3456","authenticated-orcid":false,"given":"Bolaji","family":"Yusuf","sequence":"additional","affiliation":[{"name":"Brno University of Technology, Faculty of Information Technology, Brno, Czechia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4951-5908","authenticated-orcid":false,"given":"Lukas","family":"Burget","sequence":"additional","affiliation":[{"name":"Brno University of Technology, Faculty of Information Technology, Brno, Czechia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7435-8510","authenticated-orcid":false,"given":"Murat","family":"Saraclar","sequence":"additional","affiliation":[{"name":"Bo&#x011F;azi&#x00E7;i University, Istanbul, Turkey"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683690"},{"key":"ref2","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Amodei","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2017-1566"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.cognition.2017.11.008"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-638"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268953"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-2904"},{"key":"ref8","first-page":"40","article-title":"A nonparametric Bayesian approach to acoustic model discovery","volume-title":"Proc. 50th Annu. Meeting Assoc. Comput. Linguistics, Long Papers-Volume 1","author":"Lee","year":"2012"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-642"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846245"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.04.033"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2938863"},{"key":"ref13","article-title":"vq-wav2vec: Self-supervised learning of discrete speech representations","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Baevski","year":"2020"},{"key":"ref14","article-title":"Learning hierarchical discrete linguistic units from visually-grounded speech","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Harwath","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1160"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2148"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-2224"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414899"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015439"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1976.10159"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-7687-1_928"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-7687-1_219"},{"key":"ref23","first-page":"263","article-title":"Reconnaissance automatique de la parole: Modlisation ou description","volume-title":"Journes dEtude sur la Parole","author":"Bourlard","year":"1996"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-45528-0"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/29.46546"},{"key":"ref26","article-title":"Discovering acoustic units from speech: A Bayesian approach","author":"Ondel","year":"2021"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.881693"},{"issue":"2","key":"ref28","first-page":"404","article-title":"The subspace Gaussian mixture model-A structured model for speech recognition","volume-title":"Comput. Speech Lang.","volume":"25","author":"Povey","year":"2011"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2012-343"},{"issue":"2","key":"ref30","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1023\/A:1007665907178","article-title":"An introduction to variational methods for graphical models","volume":"37","author":"Jordan","year":"1999","journal-title":"Mach. Learn."},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.2307\/2984875"},{"key":"ref32","article-title":"A very low resource language speech corpus for computational language documentation experiments","volume-title":"Proc. 11th Int. Conf. Lang. Resour. Eval.","author":"Godard","year":"2018"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1096"},{"key":"ref34","first-page":"27403","article-title":"DARPA TIMIT Acoustic-phonetic continuous speech corpus CDROM. NIST speech disc 1-1.1","author":"Garofolo","year":"1993"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639248"},{"key":"ref36","first-page":"1601","article-title":"An amharic speech corpus for large vocabulary continuous speech recognition","volume-title":"Proc. Interspeech 2005 - Eurospeech, 9th Eur. Conf. Speech Commun. Tech.","author":"Abate","year":"2005"},{"key":"ref37","first-page":"94","article-title":"Developments of swahili resources for an automatic speech recognition system","volume-title":"Proc. 3rd Workshop Spoken Lang. Technol. Under-Resour. Lang.","author":"Gelas","year":"2012"},{"key":"ref38","article-title":"Collecting resources in sub-saharan african languages for automatic speech recognition: A case study of wolof","volume-title":"Proc. 10th Int. Conf. Lang. Resour. Eval.","author":"Gauthier","year":"2016"},{"key":"ref39","first-page":"1886","article-title":"Speech technologies for african languages: Example of a multilingual calculator for education","volume-title":"Proc. Interspeech 2015, 16th Annu. Conf. Int. Speech Commun. Assoc.","author":"Besacier","year":"2015"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1002\/0471200611"},{"key":"ref41","first-page":"6306","article-title":"Neural discrete representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Oord","year":"2017"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-3232"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-2743"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-50"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2018-40"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref48","article-title":"XLS-R: Self-supervised cross-lingual speech representation learning at scale","volume-title":"CoRR","volume":"abs\/2111.09296","author":"Babu","year":"2021"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-329"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/5.18626"},{"key":"ref51","article-title":"Variational algorithms for approximate Bayesian inference","author":"Beal","year":"2003"},{"key":"ref52","first-page":"3577","article-title":"Auto-encoding variational bayes","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","year":"2014"},{"key":"ref53","first-page":"1278","article-title":"Stochastic backpropagation and approximate inference in deep generative models","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","author":"Rezende","year":"2014"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1198\/016214501750332758"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9657755\/09767690.pdf?arnumber=9767690","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T17:32:54Z","timestamp":1705944774000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9767690\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/taslp.2022.3171975","relation":{"has-preprint":[{"id-type":"doi","id":"10.36227\/techrxiv.16618135.v2","asserted-by":"object"},{"id-type":"doi","id":"10.36227\/techrxiv.16618135.v1","asserted-by":"object"}]},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}