{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T06:42:06Z","timestamp":1764225726752,"version":"3.37.3"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,13]],"date-time":"2021-10-13T00:00:00Z","timestamp":1634083200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,10,13]],"date-time":"2021-10-13T00:00:00Z","timestamp":1634083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,13]],"date-time":"2021-10-13T00:00:00Z","timestamp":1634083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100015667","name":"National Authority for Scientific Research and Innovation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015667","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10,13]]},"DOI":"10.1109\/sped53181.2021.9587345","type":"proceedings-article","created":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T20:53:34Z","timestamp":1635800014000},"page":"132-138","source":"Crossref","is-referenced-by-count":5,"title":["Establishing a Baseline of Romanian Speech-to-Text Models"],"prefix":"10.1109","author":[{"given":"Dan","family":"Ungureanu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Madalina","family":"Badeanu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gabriela-Catalina","family":"Marica","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mihai","family":"Dascalu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dan Ioan","family":"Tufis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"The kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"IEEE 2011 workshop on automatic speech recognition and understanding no CONF IEEE Signal Processing Society"},{"key":"ref11","first-page":"2345","article-title":"Sequence-discriminative training of deep neural networks","volume":"2013","author":"vesely?","year":"2013","journal-title":"InterSpeech"},{"article-title":"Rwth asr systems for librispeech: Hybrid vs attention&#x2013;w\/o data augmentation","year":"2019","author":"l\u00fcscher","key":"ref12"},{"key":"ref13","first-page":"395","article-title":"Towards a romanian end-to-end automatic speech recognition based on deepspeech2","volume":"21","author":"avram","year":"2020","journal-title":"Proc Rom Acad Ser A"},{"key":"ref14","first-page":"6606","article-title":"Rsc: A romanian read speech corpus for automatic speech recognition","author":"georgescu","year":"2020","journal-title":"Proceedings of the 12th Language Resources and Evaluation Conference"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SPED.2019.8906555"},{"key":"ref16","first-page":"1","article-title":"The swara speech corpus: A large parallel romanian read speech dataset","author":"stan","year":"2017","journal-title":"Speech Technology and Human-Computer Dialogue (SpeD) 2017 International Conference on"},{"key":"ref17","first-page":"90","article-title":"Crowd-sourced, automatic speech-corpora collection&#x2013;building the romanian anonymous speech corpus","author":"dumitrescu","year":"2014","journal-title":"CCURL 2014 Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2010.12.002"},{"article-title":"The mara corpus: Expressivity in end-to-end tts systems using synthesised speech data","year":"2020","author":"stan","key":"ref19"},{"key":"ref28","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies Volume 1 (Long and Short Papers)"},{"journal-title":"Training acoustic model on librispeech","year":"2017","author":"xiong","key":"ref4"},{"journal-title":"Magicdata mandarin chinese read speech corpus","year":"0","key":"ref27"},{"key":"ref3","first-page":"2","article-title":"The cmu sphinx-4 speech recognition system","volume":"1","author":"lamere","year":"2003","journal-title":"IEEE Intl Conf on Acoustics Speech and Signal Processing (ICASSP 2003)"},{"article-title":"Deep speech: Scaling up end-to-end speech recognition","year":"2014","author":"hannun","key":"ref6"},{"key":"ref29","first-page":"6626","article-title":"Robert&#x2013;a romanian bert model","author":"masala","year":"2020","journal-title":"Proceedings of the 28th International Conference on Computational Linguistics"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref8","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","author":"amodei","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"ref2","first-page":"374","article-title":"Distance measures for speech recognition, psychological and instrumental","volume":"116","author":"mermelstein","year":"1976","journal-title":"Pattern Recognition and Artificial Intelligence"},{"article-title":"The capio 2017 conversational speech recognition system","year":"2017","author":"han","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MASSP.1986.1165342"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref22","article-title":"Srilm-an extensible language modeling toolkit","author":"stolcke","year":"2002","journal-title":"Seventh International Conference on Spoken Language Processing"},{"key":"ref21","first-page":"187","article-title":"Kenlm: Faster and smaller language model queries","author":"heafield","year":"2011","journal-title":"Proceedings of the Sixth Workshop on Statistical Machine Translation"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref23","first-page":"320","article-title":"100k+ words, machine-readable, pronunciation dictionary for the romanian language","author":"domokos","year":"2012","journal-title":"2012 Proceedings of the 20th European Signal Processing Conference (EUSIPCO)"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04208-9_36"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"}],"event":{"name":"2021 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)","start":{"date-parts":[[2021,10,13]]},"location":"Bucharest, Romania","end":{"date-parts":[[2021,10,15]]}},"container-title":["2021 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9586791\/9587340\/09587345.pdf?arnumber=9587345","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:56:24Z","timestamp":1652201784000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9587345\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,13]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/sped53181.2021.9587345","relation":{},"subject":[],"published":{"date-parts":[[2021,10,13]]}}}