{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T20:46:52Z","timestamp":1769633212249,"version":"3.49.0"},"reference-count":51,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3064197","type":"journal-article","created":{"date-parts":[[2021,3,8]],"date-time":"2021-03-08T21:29:49Z","timestamp":1615238989000},"page":"39098-39110","source":"Crossref","is-referenced-by-count":3,"title":["Towards Constructing HMM Structure for Speech Recognition With Deep Neural Fenonic Baseform Growing"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0641-3178","authenticated-orcid":false,"given":"Lujun","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3552-3325","authenticated-orcid":false,"given":"Tobias","family":"Watzel","sequence":"additional","affiliation":[]},{"given":"Ludwig","family":"Kurzinger","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1096-1596","authenticated-orcid":false,"given":"Gerhard","family":"Rigoll","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"3935","article-title":"Enhancing the TED-LIUM corpus with selected data for language modeling and more TED talks","author":"rousseau","year":"2014","journal-title":"Proc LREC"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"2345","DOI":"10.21437\/Interspeech.2013-548","article-title":"Sequence-discriminative training of deep neural networks","author":"vesel\u00fd","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2711"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333837"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2306423"},{"key":"ref30","first-page":"697","article-title":"Analyzing the information entropy of states to optimize the number of states in an HMM-based off-line handwritten Arabic word recognizer","author":"jiang","year":"2012","journal-title":"Proc Int Conf Pattern Recognit (ICPR)"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-26061-3_54"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.543258"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/89.242490"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1988.196628"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IWFHR.2002.1030938"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICFHR.2010.23"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2003.1227642"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177699147"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-018-9535-4"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003972"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1417"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1121\/1.2003011"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053573"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2003.1227641"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1985.1164586"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref51","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref10","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref11","first-page":"491","article-title":"Jointly adversarial enhancement training for robust end-to-end speech recognition","author":"bin","year":"2019","journal-title":"Proc ISCA"},{"key":"ref40","article-title":"Timit acoustic phonetic continuous speech corpus","author":"garofolo","year":"1993","journal-title":"Linguistic Data Consortium"},{"key":"ref12","article-title":"RWTH ASR systems for LibriSpeech: Hybrid vs attention&#x2013;w\/o data augmentation","author":"l\u00fcscher","year":"2019","journal-title":"arXiv 1905 03072"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054345"},{"key":"ref15","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref16","first-page":"3214","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"peddinti","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref17","article-title":"Deep recurrent neural networks for acoustic modelling","author":"chan","year":"2015","journal-title":"arXiv 1504 01482"},{"key":"ref18","article-title":"The IBM 2015 english conversational telephone speech recognition system","author":"saon","year":"2015","journal-title":"arXiv 1505 05899"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1460"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref8","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref7","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/GlobalSIP.2017.8308665"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2004.02.005"},{"key":"ref45","first-page":"1","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"Proc IEEE Workshop Autom Speech Recog and Understanding"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-68456-7_17"},{"key":"ref47","article-title":"Speech and speaker recognition from raw waveform with SincNet","author":"ravanelli","year":"2018","journal-title":"arXiv 1812 05920"},{"key":"ref42","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"arXiv 1502 03167"},{"key":"ref41","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1007\/BF01071812","article-title":"Element-wise recognition of continuous speech composed of words from a specified dictionary","volume":"7","author":"vintsyuk","year":"1971","journal-title":"Cybernetics"},{"key":"ref44","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv 1412 6980"},{"key":"ref43","first-page":"1929","article-title":"Dropout: A simple way to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"J Mach Learn Res"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09371697.pdf?arnumber=9371697","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T17:22:01Z","timestamp":1724606521000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9371697\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3064197","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}