{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T07:10:59Z","timestamp":1775200259090,"version":"3.50.1"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434768","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Graph Connectionist Temporal Classification for Phoneme Recognition"],"prefix":"10.1109","author":[{"given":"Henry","family":"Graf\u00e9","sequence":"first","affiliation":[{"name":"KU Leuven,Department of Electrical Engineering-ESAT,Belgium"}]},{"given":"Hugo Van","family":"Hamme","sequence":"additional","affiliation":[{"name":"KU Leuven,Department of Electrical Engineering-ESAT,Belgium"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"141","DOI":"10.4324\/9780429320903-11","article-title":"The nature of phonetic transcription","author":"Cucchiarini","year":"2021","journal-title":"Manual of Clinical Phonetics"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2006-543"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2014-356"},{"issue":"5","key":"ref4","doi-asserted-by":"crossref","first-page":"1071","DOI":"10.1111\/1460-6984.12740","article-title":"I think that\u2019s what I heard? 
I\u2019m not sure: Speech and language therapists\u2019 views of, and practices in, phonetic transcription","volume":"57","author":"White","year":"2022","journal-title":"Int. J. Lang. Commun. Disord"},{"key":"ref5","first-page":"498","article-title":"Montreal Forced Aligner: Trainable Text-Speech Alignment Using Kaldi","volume-title":"Proc. Interspeech","author":"McAuliffe"},{"key":"ref6","volume-title":"The CMU Pronouncing Dictionary","author":"Weide","year":"1993"},{"key":"ref7","volume-title":"FONILEX Manual, version 1.0b","author":"Mertens","year":"1998"},{"key":"ref8","first-page":"369","article-title":"Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks","volume-title":"Proc. 23rd Int. Conf. Mach. Learn. (ICML)","author":"Graves"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414058"},{"key":"ref10","first-page":"13392","article-title":"Star Temporal Classification: Sequence Modeling with Partially Labeled Data","volume-title":"Advances in Neural Information Processing Systems 35 (NeurIPS 2022)","author":"Pratap"},{"key":"ref11","article-title":"W-CTC: a Connectionist Temporal Classification Loss with Wild Cards","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Cai"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-2258"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/taslp.2023.3306709"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/asru57964.2023.10389684"},{"key":"ref15","first-page":"6978","article-title":"Multiple-hypothesis CTC-based semi-supervised adaptation of end-to-end speech recognition","volume-title":"Proc. IEEE Int. Conf. Acoust., Speech, Signal Process. 
(ICASSP)","author":"Do"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/29.45616"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2016-595"},{"key":"ref18","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref19","first-page":"27826","article-title":"Unsupervised speech recognition","volume":"34","author":"Baevski","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref20","first-page":"8249","article-title":"Universal phone recognition with a multilingual allophone system","volume-title":"Proc. IEEE Int. Conf. Acoust., Speech, Signal Process. (ICASSP)","author":"Li"},{"key":"ref21","first-page":"2471","article-title":"Differentiable allophone graphs for language-universal speech recognition","volume-title":"Proc. Interspeech","author":"Yan"},{"key":"ref22","article-title":"Epitran: Precision G2P for Many Languages","volume-title":"Proc. Eleventh Int. Conf. Language Resources and Evaluation (LREC 2018)","author":"Mortensen"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(99)00034-5"},{"issue":"1","key":"ref24","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1006\/csla.2001.0184","article-title":"Weighted finite-state transducers in speech recognition","volume":"16","author":"Mohri","year":"2002","journal-title":"Computer Speech & Language"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49357.2023.10094567"},{"key":"ref26","first-page":"4218","article-title":"Common Voice: A Massively-Multilingual Speech Corpus","volume-title":"Proc. 12th Language Resources and Evaluation Conf. 
(LREC)","author":"Ardila"},{"key":"ref27","article-title":"DARPA TIMIT Acoustic-Phonetic Continuous Speech Corpus CD-ROM","volume-title":"NIST Speech Disc 1-1.1, NIST Interagency or Internal Report (NISTIR) 4930","author":"Garofolo","year":"1993"},{"key":"ref28","article-title":"CGN, an annotated corpus of spoken Dutch","volume-title":"Proceedings of the 4th International Workshop on Linguistically Interpreted Corpora (LINC-03) at EACL 2003","author":"Schuurman"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2008.01.002"},{"key":"ref30","first-page":"5036","article-title":"Conformer: Convolution-augmented Transformer for Speech Recognition","volume-title":"Proc. Interspeech","author":"Gulati"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2018-1456"},{"key":"ref32","article-title":"Adam: A Method for Stochastic Optimization","volume-title":"Proc. 3rd Int. Conf. Learn. Representations (ICLR)","author":"Kingma"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434768.pdf?arnumber=11434768","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:59:34Z","timestamp":1775192374000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434768\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434768","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}