{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T22:30:21Z","timestamp":1781649021415,"version":"3.54.5"},"reference-count":52,"publisher":"IEEE","funder":[{"DOI":"10.13039\/501100000582","name":"Carnegie Trust for the Universities of Scotland","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000582","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383619","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T20:46:54Z","timestamp":1616705214000},"page":"1109-1116","source":"Crossref","is-referenced-by-count":26,"title":["Tal: A Synchronised Multi-Speaker Corpus of Ultrasound Tongue Imaging, Audio, and Lip Videos"],"prefix":"10.1109","author":[{"given":"Manuel Sam","family":"Ribeiro","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jennifer","family":"Sanger","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jing-Xuan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aciel","family":"Eshky","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alan","family":"Wrench","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Korin","family":"Richmond","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Steve","family":"Renals","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","author":"fairbanks","year":"1940","journal-title":"Voice and Articulation Drillbook"},{"key":"ref38","author":"weinberger","year":"2015","journal-title":"Speech Accent Archive"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.367"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"ref31","first-page":"965","article-title":"XM2VTSDB: The extended M2VTS database","volume":"964","author":"messer","year":"1999","journal-title":"Second International Conference on Audio and Video-based Biometric Person Authentication"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/34.982900"},{"key":"ref37","year":"2010","journal-title":"SyncBrightUp Users Manual Revision 1 10"},{"key":"ref36","year":"2010","journal-title":"Articulate Assistant User Guide Version 2 11"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-995"},{"key":"ref34","article-title":"LRS3-TED: a large-scale dataset for visual speech recognition","author":"afouras","year":"2018"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1736"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-410","article-title":"Recognition and real time performances of a lightweight ultrasound based silent speech interface employing a language model","author":"cai","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3015783.3015797"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejmp.2014.05.001"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2752365"},{"key":"ref20","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-239","article-title":"Statistical mapping between articulatory and acoustic data for an ultrasound-based silent speech interface","author":"hueber","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/264746a0"},{"key":"ref21","first-page":"1","article-title":"DNN-based acoustic-to-articulatory in-version using ultrasound tongue imaging","author":"porras","year":"2019","journal-title":"International Joint Conference on Neural Networks (IJCNN)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2018.07.002"},{"key":"ref23","first-page":"251","article-title":"Out of time: automated lip sync in the wild","author":"chung","year":"2016","journal-title":"Asian Conference on Computer Vision"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953127"},{"key":"ref25","article-title":"Deep audio-visual speech recognition","author":"afouras","year":"2018","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1075\/sibil.36.15gic"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.08.002"},{"key":"ref40","article-title":"IEEE recommended practice for speech quality measurements","volume":"297","year":"1969","journal-title":"IEEE"},{"key":"ref12","article-title":"Tongue tracking in ultrasound images using eigentongue decomposition and artificial neural networks","author":"fabre","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2017.08.002"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1804"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2612"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.11.004"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2018.02.002"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2004.1326078"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-939"},{"key":"ref4","article-title":"Seeing speech: an articulatory web resource for the study of phonetics [website]","author":"lawson","year":"2015"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1080\/02699200500113558"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3109\/02699206.2015.1016188"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683564"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1111\/1460-6984.12478"},{"key":"ref7","article-title":"Enabling new articulatory gestures in children with persistent speech sound disorders using ultrasound visual biofeedback","author":"cleland","year":"2018","journal-title":"Journal of Speech Language and Hearing Research"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00085-5"},{"key":"ref9","first-page":"148","article-title":"Ultrasound technology and second language acquisition research","author":"wilson","year":"2006","journal-title":"Proc of the 8th Generative Approaches to Second Language Acquisition Conference (GASLA)"},{"key":"ref46","article-title":"Improved feature processing for deep neural networks","author":"rath","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref45","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952701"},{"key":"ref47","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"peddinti","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref42","author":"yamagishi","year":"2019","journal-title":"CSTR VCTK corpus English multi-speaker corpus for cstr voice cloning toolkit"},{"key":"ref41","article-title":"TIMIT acoustic phonetic continuous speech corpus","author":"garofolo","year":"1993","journal-title":"Linguistic Data Consortium"},{"key":"ref44","article-title":"Software tools for speech and audio coding standardization","year":"2010"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","location":"Shenzhen, China","start":{"date-parts":[[2021,1,19]]},"end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383619.pdf?arnumber=9383619","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,24]],"date-time":"2023-10-24T00:22:31Z","timestamp":1698106951000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383619\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383619","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}