{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:34:19Z","timestamp":1768073659033,"version":"3.49.0"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T00:00:00Z","timestamp":1752451200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T00:00:00Z","timestamp":1752451200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100007601","name":"Horizon 2020","doi-asserted-by":"publisher","award":["101003750"],"award-info":[{"award-number":["101003750"]}],"id":[{"id":"10.13039\/501100007601","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010198","name":"Ministerio de Asuntos Econ\u00f3micos y Transformaci\u00f3n Digital, Gobierno de Espa\u00f1a","doi-asserted-by":"publisher","award":["PID2021-128469OB-I00"],"award-info":[{"award-number":["PID2021-128469OB-I00"]}],"id":[{"id":"10.13039\/501100010198","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J AUDIO SPEECH MUSIC PROC."],"DOI":"10.1186\/s13636-025-00414-5","type":"journal-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T16:50:25Z","timestamp":1752511825000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Parameter optimisation for a physical model of the vocal system"],"prefix":"10.1186","volume":"2025","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1055-6315","authenticated-orcid":false,"given":"Mateo","family":"C\u00e1mara","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jos\u00e9 Luis","family":"Blanco","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joshua D.","family":"Reiss","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,14]]},"reference":[{"key":"414_CR1","first-page":"173","volume-title":"in Konferenz Elektronische Sprachsignalverarbeitung, Articulatory speech synthesis in the context of speech research and speech technology: Review and prospect","author":"BJ Kr\u00f6ger","year":"2023","unstructured":"B.J. Kr\u00f6ger, in Konferenz Elektronische Sprachsignalverarbeitung, Articulatory speech synthesis in the context of speech research and speech technology: Review and prospect (TUDpress, Dresden, 2023), pp.173\u2013180"},{"key":"414_CR2","doi-asserted-by":"publisher","first-page":"95008","DOI":"10.1109\/ACCESS.2022.3204816","volume":"10","author":"P Birkholz","year":"2022","unstructured":"P. Birkholz, S. Ossmann, R. Blandin, A. Wilbrandt, P.K. Krug, M. Fleischer, Modeling speech sound radiation with different degrees of realism for articulatory synthesis. IEEE Access 10, 95008\u201395019 (2022)","journal-title":"IEEE Access"},{"key":"414_CR3","doi-asserted-by":"publisher","first-page":"796739","DOI":"10.3389\/frobt.2022.796739","volume":"9","author":"BJ Kr\u00f6ger","year":"2022","unstructured":"B.J. Kr\u00f6ger, Computer-implemented articulatory models for speech production: A review. Front. Robot. AI 9, 796739 (2022)","journal-title":"Front. Robot. AI"},{"key":"414_CR4","unstructured":"BBC, The BBC Year Book 1931. Chapter \u201cThe Use of Sound Effects\u201d 194\u2013197, Editorial: British Broadcasting Corporation, Savoy Hill, London. (1931)"},{"key":"414_CR5","unstructured":"M.M. Afsar, et al., Generating diverse realistic laughter for interactive art (2021), arXiv preprint arXiv:2111.03146"},{"key":"414_CR6","doi-asserted-by":"publisher","first-page":"778","DOI":"10.3758\/s13428-018-1095-7","volume":"51","author":"A Anikin","year":"2019","unstructured":"A. Anikin, Soundgen: An open-source tool for synthesizing nonverbal vocalizations. Behav. Res. Methods 51, 778\u2013792 (2019)","journal-title":"Behav. Res. Methods"},{"key":"414_CR7","unstructured":"K.\u00a0Richmond, Estimating articulatory parameters from the acoustic speech signal (Ph.D. thesis, University of Edinburgh, 2002)"},{"key":"414_CR8","doi-asserted-by":"crossref","unstructured":"Q.\u00a0Fang, in Proceedings of the 2024 International Conference on Generative Artificial Intelligence and Information Security, On the performance of ema-synchronized speech and stand-alone speech in speech recognition and acoustic-to-articulatory inversion,\u00a0Association for Computing Machinery, New York. pp. 162\u2013166\u00a0(2024)","DOI":"10.1145\/3665348.3665377"},{"key":"414_CR9","doi-asserted-by":"crossref","unstructured":"S.\u00a0Azzouz, P.A. Vuissoz, Y.\u00a0Laprie, Complete reconstruction of the tongue contour through acoustic to articulatory inversion using real-time mri data (2024). arXiv preprint arXiv:2411.02037","DOI":"10.1109\/ICASSP49660.2025.10888721"},{"issue":"1","key":"414_CR10","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1121\/1.415960","volume":"100","author":"BH Story","year":"1996","unstructured":"B.H. Story, I.R. Titze, E.A. Hoffman, Vocal tract area functions from magnetic resonance imaging. J. Acoust. Soc. Am. 100(1), 537\u2013554 (1996)","journal-title":"J. Acoust. Soc. Am."},{"key":"414_CR11","doi-asserted-by":"crossref","unstructured":"A.\u00a0Toutios, S.S. Narayanan, in INTERSPEECH, Articulatory synthesis of french connected speech from ema data,\u00a0Lyon. pp. 2738\u20132742\u00a0(2013)","DOI":"10.21437\/Interspeech.2013-628"},{"issue":"5","key":"414_CR12","doi-asserted-by":"publisher","first-page":"1535","DOI":"10.1121\/1.381848","volume":"63","author":"BS Atal","year":"1978","unstructured":"B.S. Atal et al., Inversion of articulatory-to-acoustic transformation in the vocal tract by a computer-sorting technique. J. Acoust. Soc. Am. 63(5), 1535\u20131555 (1978)","journal-title":"J. Acoust. Soc. Am."},{"key":"414_CR13","unstructured":"K.N. Stevens, Remarks on analysis by synthesis and distinctive features. Models for the perception of speech and visual form (1967)"},{"key":"414_CR14","unstructured":"M. C\u00e1mara, et al., in Proceedings of the 26th international conference on digital audio effects, Optimization techniques for a physical model of human vocalisation,\u00a0Denmark. pp 29-36 (2023)"},{"key":"414_CR15","unstructured":"N.\u00a0Thapen. Pink trombone (2017)"},{"key":"414_CR16","doi-asserted-by":"crossref","unstructured":"P. Birkholz. Modeling consonant-vowel coarticulation for articulatory speech synthesis. PloS one 8(4), e60603 (2013)","DOI":"10.1371\/journal.pone.0060603"},{"key":"414_CR17","doi-asserted-by":"crossref","unstructured":"S.\u00a0Maeda, Compensatory articulation during speech: Evidence from the analysis and synthesis of vocal-tract shapes using an articulatory model. Speech Prod. Speech Model. 55:131\u2013149 (1990)","DOI":"10.1007\/978-94-009-2037-8_6"},{"issue":"4","key":"414_CR18","doi-asserted-by":"publisher","first-page":"1070","DOI":"10.1121\/1.1913427","volume":"53","author":"P Mermelstein","year":"1973","unstructured":"P. Mermelstein, Articulatory model for the study of speech production. J. Acoust. Soc. Am. 53(4), 1070\u20131082 (1973)","journal-title":"J. Acoust. Soc. Am."},{"issue":"4","key":"414_CR19","doi-asserted-by":"publisher","first-page":"2144","DOI":"10.1121\/1.3514544","volume":"129","author":"S Panchapagesan","year":"2011","unstructured":"S. Panchapagesan, A. Alwan, A study of acoustic-to-articulatory inversion of speech by analysis-by-synthesis using chain matrices and the maeda articulatory model. J. Acoust. Soc. Am. 129(4), 2144\u20132162 (2011)","journal-title":"J. Acoust. Soc. Am."},{"key":"414_CR20","unstructured":"D.\u00a0S\u00fcdholt, et al., in Proceedings of the 20th international conference on digital audio effects, Vocal tract area estimation by gradient descent,\u00a0Denmark. (2017)"},{"key":"414_CR21","doi-asserted-by":"crossref","unstructured":"Y.\u00a0Laprie, M.\u00a0Loosvelt, S.\u00a0Maeda, R.\u00a0Sock, F.\u00a0Hirsch, in InterSpeech-14th Annual Conference of the International Speech Communication Association-2013, Articulatory copy synthesis from cine X-ray films,\u00a0Association for Computing Machinery, New York. (2013)","DOI":"10.21437\/Interspeech.2013-480"},{"issue":"3","key":"414_CR22","doi-asserted-by":"publisher","first-page":"511","DOI":"10.1006\/jpho.2002.0167","volume":"30","author":"J Dang","year":"2002","unstructured":"J. Dang, K. Honda, Estimation of vocal tract shapes from speech sounds with a physiological articulatory model. J. Phon. 30(3), 511\u2013532 (2002)","journal-title":"J. Phon."},{"key":"414_CR23","doi-asserted-by":"crossref","unstructured":"S.\u00a0Prom-on, P.\u00a0Birkholz, Y.\u00a0Xu, in INTERSPEECH, Training an articulatory synthesizer with continuous acoustic data,\u00a0Association for Computing Machinery, New York. pp. 349\u2013353\u00a0(2013)","DOI":"10.21437\/Interspeech.2013-98"},{"key":"414_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1687-4722-2014-23","volume":"2014","author":"S Prom-on","year":"2014","unstructured":"S. Prom-on, P. Birkholz, Y. Xu, Identifying underlying articulatory targets of thai vowels from acoustic data based on an analysis-by-synthesis approach. EURASIP J. Audio Speech Music Process. 2014, 1\u201311 (2014)","journal-title":"EURASIP J. Audio Speech Music Process."},{"key":"414_CR25","doi-asserted-by":"crossref","unstructured":"S.\u00a0Fairee, B.\u00a0Sirinaovakul, S.\u00a0Prom-on, in 2015 12th International Conference on Electrical Engineering\/Electronics, Computer, Telecommunications and Information Technology (ECTI-CON), Acoustic-to-articulatory inversion using particle swarm optimization (IEEE, 2015), pp. 1\u20136","DOI":"10.1109\/ECTICon.2015.7206999"},{"key":"414_CR26","doi-asserted-by":"crossref","unstructured":"Y.\u00a0Gao, S.\u00a0Stone, P.\u00a0Birkholz, in INTERSPEECH, Articulatory copy synthesis based on a genetic algorithm,\u00a0Graz. pp. 3770\u20133774\u00a0(2019)","DOI":"10.21437\/Interspeech.2019-1334"},{"key":"414_CR27","doi-asserted-by":"crossref","unstructured":"Y. Gao, P. Birkholz, Y. Li, Articulatory copy synthesis based on the speech synthesizer vocaltractlab and convolutional recurrent neural networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 32 pp. 1845\u20131858\u00a0(2024)","DOI":"10.1109\/TASLP.2024.3372874"},{"key":"414_CR28","unstructured":"Y.\u00a0Sun, X.\u00a0Wu, Embodied self-supervised learning by coordinated sampling and training (2020). arXiv preprint arXiv:2006.13350"},{"key":"414_CR29","unstructured":"L. Manzara, The tube resonance model speech synthesizer. Master's thesis, University of Calgary (2009)"},{"key":"414_CR30","doi-asserted-by":"crossref","unstructured":"S.\u00a0Aryal, R.\u00a0Gutierrez-Osuna, in 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, Articulatory inversion and synthesis: towards articulatory-based modification of speech (IEEE, 2013), pp. 7952\u20137956","DOI":"10.1109\/ICASSP.2013.6639213"},{"issue":"6","key":"414_CR31","doi-asserted-by":"publisher","first-page":"3980","DOI":"10.1121\/1.4763545","volume":"132","author":"H Nam","year":"2012","unstructured":"H. Nam, V. Mitra, M. Tiede, M. Hasegawa-Johnson, C. Espy-Wilson, E. Saltzman, L. Goldstein, A procedure for estimating gestural scores from speech acoustics. J. Acoust. Soc. Am. 132(6), 3980\u20133989 (2012)","journal-title":"J. Acoust. Soc. Am."},{"key":"414_CR32","unstructured":"L.\u00a0Mo, M.\u00a0Cherep, N.\u00a0Singh, Q.\u00a0Langford, P.\u00a0Maes, in Audio Imagination: NeurIPS 2024 Workshop AI-Driven Speech, Music, and Sound Generation, Articulatory synthesis of speech and diverse vocal sounds via optimization,\u00a0Vancouver. (2024)"},{"key":"414_CR33","doi-asserted-by":"crossref","unstructured":"P.\u00a0Saha, et al., Sound stream: Towards vocal sound synthesis via dual-handed simultaneous control of articulatory parameters. J. Acoust. Soc. Am. 144(3_Supplement), 1907 (2018)","DOI":"10.1121\/1.5068362"},{"key":"414_CR34","doi-asserted-by":"crossref","unstructured":"P.\u00a0Saha, S.\u00a0Fels, Learning Joint Articulatory-Acoustic Representations with Normalizing Flows (2020). arXiv e-prints arXiv:2005.09463","DOI":"10.21437\/Interspeech.2020-2004"},{"key":"414_CR35","unstructured":"J.L.J. Kelly, C.\u00a0Lochbaum, Speech synthesis. Proc. Fourth Int. Congr. Acoust. 1\u20134 (1962)"},{"issue":"5","key":"414_CR36","doi-asserted-by":"publisher","first-page":"3231","DOI":"10.1121\/1.1869752","volume":"117","author":"BH Story","year":"2005","unstructured":"B.H. Story, A parametric model of the vocal tract area function for vowel and consonant simulation. J. Acoust. Soc. Am. 117(5), 3231\u20133254 (2005)","journal-title":"J. Acoust. Soc. Am."},{"key":"414_CR37","unstructured":"U.G. Goldstein, An articulatory model for the vocal tracts of growing children (Ph.D. thesis, Massachusetts Institute of Technology, 1980)"},{"key":"414_CR38","unstructured":"ISO 8253-1:2010, Acoustics\u2014Audiometric test methods. Part 1: Pure-tone air and bone conduction audiometry (2010)"},{"issue":"4B","key":"414_CR39","doi-asserted-by":"publisher","first-page":"1002","DOI":"10.1121\/1.1910429","volume":"41","author":"MR Schroeder","year":"1967","unstructured":"M.R. Schroeder, Determination of the geometry of the human vocal tract by acoustic measurements. J. Acoust. Soc. Am. 41(4B), 1002\u20131010 (1967)","journal-title":"J. Acoust. Soc. Am."},{"key":"414_CR40","doi-asserted-by":"crossref","unstructured":"K.\u00a0He, X.\u00a0Zhang, S.\u00a0Ren, J.\u00a0Sun, in Proceedings of the IEEE conference on computer vision and pattern recognition, Deep residual learning for image recognition,\u00a0Las Vegas. pp. 770\u2013778\u00a0(2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"414_CR41","doi-asserted-by":"crossref","unstructured":"Y.\u00a0Ma, Z.\u00a0Ren, S.\u00a0Xu, RW-Resnet: A novel speech anti-spoofing model using raw waveform (2021). arXiv preprint arXiv:2108.05684","DOI":"10.21437\/Interspeech.2021-438"},{"key":"414_CR42","unstructured":"D.P. Kingma, J.\u00a0Ba, Adam: A method for stochastic optimization (2014). arXiv preprint arXiv:1412.6980"},{"key":"414_CR43","doi-asserted-by":"crossref","unstructured":"A.\u00a0Auger, N.\u00a0Hansen, in 2005 IEEE congress on evolutionary computation, vol.\u00a02, A restart CMA evolution strategy with increasing population size (IEEE, 2005), pp. 1769\u20131776","DOI":"10.1109\/CEC.2005.1554902"},{"issue":"2","key":"414_CR44","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1109\/TETCI.2017.2783885","volume":"2","author":"MJ Yee-King","year":"2018","unstructured":"M.J. Yee-King et al., Automatic programming of vst sound synthesizers using deep networks and other techniques. IEEE Trans. Emerg. Top. Comput. Intel. 2(2), 150\u2013159 (2018)","journal-title":"IEEE Trans. Emerg. Top. Comput. Intel."},{"key":"414_CR45","unstructured":"S.\u00a0Ruder, An overview of gradient descent optimization algorithms (2016). arXiv preprint arXiv:1609.04747"},{"key":"414_CR46","doi-asserted-by":"crossref","unstructured":"M.\u00a0Mauch, S.\u00a0Dixon, in IEEE international conference on acoustics, speech and signal processing (ICASSP), pYIN: A fundamental frequency estimator using probabilistic threshold distributions,\u00a0Florence. pp. 659\u2013663\u00a0(2014)","DOI":"10.1109\/ICASSP.2014.6853678"},{"key":"414_CR47","unstructured":"G.\u00a0Fant, The LF-model revisited. Transformations and frequency domain analysis. STL-QPSR 2(3),\u00a0p. 40\u00a0(1995)"},{"key":"414_CR48","doi-asserted-by":"crossref","unstructured":"M.\u00a0Chinen, et al., in twelfth international conference on quality of multimedia experience (QoMEX), ViSQOL v3: An open source production ready objective speech and audio metric (IEEE, 2020), pp. 1\u20136","DOI":"10.1109\/QoMEX48832.2020.9123150"},{"key":"414_CR49","unstructured":"ITU-T. P.862 \u2013 perceptual evaluation of speech quality (PESQ): an objective method for end-to-end speech quality assessment of narrow-band telephone networks and speech codecs (2001)"},{"key":"414_CR50","doi-asserted-by":"crossref","unstructured":"D.\u00a0Barry, et al., Go listen: an end-to-end online listening test platform. J Open Res. Softw. 9(1), p. 20 (2021)","DOI":"10.5334\/jors.361"},{"issue":"1","key":"414_CR51","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1186\/s13636-015-0054-9","volume":"2015","author":"A Hines","year":"2015","unstructured":"A. Hines, J. Skoglund, A.C. Kokaram, N. Harte, ViSQOL: an objective speech quality model. EURASIP J. Audio Speech Music Process. 2015(1), 13 (2015). https:\/\/doi.org\/10.1186\/s13636-015-0054-9","journal-title":"EURASIP J. Audio Speech Music Process."},{"key":"414_CR52","doi-asserted-by":"crossref","unstructured":"C.J. Cho, P.\u00a0Wu, T.S. Prabhune, D.\u00a0Agarwal, G.K. Anumanchipalli, Coding speech through vocal tract kinematics. IEEE J. Sel. Top. Signal Process.\u00a018(8), pp. 1427\u20131440 (2024)","DOI":"10.1109\/JSTSP.2024.3497655"}],"container-title":["EURASIP Journal on Audio, Speech, and Music Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-025-00414-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s13636-025-00414-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-025-00414-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T09:19:25Z","timestamp":1757236765000},"score":1,"resource":{"primary":{"URL":"https:\/\/asmp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13636-025-00414-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,14]]},"references-count":52,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["414"],"URL":"https:\/\/doi.org\/10.1186\/s13636-025-00414-5","relation":{},"ISSN":["1687-4722"],"issn-type":[{"value":"1687-4722","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,14]]},"assertion":[{"value":"18 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"27"}}