{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T22:41:41Z","timestamp":1759444901097,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030597153"},{"type":"electronic","value":"9783030597160"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-59716-0_45","type":"book-chapter","created":{"date-parts":[[2020,10,2]],"date-time":"2020-10-02T20:03:41Z","timestamp":1601669021000},"page":"473-482","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Ultra2Speech - A Deep Learning Framework for Formant Frequency Estimation and Tracking from Ultrasound Tongue Images"],"prefix":"10.1007","author":[{"given":"Pramit","family":"Saha","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yadong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bryan","family":"Gick","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sidney","family":"Fels","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"45_CR1","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"45_CR2","doi-asserted-by":"publisher","first-page":"3672","DOI":"10.21437\/Interspeech.2017-939","volume":"2017","author":"TG Csap\u00f3","year":"2017","unstructured":"Csap\u00f3, T.G., Gr\u00f3sz, T., Gosztolya, G., T\u00f3th, L., Mark\u00f3, A.: DNN-based ultrasound-to-speech conversion for a silent speech interface. Proc. Interspeech 2017, 3672\u20133676 (2017)","journal-title":"Proc. Interspeech"},{"key":"45_CR3","doi-asserted-by":"crossref","unstructured":"Dahl, G.E., Sainath, T.N., Hinton, G.E.: Improving deep neural networks for LVCSR using rectified linear units and dropout. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 8609\u20138613. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6639346"},{"key":"45_CR4","doi-asserted-by":"crossref","unstructured":"Denby, B., Oussar, Y., Dreyfus, G., Stone, M.: Prospects for a silent speech interface using ultrasound imaging. In: Proceedings of the 2006 IEEE International Conference on Acoustics Speech and Signal Processing, vol. 1, p. I. IEEE (2006)","DOI":"10.1109\/ICASSP.2006.1660033"},{"issue":"4","key":"45_CR5","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1016\/j.specom.2009.08.002","volume":"52","author":"B Denby","year":"2010","unstructured":"Denby, B., Schultz, T., Honda, K., Hueber, T., Gilbert, J.M., Brumberg, J.S.: Silent speech interfaces. Speech Commun. 52(4), 270\u2013287 (2010)","journal-title":"Speech Commun."},{"key":"45_CR6","doi-asserted-by":"crossref","unstructured":"Denby, B., Stone, M.: Speech synthesis from real time ultrasound images of the tongue. In: 2004 IEEE International Conference on Acoustics, Speech, and Signal Processing, vol. 1, pp. I\u2013685. IEEE (2004)","DOI":"10.1109\/ICASSP.2004.1326078"},{"issue":"3","key":"45_CR7","doi-asserted-by":"publisher","first-page":"EL307","DOI":"10.1121\/1.4978364","volume":"141","author":"M Gilbert","year":"2017","unstructured":"Gilbert, M., et al.: Restoring speech following total removal of the larynx by a learned transformation from sensor data to acoustics. J. Acoust. Soc. Am. 141(3), EL307\u2013EL313 (2017)","journal-title":"J. Acoust. Soc. Am."},{"key":"45_CR8","doi-asserted-by":"crossref","unstructured":"Gosztolya, G., Pint\u00e9r, \u00c1., T\u00f3th, L., Gr\u00f3sz, T., Mark\u00f3, A., Csap\u00f3, T.G.: Autoencoder-based articulatory-to-acoustic mapping for ultrasound silent speech interfaces. In: 2019 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138. IEEE (2019)","DOI":"10.1109\/IJCNN.2019.8852153"},{"key":"45_CR9","doi-asserted-by":"crossref","unstructured":"Hueber, T., et al.: Eigentongue feature extraction for an ultrasound-based silent speech interface. In: 2007 IEEE International Conference on Acoustics, Speech and Signal Processing-ICASSP 2007, vol. 1, pp. I\u20131245. IEEE (2007)","DOI":"10.1109\/ICASSP.2007.366140"},{"key":"45_CR10","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167 (2015)"},{"issue":"4","key":"45_CR11","doi-asserted-by":"publisher","first-page":"587","DOI":"10.3813\/AAA.919339","volume":"105","author":"EM Juanpere","year":"2019","unstructured":"Juanpere, E.M., Csap\u00f3, T.G.: Ultrasound-based silent speech interface using convolutional and recurrent neural networks. Acta Acust. United Acust. 105(4), 587\u2013590 (2019)","journal-title":"Acta Acust. United Acust."},{"issue":"3","key":"45_CR12","doi-asserted-by":"publisher","first-page":"971","DOI":"10.1121\/1.383940","volume":"67","author":"DH Klatt","year":"1980","unstructured":"Klatt, D.H.: Software for a cascade\/parallel formant synthesizer. J. Acoust. Soc. Am. 67(3), 971\u2013995 (1980)","journal-title":"J. Acoust. Soc. Am."},{"key":"45_CR13","doi-asserted-by":"crossref","unstructured":"Luo, C., Yuille, A.L.: Grouped spatial-temporal aggregation for efficient action recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5512\u20135521 (2019)","DOI":"10.1109\/ICCV.2019.00561"},{"key":"45_CR14","doi-asserted-by":"crossref","unstructured":"Mandal, M., Kumar, L.K., Saran, M.S., et al.: MotionRec: a unified deep framework for moving object recognition. In: The IEEE Winter Conference on Applications of Computer Vision, pp. 2734\u20132743 (2020)","DOI":"10.1109\/WACV45572.2020.9093324"},{"key":"45_CR15","series-title":"Springer Handbooks","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-540-49127-9_11","volume-title":"Springer Handbook of Speech Processing","author":"D O\u2019Shaughnessy","year":"2008","unstructured":"O\u2019Shaughnessy, D.: Formant estimation and tracking. In: Benesty, J., Sondhi, M.M., Huang, Y.A. (eds.) Springer Handbook of Speech Processing. SH, pp. 213\u2013228. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-49127-9_11"},{"key":"45_CR16","doi-asserted-by":"publisher","first-page":"1249","DOI":"10.21437\/Interspeech.2018-2537","volume":"2018","author":"P Saha","year":"2018","unstructured":"Saha, P., Srungarapu, P., Fels, S.: Towards automatic speech identification from vocal tract shape dynamics in real-time MRI. Proc. Interspeech 2018, 1249\u20131253 (2018)","journal-title":"Proc. Interspeech"},{"issue":"16","key":"45_CR17","doi-asserted-by":"publisher","first-page":"2841","DOI":"10.1016\/j.jbiomech.2012.08.031","volume":"45","author":"I Stavness","year":"2012","unstructured":"Stavness, I., Lloyd, J.E., Fels, S.: Automatic prediction of tongue muscle activations using a finite element model. J. Biomech. 45(16), 2841\u20132848 (2012)","journal-title":"J. Biomech."},{"key":"45_CR18","doi-asserted-by":"crossref","unstructured":"T\u00f3th, L., Gosztolya, G., Gr\u00f3sz, T., Mark\u00f3, A., Csap\u00f3, T.G.: Multi-task learning of speech recognition and speech synthesis parameters for ultrasound-based silent speech interfaces. In: Interspeech, pp. 3172\u20133176 (2018)","DOI":"10.21437\/Interspeech.2018-1078"},{"key":"45_CR19","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6450\u20136459 (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"45_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., Sun, J.: Shufflenet: An extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6848\u20136856 (2018)","DOI":"10.1109\/CVPR.2018.00716"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-59716-0_45","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T22:03:26Z","timestamp":1759442606000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-59716-0_45"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030597153","9783030597160"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-59716-0_45","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lima","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Peru","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.miccai2020.org\/en\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1809","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"542","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}