{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T10:48:49Z","timestamp":1743072529321,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030835262"},{"type":"electronic","value":"9783030835279"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-83527-9_43","type":"book-chapter","created":{"date-parts":[[2021,8,29]],"date-time":"2021-08-29T23:04:59Z","timestamp":1630278299000},"page":"499-510","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Voice Activity Detection for Ultrasound-Based Silent Speech Interfaces Using Convolutional Neural Networks"],"prefix":"10.1007","author":[{"given":"Amin","family":"Honarmandi Shandiz","sequence":"first","affiliation":[]},{"given":"L\u00e1szl\u00f3","family":"T\u00f3th","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,30]]},"reference":[{"key":"43_CR1","unstructured":"WebRtc voice activity detection (1999). https:\/\/webrtc.org"},{"issue":"3","key":"43_CR2","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1109\/TASSP.1976.1162800","volume":"24","author":"B Atal","year":"1976","unstructured":"Atal, B., Rabiner, L.: A pattern recognition approach to voiced-unvoiced-silence classification with applications to speech recognition. IEEE Trans. Acoust. Speech Signal Process. 24(3), 201\u2013212 (1976)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"43_CR3","doi-asserted-by":"crossref","unstructured":"Benyassine, A., Shlomot, E., Su, H., Massaloux, D., Lamblin, C., Petit, J.: A silence compression scheme for use with G. 729 optimized for V. 70 digital simultaneous voice and data applications (recommendation G. 729 annex B). IEEE Commun. Mag. 35(9), 64\u201373 (1997)","DOI":"10.1109\/35.620527"},{"key":"43_CR4","unstructured":"Bradbury, J., Merity, S., Xiong, C., Socher, R.: Quasi-recurrent neural networks. arXiv preprint arXiv:1611.01576 (2016)"},{"key":"43_CR5","doi-asserted-by":"crossref","unstructured":"Csap\u00f3, T.G., Zaink\u00f3, C., T\u00f3th, L., Gosztolya, G., Mark\u00f3, A.: Ultrasound-based articulatory-to-acoustic mapping with WaveGlow speech synthesis. In: Proceedings of the Interspeech 2020, pp. 2727\u20132731 (2020)","DOI":"10.21437\/Interspeech.2020-1031"},{"key":"43_CR6","doi-asserted-by":"crossref","unstructured":"Deng, H., O\u2019Shaughnessy, D.: Voiced-unvoiced-silence speech sound classification based on unsupervised learning. In: 2007 IEEE International Conference on Multimedia and Expo, pp. 176\u2013179. IEEE (2007)","DOI":"10.1109\/ICME.2007.4284615"},{"key":"43_CR7","unstructured":"Haigh, J., Mason, J.: Robust voice activity detection using cepstral features. In: Proceedings of TENCon 1993. IEEE Region 10 International Conference on Computers, Communications and Automation, vol. 3, pp. 321\u2013324. IEEE (1993)"},{"key":"43_CR8","doi-asserted-by":"crossref","unstructured":"Honarmandi Shandiz, A., T\u00f3th, L., Gosztolya, G., Mark\u00f3, A., G\u00e1bor Csap\u00f3, T.: Improving neural silent speech interface models by adversarial training. arXiv e-prints pp. arXiv-2104 (2021)","DOI":"10.1007\/978-3-030-76346-6_39"},{"issue":"1","key":"43_CR9","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2012","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3d convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"43_CR10","unstructured":"Kominek, J., Schultz, T., Black, A.: Synthesizer voice quality of new languages calibrated with mean cepstral distortion. In: Proceedings of the SLT, pp. 63\u201368 (2008)"},{"key":"43_CR11","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Adv. Neural. Inf. Process. Syst. 25, 1097\u20131105 (2012)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"43_CR12","doi-asserted-by":"crossref","unstructured":"Kubichek, R.: Mel-cepstral distance measure for objective speech quality assessment. In: Proceedings of the Pacific Rim Conference, pp. 125\u2013128 (1993)","DOI":"10.1109\/PACRIM.1993.407206"},{"key":"43_CR13","doi-asserted-by":"crossref","unstructured":"Lokhande, N.N., Nehe, N.S., Vikhe, P.S.: Voice activity detection algorithm for speech recognition applications. In: IJCA Proceedings on International Conference in Computational Intelligence (ICCIA 2012), pp. 1\u20134, no. 6 (2012)","DOI":"10.1109\/INDCON.2012.6420726"},{"key":"43_CR14","doi-asserted-by":"crossref","unstructured":"Moattar, M.H., Homayounpour, M.M., Kalantari, N.K.: A new approach for robust realtime voice activity detection using spectral pattern. In: 2010 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 4478\u20134481. IEEE (2010)","DOI":"10.1109\/ICASSP.2010.5495597"},{"key":"43_CR15","doi-asserted-by":"crossref","unstructured":"Mondal, S., Barman, A.D.: Clustering based voiced-unvoiced-silence detection in speech using temporal and spectral parameters. In: 2015 IEEE International Conference on Research in Computational Intelligence and Communication Networks (ICRCICN), pp. 390\u2013394. IEEE (2015)","DOI":"10.1109\/ICRCICN.2015.7434270"},{"key":"43_CR16","unstructured":"Nirmalkar, B., Kumar, S.: Voiced\/unvoiced classification by hybrid method based on cepstrum and EMD (2016)"},{"key":"43_CR17","doi-asserted-by":"crossref","unstructured":"Prenger, R., Valle, R., Catanzaro, B.: WaveGlow: a flow-based generative network for speech synthesis. In: Proceedings of the ICASSP, pp. 3617\u20133621 (2019)","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"43_CR18","unstructured":"Qi, F., Bao, C., Liu, Y.: A novel two-step SVM classifier for voiced\/unvoiced\/silence classification of speech. In: 2004 International Symposium on Chinese Spoken Language Processing, pp. 77\u201380. IEEE (2004)"},{"issue":"2","key":"43_CR19","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1109\/89.222883","volume":"1","author":"Y Qi","year":"1993","unstructured":"Qi, Y., Hunt, B.R.: Voiced-unvoiced-silence classifications of speech using hybrid features and a network classifier. IEEE Trans. Speech Audio Process. 1(2), 250\u2013255 (1993)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"43_CR20","unstructured":"Rabiner, L.R., Schafer, R.W., et al.: Digital Processing of Speech Signals. Prentice-Hall, Englewood Cliffs (1978)"},{"key":"43_CR21","doi-asserted-by":"crossref","unstructured":"Ribeiro, M.S., et al.: TaL: a synchronised multi-speaker corpus of ultrasound tongue imaging, audio, and lip videos. arXiv preprint arXiv:2011.09804 (2020)","DOI":"10.1109\/SLT48900.2021.9383619"},{"key":"43_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1007\/978-3-030-59716-0_45","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2020","author":"P Saha","year":"2020","unstructured":"Saha, P., Liu, Y., Gick, B., Fels, S.: Ultra2Speech - a deep learning framework for formant frequency estimation and tracking from ultrasound tongue images. In: Martel, A.L., et al. (eds.) MICCAI 2020. LNCS, vol. 12263, pp. 473\u2013482. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59716-0_45"},{"issue":"8","key":"43_CR23","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"J Schmidhuber","year":"1997","unstructured":"Schmidhuber, J., Hochreiter, S.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"43_CR24","doi-asserted-by":"crossref","unstructured":"Tatulli, E., Hueber, T.: Feature extraction using multimodal convolutional neural networks for visual speech recognition. In: Proceedings of ICASSP, pp. 2971\u20132975 (2017)","DOI":"10.1109\/ICASSP.2017.7952701"},{"key":"43_CR25","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1007\/978-3-030-61401-0_16","volume-title":"Artificial Intelligence and Soft Computing","author":"L T\u00f3th","year":"2020","unstructured":"T\u00f3th, L., Shandiz, A.H.: 3D convolutional neural networks for ultrasound-based silent speech interfaces. In: Rutkowski, L., Scherer, R., Korytkowski, M., Pedrycz, W., Tadeusiewicz, R., Zurada, J.M. (eds.) ICAISC 2020. LNCS (LNAI), vol. 12415, pp. 159\u2013169. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-61401-0_16"},{"key":"43_CR26","doi-asserted-by":"crossref","unstructured":"Verteletskaya, E., Sakhnov, K.: Voice activity detection for speech enhancement applications. Acta Polytechnica 50(4) (2010)","DOI":"10.14311\/1251"},{"key":"43_CR27","doi-asserted-by":"crossref","unstructured":"Yu, Y., Shandiz, A.H., T\u00f3th, L.: Reconstructing speech from real-time articulatory MRI using neural vocoders. arXiv preprint arXiv:2104.11598 (2021)","DOI":"10.23919\/EUSIPCO54536.2021.9616153"},{"issue":"8","key":"43_CR28","doi-asserted-by":"publisher","first-page":"1839","DOI":"10.1109\/TCSVT.2017.2682196","volume":"28","author":"S Zhao","year":"2017","unstructured":"Zhao, S., Liu, Y., Han, Y., Hong, R., Hu, Q., Tian, Q.: Pooling the convolutional layers in deep convnets for video action recognition. IEEE Trans. Circuits Syst. Video Technol. 28(8), 1839\u20131849 (2017)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-83527-9_43","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T10:58:56Z","timestamp":1725706736000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-83527-9_43"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030835262","9783030835279"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-83527-9_43","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"30 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Olomouc","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.kiv.zcu.cz\/tsd2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"TSDEngine 3.2","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"101","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2,93","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}