{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T15:49:52Z","timestamp":1743004192187,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030583224"},{"type":"electronic","value":"9783030583231"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58323-1_40","type":"book-chapter","created":{"date-parts":[[2020,9,3]],"date-time":"2020-09-03T13:48:44Z","timestamp":1599140924000},"page":"366-376","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Voice-Activity and Overlapped Speech Detection Using x-Vectors"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5864-0196","authenticated-orcid":false,"given":"Ji\u0159\u00ed","family":"M\u00e1lek","sequence":"first","affiliation":[]},{"given":"Jind\u0159ich","family":"\u017d\u010f\u00e1nsk\u00fd","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,1]]},"reference":[{"key":"40_CR1","unstructured":"DCASE 2018 challenge. http:\/\/dcase.community\/challenge2018\/index. Accessed 27 Mar 2020"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Bhattacharya, G., Alam, J., Stafylakis, T., Kenny, P.: Deep neural network based text-dependent speaker recognition: preliminary results. In: Proceedings of the Odyssey, pp. 2\u201315 (2016)","DOI":"10.21437\/Odyssey.2016-2"},{"issue":"1","key":"40_CR3","first-page":"1","volume":"20","author":"D Cai","year":"2007","unstructured":"Cai, D., He, X., Han, J.: SRDA: an efficient algorithm for large-scale discriminant analysis. IEEE Trans. Knowl. Data Eng. 20(1), 1\u201312 (2007)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"4","key":"40_CR4","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2010","unstructured":"Dehak, N., Kenny, P.J., Dehak, R., Dumouchel, P., Ouellet, P.: Front-end factor analysis for speaker verification. IEEE Trans. Audio Speech Lang. Process. 19(4), 788\u2013798 (2010)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Doddipatla, R., Braunschweiler, N., Maia, R.: Speaker adaptation in DNN-based speech synthesis using d-vectors. In: INTERSPEECH, pp. 3404\u20133408 (2017)","DOI":"10.21437\/Interspeech.2017-1038"},{"key":"40_CR6","doi-asserted-by":"crossref","unstructured":"Fu, R., Tao, J., Wen, Z., Zheng, Y.: Phoneme dependent speaker embedding and model factorization for multi-speaker speech synthesis and adaptation. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6930\u20136934. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8682535"},{"key":"40_CR7","doi-asserted-by":"crossref","unstructured":"Garcia-Romero, D., Snyder, D., Sell, G., Povey, D., McCree, A.: Speaker diarization using deep neural network embeddings. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4930\u20134934. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7953094"},{"key":"40_CR8","unstructured":"Garofolo, J.S., et al.: TIMIT acoustic-phonetic continuous speech corpus. Linguist. Data Consortium 10(5) (1993)"},{"key":"40_CR9","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press, Cambridge (2016). http:\/\/www.deeplearningbook.org"},{"key":"40_CR10","unstructured":"Habets, E.A.: Room impulse response generator. Technische Universiteit Eindhoven, Technical report, vol. 2(2.4), p. 1 (2006)"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Heigold, G., Moreno, I., Bengio, S., Shazeer, N.: End-to-end text-dependent speaker verification. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5115\u20135119. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472652"},{"key":"40_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1007\/11744085_41","volume-title":"Computer Vision \u2013 ECCV 2006","author":"S Ioffe","year":"2006","unstructured":"Ioffe, S.: Probabilistic linear discriminant analysis. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) ECCV 2006. LNCS, vol. 3954, pp. 531\u2013542. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11744085_41"},{"key":"40_CR13","doi-asserted-by":"crossref","unstructured":"Jansk\u00fd, J., M\u00e1lek, J., \u010cmejla, J., Kounovsk\u00fd, T., Koldovsk\u00fd, Z., \u017dd\u2019\u00e1nsk\u00fd, J.: Adaptive blind audio source extraction supervised by dominant speaker identification using x-vectors. In: ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 676\u2013680 (2020)","DOI":"10.1109\/ICASSP40776.2020.9054693"},{"key":"40_CR14","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/978-3-030-26061-3_26","volume-title":"Speech and Computer","author":"M Kune\u0161ov\u00e1","year":"2019","unstructured":"Kune\u0161ov\u00e1, M., Hr\u00faz, M., Zaj\u00edc, Z., Radov\u00e1, V.: Detection of overlapping speech for the purposes of speaker diarization. In: Salah, A.A., Karpov, A., Potapova, R. (eds.) SPECOM 2019. LNCS (LNAI), vol. 11658, pp. 247\u2013257. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-26061-3_26"},{"key":"40_CR15","volume-title":"Finite Mixture Models","author":"GJ McLachlan","year":"2004","unstructured":"McLachlan, G.J., Peel, D.: Finite Mixture Models. Wiley, Hoboken (2004)"},{"key":"40_CR16","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J.S., Zisserman, A.: Voxceleb: a large-scale speaker identification dataset. arXiv preprint arXiv:1706.08612 (2017)","DOI":"10.21437\/Interspeech.2017-950"},{"key":"40_CR17","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech: an ASR corpus based on public domain audio books. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5206\u20135210. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"40_CR18","doi-asserted-by":"crossref","unstructured":"Peddinti, V., Povey, D., Khudanpur, S.: A time delay neural network architecture for efficient modeling of long temporal contexts. In: Sixteenth Annual Conference of the ISCA (2015)","DOI":"10.21437\/Interspeech.2015-647"},{"key":"40_CR19","doi-asserted-by":"crossref","unstructured":"Raj, D., Snyder, D., Povey, D., Khudanpur, S.: Probing the information encoded in x-vectors. arXiv preprint arXiv:1909.06351 (2019)","DOI":"10.1109\/ASRU46091.2019.9003979"},{"issue":"1\u20133","key":"40_CR20","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds, D.A., Quatieri, T.F., Dunn, R.B.: Speaker verification using adapted gaussian mixture models. Digit. Signal Process. 10(1\u20133), 19\u201341 (2000)","journal-title":"Digit. Signal Process."},{"key":"40_CR21","doi-asserted-by":"crossref","unstructured":"Sajjan, N., Ganesh, S., Sharma, N., Ganapathy, S., Ryant, N.: Leveraging LSTM models for overlap detection in multi-party meetings. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5249\u20135253. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462548"},{"key":"40_CR22","doi-asserted-by":"crossref","unstructured":"Shokouhi, N., Sathyanarayana, A., Sadjadi, S.O., Hansen, J.H.: Overlapped-speech detection with applications to driver assessment for in-vehicle active safety systems. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 2834\u20132838. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638174"},{"key":"40_CR23","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., Khudanpur, S.: X-vectors: robust DNN embeddings for speaker recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5329\u20135333. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"40_CR24","doi-asserted-by":"crossref","unstructured":"Variani, E., Lei, X., McDermott, E., Moreno, I.L., Gonzalez-Dominguez, J.: Deep neural networks for small footprint text-dependent speaker verification. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4052\u20134056. IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"40_CR25","unstructured":"Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R.: The 4th CHiME speech separation and recognition challenge. http:\/\/spandh.dcs.shef.ac.uk\/chime_challenge\/chime2016\/. Accessed 27 Mar 2020"},{"key":"40_CR26","doi-asserted-by":"crossref","unstructured":"Wang, S., Qian, Y., Yu, K.: What does the speaker embedding encode? In: Interspeech, pp. 1497\u20131501 (2017)","DOI":"10.21437\/Interspeech.2017-1125"},{"key":"40_CR27","doi-asserted-by":"crossref","unstructured":"Wu, Z., Swietojanski, P., Veaux, C., Renals, S., King, S.: A study of speaker adaptation for DNN-based speech synthesis. In: Sixteenth Annual Conference of the International Speech Communication Association (2015)","DOI":"10.21437\/Interspeech.2015-270"},{"issue":"12","key":"40_CR28","doi-asserted-by":"publisher","first-page":"1688","DOI":"10.1109\/TASLP.2014.2346315","volume":"22","author":"SH Yella","year":"2014","unstructured":"Yella, S.H., Bourlard, H.: Overlapping speech detection using long-term conversational features for speaker diarization in meeting room conversations. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(12), 1688\u20131700 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"40_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Li, J., Zhang, S., Chen, L., Gong, Y.: Domain and speaker adaptation for cortana speech recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5984\u20135988. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461553"},{"key":"40_CR30","doi-asserted-by":"crossref","unstructured":"Zmolikova, K., Delcroix, M., Kinoshita, K., Higuchi, T., Ogawa, A., Nakatani, T.: Speaker-aware neural network based beamformer for speaker extraction in speech mixtures. In: Interspeech, pp. 2655\u20132659 (2017)","DOI":"10.21437\/Interspeech.2017-667"},{"issue":"4","key":"40_CR31","doi-asserted-by":"publisher","first-page":"800","DOI":"10.1109\/JSTSP.2019.2922820","volume":"13","author":"K Zmolikova","year":"2019","unstructured":"Zmolikova, K., et al.: Speakerbeam: speaker aware neural network for target speaker extraction in speech mixtures. IEEE J. Sel. Topics Signal Process. 13(4), 800\u2013814 (2019)","journal-title":"IEEE J. Sel. Topics Signal Process."}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58323-1_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T15:47:35Z","timestamp":1709826455000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58323-1_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030583224","9783030583231"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58323-1_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"1 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brno","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.tsdconference.org\/tsd2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"In-house","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"110","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}