{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T12:07:59Z","timestamp":1771330079058,"version":"3.50.1"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032025470","type":"print"},{"value":"9783032025487","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-02548-7_7","type":"book-chapter","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T05:39:23Z","timestamp":1755754763000},"page":"72-83","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["TOSD-Net: A CNN-Transformer Architecture for\u00a0Robust Frame-Level Overlapping Speech Detection in\u00a0Diverse Acoustic Conditions"],"prefix":"10.1007","author":[{"given":"Yassin","family":"Terraf","sequence":"first","affiliation":[]},{"given":"Youssef","family":"Iraqi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Andrei, V., Cucu, H., Burileanu, C.: Detecting overlapped speech on short timeframes using deep learning. In: Interspeech, pp. 1198\u20131202 (2017)","DOI":"10.21437\/Interspeech.2017-188"},{"key":"7_CR2","doi-asserted-by":"crossref","unstructured":"Bullock, L., Bredin, H., Garcia-Perera, L.P.: Overlap-aware diarization: resegmentation using neural end-to-end overlapped speech detection. In: ICASSP 2020. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053096"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Charlet, D., et\u00a0al., B.: Impact of overlapping speech detection on speaker diarization for broadcast news and debates. In: ICASSP. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6639163"},{"key":"7_CR4","doi-asserted-by":"publisher","unstructured":"Cooke, M., Barker, J., Cunningham, S., Shao, X.: The grid audio-visual speech corpus (2020). https:\/\/doi.org\/10.5281\/zenodo.3625687","DOI":"10.5281\/zenodo.3625687"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Cornell, S., Omologo, M., Squartini, S., Vincent, E.: Overlapped speech detection and speaker counting using distant microphone arrays. Comput. Speech Lang. (2022)","DOI":"10.1016\/j.csl.2021.101306"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Geiger, J.T., Eyben, F., Schuller, B., Rigoll, G.: Detecting overlapping speech with long short-term memory recurrent neural networks. In: Proceedings Interspeech 2013, 14th Annual Conference of the International Speech Communication Association, Lyon, France (2013)","DOI":"10.21437\/Interspeech.2013-27"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Liu, M., Raj, A.N.J., Rajangam, V., Ma, K., Zhuang, Z., Zhuang, S.: Multiscale-multichannel feature extraction and classification through one-dimensional convolutional neural network for speech emotion recognition. Speech Commun. (2024)","DOI":"10.1016\/j.specom.2023.103010"},{"key":"7_CR8","doi-asserted-by":"publisher","unstructured":"Livingstone, S.R., Russo, F.A.: The ryerson audio-visual database of emotional speech and song (RAVDESS) (2018). https:\/\/doi.org\/10.5281\/zenodo.1188976","DOI":"10.5281\/zenodo.1188976"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Mariotte, T., Larcher, e.a.: Channel-combination algorithms for robust distant voice activity and overlapped speech detection. IEEE\/ACM Trans. Audio Speech Lang. Process. (2024)","DOI":"10.1109\/TASLP.2024.3369531"},{"key":"7_CR10","volume-title":"Computer-Intensive Methods for Testing Hypotheses","author":"EW Noreen","year":"1989","unstructured":"Noreen, E.W.: Computer-Intensive Methods for Testing Hypotheses. Wiley, New York (1989)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Otterson, S., al.: Efficient use of overlap information in speaker diarization. In: 2007 IEEE Workshop on Automatic Speech Recognition & Understanding. IEEE (2007)","DOI":"10.1109\/ASRU.2007.4430194"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Sajjan, N., Ganesh, S., Sharma, N., Ganapathy, S., Ryant, N.: Leveraging LSTM models for overlap detection in multi-party meetings. In: ICASSP. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462548"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Scheibler, R., Bezzam, E., Dokmani\u0107, I.: Pyroomacoustics: a python package for audio room simulation and array processing algorithms. In: ICASSP. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461310"},{"key":"7_CR14","unstructured":"Snyder, D., Chen, G., Povey, D.: MUSAN: a music, speech, and noise corpus. arXiv preprint arXiv:1510.08484 (2015)"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"St ter, F.R., al.: CountNet: estimating the number of concurrent speakers using supervised learning. IEEE\/ACM Trans. Audio Speech Lang. Process. (2019)","DOI":"10.1109\/TASLP.2018.2877892"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Terraf, Y., Iraqi, Y.: BiConNet: a hybrid CNN-BiLSTM architecture for robust overlapping speech detection in diverse acoustic environments. Authorea Preprints (2024)","DOI":"10.36227\/techrxiv.170840678.83828462\/v1"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Terraf, Y., Iraqi, Y.: Robust feature extraction using temporal context averaging for speaker identification in diverse acoustic environments. IEEE Access (2024)","DOI":"10.1109\/ACCESS.2024.3356730"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Tran, V.T., Tsai, W.H.: Speaker identification in multi-talker overlapping speech using neural networks. IEEE Access (2020)","DOI":"10.1109\/ACCESS.2020.3009987"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Tripathi, A., Lu, H., Sak, H.: End-to-end multi-talker overlapping speech recognition. In: ICASSP 2020-2020 Proceedings. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9054328"},{"key":"7_CR20","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. (2017)"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Verma, V., Khanna, N.: Speaker-independent source cell-phone identification for re-compressed and noisy audio recordings. Multimed. Tools Appl. (2021)","DOI":"10.1007\/s11042-020-10205-z"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Vipperla, R., Geiger, J.T., et\u00a0al., B.: Speech overlap detection and attribution using convolutive non-negative sparse coding. In: ICASSP. IEEE (2012)","DOI":"10.1109\/ICASSP.2012.6288840"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Wang, N., Ching, P., Zheng, N., Lee, T.: Robust speaker recognition using denoised vocal source and vocal tract features. IEEE Trans. Audio Speech Lang. Process. (2010)","DOI":"10.1109\/TASL.2010.2047420"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Xiao, X., Kanda, N., Chen, Z., et\u00a0al., Z.: Microsoft speaker diarization system for the voxceleb speaker recognition challenge 2020. In: ICASSP (2021)","DOI":"10.1109\/ICASSP39728.2021.9413832"},{"key":"7_CR25","doi-asserted-by":"crossref","unstructured":"Yousefi, M., Hansen, J.H.: Block-based high performance CNN architectures for frame-level overlapping speech detection. IEEE\/ACM Trans. Audio Speech Lang. Process. 29 (2020)","DOI":"10.1109\/TASLP.2020.3036237"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-02548-7_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T05:39:31Z","timestamp":1755754771000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-02548-7_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,22]]},"ISBN":["9783032025470","9783032025487"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-02548-7_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,22]]},"assertion":[{"value":"22 August 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Erlangen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.kiv.zcu.cz\/tsd2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}