{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T18:40:07Z","timestamp":1750531207129,"version":"3.41.0"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031961953","type":"print"},{"value":"9783031961960","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-96196-0_14","type":"book-chapter","created":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T18:13:27Z","timestamp":1750529607000},"page":"181-194","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["FusionNet: Leveraging Dual Speech Separation Networks for\u00a0Enhanced Multi-speaker Isolation"],"prefix":"10.1007","author":[{"given":"Sumedh","family":"Ravindran","sequence":"first","affiliation":[]},{"given":"Shreyas","family":"Mallesh","sequence":"additional","affiliation":[]},{"given":"Raghavendra S.","family":"Bhatagunaki","sequence":"additional","affiliation":[]},{"given":"Srikrishna R.","family":"Chitnis","sequence":"additional","affiliation":[]},{"given":"Shylaja S.","family":"Sharath","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,22]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Du, J., et al.: A regression approach to single-channel speech separation via high-resolution deep neural networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(8), 1424\u20131437 (2016)","DOI":"10.1109\/TASLP.2016.2558822"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Subakan, C., et al.: Attention is all you need in speech separation. In: ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9413901"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: Continuous speech separation: dataset and analysis. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053426"},{"issue":"8","key":"14_CR4","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/TASLP.2019.2915167","volume":"27","author":"Y Luo","year":"2019","unstructured":"Luo, Y., Mesgarani, N.: Conv-tasnet: surpassing ideal time\u2013frequency magnitude masking for speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(8), 1256\u20131266 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Hershey, J.R., et al.: Deep clustering: Discriminative embeddings for segmentation and separation. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Luo, Y., Chen, Z., Yoshioka, T.: Dual-path rnn: efficient long sequence modeling for time-domain single-channel speech separation. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9054266"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Wang, D., Zhuo, C., Takuya, Y.: Neural speech separation using spatially distributed microphones. arXiv preprint arXiv:2004.13670 (2020)","DOI":"10.21437\/Interspeech.2020-1089"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Kolb\u00e6k, M., et al.: Multitalker speech separation with utterance-level permutation invariant training of deep recurrent neural networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 25(10), 1901\u20131913 (2017)","DOI":"10.1109\/TASLP.2017.2726762"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Nettam, P., et al.: Human speech extraction from composite audio signal in real-time using deep neural network. In: 2024 11th International Conference on Signal Processing and Integrated Networks (SPIN), IEEE (2024)","DOI":"10.1109\/SPIN60856.2024.10511834"},{"key":"14_CR10","unstructured":"Vaswani, A.: Attention is all you need. In: Advances in Neural Information Processing Systems (2017)"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Veluri, B., et al.: Real-time target sound extraction. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10094573"},{"issue":"6","key":"14_CR12","doi-asserted-by":"publisher","first-page":"1336","DOI":"10.1109\/TKDE.2012.51","volume":"25","author":"Y-X Wang","year":"2012","unstructured":"Wang, Y.-X., Zhang, Y.-J.: Nonnegative matrix factorization: a comprehensive review. IEEE Trans. Knowl. Data Eng. 25(6), 1336\u20131353 (2012)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"14_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2851-4","volume-title":"Independent component analysis","author":"T-W Lee","year":"1998","unstructured":"Lee, T.-W., Lee, T.-W.: Independent component analysis. Springer, US (1998)"},{"issue":"7","key":"14_CR14","doi-asserted-by":"publisher","first-page":"1830","DOI":"10.1109\/TSP.2004.828896","volume":"52","author":"O Yilmaz","year":"2004","unstructured":"Yilmaz, O., Rickard, S.: Blind separation of speech mixtures via time-frequency masking. IEEE Trans. Signal Process. 52(7), 1830\u20131847 (2004)","journal-title":"IEEE Trans. Signal Process."},{"key":"14_CR15","unstructured":"Martin, R.: Spectral subtraction based on minimum statistics. Power 6(8), 1182\u20131185 (1994)"},{"key":"14_CR16","first-page":"1963","volume":"7","author":"FR Bach","year":"2006","unstructured":"Bach, F.R., Jordan, M.I.: Learning spectral clustering, with application to speech separation. J. Mach. Learn. Res. 7, 1963\u20132001 (2006)","journal-title":"J. Mach. Learn. Res."},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Chen, J., et al.: New insights into the noise reduction wiener filter. IEEE Trans. Audio, Speech Lang. Process. 14(4), 1218\u20131234 (2006)","DOI":"10.1109\/TSA.2005.860851"},{"issue":"7","key":"14_CR18","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1109\/T-C.1972.223567","volume":"100","author":"WK Pratt","year":"1972","unstructured":"Pratt, W.K.: Generalized Wiener filtering computation techniques. IEEE Trans. Comput. 100(7), 636\u2013641 (1972)","journal-title":"IEEE Trans. Comput."},{"key":"14_CR19","doi-asserted-by":"crossref","unstructured":"Yu, D., et al.: Permutation invariant training of deep models for speaker-independent multi-talker speech separation. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952154"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Huang, P.-S., et al.: Deep learning for monaural speech separation. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6853860"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Luo, Y., Nima, M.: Tasnet: time-domain audio separation network for real-time, single-channel speech separation. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462116"},{"key":"14_CR22","unstructured":"Lei, B., Jimmy, J.R.K., Geoffrey, E.H.: Layer normalization. ArXiv e-prints (2016): arXiv-1607"},{"key":"14_CR23","doi-asserted-by":"publisher","first-page":"2840","DOI":"10.1109\/TASLP.2021.3099291","volume":"29","author":"N Zeghidour","year":"2021","unstructured":"Zeghidour, N., Grangier, D.: Wavesplit: end-to-end speech separation by speaker clustering. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 2840\u20132849 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Chen, S., et al.: Continuous speech separation with conformer. In: ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9413423"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Luo, J., et al.: Tiny-sepformer: a tiny time-domain transformer network for speech separation. arXiv preprint arXiv:2206.13689 (2022)","DOI":"10.21437\/Interspeech.2022-66"},{"key":"14_CR26","doi-asserted-by":"crossref","unstructured":"Salamon, J., et al.: Scaper: a library for soundscape synthesis and augmentation. In: 2017 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), IEEE (2017)","DOI":"10.1109\/WASPAA.2017.8170052"},{"key":"14_CR27","unstructured":"Della Libera, L., et al.: Resource-efficient separation transformer. arXiv preprint arXiv:2206.09507 (2022)"},{"key":"14_CR28","unstructured":"Ravanelli, M., et al.: SpeechBrain: a general-purpose speech toolkit. arXiv preprint arXiv:2106.04624 (2021)"},{"issue":"4-5","key":"14_CR29","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1016\/0925-2312(93)90006-O","volume":"5","author":"S Amari","year":"1993","unstructured":"Amari, S.: Backpropagation and stochastic gradient descent method. Neurocomputing 5(4\u20135), 185\u2013196 (1993)","journal-title":"Neurocomputing"},{"key":"14_CR30","doi-asserted-by":"crossref","unstructured":"Tzinis, E., et al.: Heterogeneous target speech separation. arXiv preprint arXiv:2204.03594 (2022)","DOI":"10.21437\/Interspeech.2022-10717"},{"key":"14_CR31","unstructured":"Demirsahin, I., Kjartansson, O., Gutkin, A., Rivera, C.: Open-source multi-speaker corpora of the English accents in the British isles. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 6532\u20136541 (2020)"}],"container-title":["Communications in Computer and Information Science","Engineering Applications of Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-96196-0_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T18:13:35Z","timestamp":1750529615000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-96196-0_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031961953","9783031961960"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-96196-0_14","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"22 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Engineering Applications of Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Limassol","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Cyprus","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eann2025a","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eannconf.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}