{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:54:37Z","timestamp":1743123277196,"version":"3.40.3"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031160776"},{"type":"electronic","value":"9783031160783"}],"license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-16078-3_38","type":"book-chapter","created":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:16:25Z","timestamp":1661991385000},"page":"557-567","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["RAVSSNet: Recurrent Audio Visual Speech Separation"],"prefix":"10.1007","author":[{"given":"M.","family":"Chandan Shankar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hemanth","family":"Nag","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shikha","family":"Tripathi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,9,1]]},"reference":[{"key":"38_CR1","doi-asserted-by":"publisher","unstructured":"Comon, P.: Independent component analysis, a new concept? Signal Process. 36(3), 287\u2013314 (1994). https:\/\/doi.org\/10.1016\/0165-1684(94)90029-9. ISSN 0165\u20131684","DOI":"10.1016\/0165-1684(94)90029-9"},{"key":"38_CR2","doi-asserted-by":"publisher","unstructured":"Ellis, D.: Computational auditory scene analysis exploiting speech-recognition knowledge. In: Proceedings of 1997 Workshop on Applications of Signal Processing to Audio and Acoustics, p. 4 (1997). https:\/\/doi.org\/10.1109\/ASPAA.1997.625625","DOI":"10.1109\/ASPAA.1997.625625"},{"key":"38_CR3","doi-asserted-by":"publisher","unstructured":"Akarsh, K.A., Selvi, R.S.: Speech enhancement using non negative matrix factorization and enhanced NMF. In: 2015 International Conference on Circuits, Power and Computing Technologies [ICCPCT-2015], 2015, pp. 1\u20137 (2015). https:\/\/doi.org\/10.1109\/ICCPCT.2015.7159386","DOI":"10.1109\/ICCPCT.2015.7159386"},{"key":"38_CR4","doi-asserted-by":"crossref","unstructured":"Schmidt, M.N., Olsson, R.K.: Single-channel speech separation using sparse non-negative matrix factorization. In: INTERSPEECH-2006, paper 1652-ThuFop. 10 (2006)","DOI":"10.21437\/Interspeech.2006-655"},{"key":"38_CR5","doi-asserted-by":"publisher","unstructured":"Yu, D., Kolb\u00e6k, M., Tan, Z., Jensen, J.: Permutation invariant training of deep models for speaker-independent multi-talker speech separation. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), New Orleans, LA, USA, pp. 241\u2013245 (2017). https:\/\/doi.org\/10.1109\/ICASSP.2017.7952154","DOI":"10.1109\/ICASSP.2017.7952154"},{"key":"38_CR6","doi-asserted-by":"publisher","unstructured":"Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S.: Deep clustering: discriminative embeddings for segmentation and separation. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Shanghai, China, pp. 31\u201335 (2016). https:\/\/doi.org\/10.1109\/ICASSP.2016.7471631","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"38_CR7","doi-asserted-by":"publisher","unstructured":"Ephrat, A., et al.: Looking to listen at the cocktail party: a speaker-independent audio-visual model for speech separation. ACM Trans. Graph. 37(4), 11, Article 112 (2018). https:\/\/doi.org\/10.1145\/3197517.3201357","DOI":"10.1145\/3197517.3201357"},{"key":"38_CR8","doi-asserted-by":"publisher","unstructured":"Luo, Y., Mesgarani, N.: TaSNet: time-domain audio separation network for real-time, single-channel speech separation. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing(ICASSP), Calgary, AB, Canada, 2018, pp. 696\u2013700. https:\/\/doi.org\/10.1109\/ICASSP.2018.8462116","DOI":"10.1109\/ICASSP.2018.8462116"},{"issue":"8","key":"38_CR9","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/TASLP.2019.2915167","volume":"27","author":"Y Luo","year":"2019","unstructured":"Luo, Y., Mesgarani, N.: Conv-TasNet: surpassing ideal time-frequency magnitude masking for speech separation. IEEE\/ACM Trans. Audio, Speech, Lang. Process. 27(8), 1256\u20131266 (2019). https:\/\/doi.org\/10.1109\/TASLP.2019.2915167","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"key":"38_CR10","doi-asserted-by":"crossref","unstructured":"Luo, Y., Chen, Z., Yoshioka, T.: Dual-path RNN: efficient long sequence modeling for time-domain single-channel speech separation. arXiv preprint arXiv:1910.06379 (2019)","DOI":"10.1109\/ICASSP40776.2020.9054266"},{"issue":"4","key":"38_CR11","doi-asserted-by":"publisher","first-page":"1462","DOI":"10.1109\/TSA.2005.858005","volume":"14","author":"E Vincent","year":"2006","unstructured":"Vincent, E., Gribonval, R., Fevotte, C.: Performance measurement in blind audio source separation. IEEE Trans. Audio, Speech, Lang. Process. 14(4), 1462\u20131469 (2006). https:\/\/doi.org\/10.1109\/TSA.2005.858005","journal-title":"IEEE Trans. Audio, Speech, Lang. Process."},{"issue":"3","key":"38_CR12","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1109\/JSTSP.2020.2987209","volume":"14","author":"K Tan","year":"2020","unstructured":"Tan, K., Xu, Y., Zhang, S., Yu, M., Yu, D.: Audio-visual speech separation and dereverberation with a two-stage multimodal network. IEEE J. Sel. Topics Signal Process. 14(3), 542\u2013553 (2020). https:\/\/doi.org\/10.1109\/JSTSP.2020.2987209","journal-title":"IEEE J. Sel. Topics Signal Process."},{"key":"38_CR13","doi-asserted-by":"crossref","unstructured":"Gao, R., Grauman, K.: VisualVoice: audio-visual speech separation with cross-modal consistency, January 2021. arXiv:2101.03149 [cs.CV]","DOI":"10.1109\/CVPR46437.2021.01524"},{"key":"38_CR14","doi-asserted-by":"crossref","unstructured":"Wu, J., et al.: Time domain audio visual speech separation. In: Proceedings of IEEE Automation Speech Recognition Understanding Workshop, pp. 667\u2013673 (2019)","DOI":"10.1109\/ASRU46091.2019.9003983"},{"key":"38_CR15","doi-asserted-by":"publisher","first-page":"3540","DOI":"10.21437\/Interspeech.2021-1560","volume":"2021","author":"R Rigal","year":"2021","unstructured":"Rigal, R., Chodorowski, J., Zerr, B.: Deep audio-visual speech separation based on facial motion. Proc. Interspeech 2021, 3540\u20133544 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-1560","journal-title":"Proc. Interspeech"},{"key":"38_CR16","unstructured":"Assael, Y.M., Shillingford, B., Whiteson, S., de Freitas, N.: LipNet: End-to-End Sentence-level Lipreading (2016). arXiv:1611.01599 [cs.LG]"},{"key":"38_CR17","doi-asserted-by":"crossref","unstructured":"Le Roux, J., Wisdom, S., Erdogan, H., Hershey, J.R.: SDR - half-baked or well done? arXiv:1811.02508 [cs.SD] (2018)","DOI":"10.1109\/ICASSP.2019.8683855"}],"container-title":["Lecture Notes in Networks and Systems","Intelligent Systems and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-16078-3_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:28:33Z","timestamp":1661992113000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-16078-3_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,1]]},"ISBN":["9783031160776","9783031160783"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-16078-3_38","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2022,9,1]]},"assertion":[{"value":"1 September 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IntelliSys","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Proceedings of SAI Intelligent Systems Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"intellisys2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/saiconference.com\/IntelliSys","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}