{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,2]],"date-time":"2026-07-02T05:50:56Z","timestamp":1782971456475,"version":"3.54.5"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030377304","type":"print"},{"value":"9783030377311","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,12,24]],"date-time":"2019-12-24T00:00:00Z","timestamp":1577145600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-37731-1_53","type":"book-chapter","created":{"date-parts":[[2019,12,27]],"date-time":"2019-12-27T06:02:51Z","timestamp":1577426571000},"page":"653-665","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":45,"title":["FurcaNeXt: End-to-End Monaural Speech Separation with Dynamic Gated Dilated Temporal Convolutional Networks"],"prefix":"10.1007","author":[{"given":"Liwen","family":"Zhang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ziqiang","family":"Shi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiqing","family":"Han","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anyan","family":"Shi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ding","family":"Ma","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2019,12,24]]},"reference":[{"key":"53_CR1","doi-asserted-by":"publisher","unstructured":"Assmann, P., Summerfield, Q.: The perception of speech under adverse conditions. In: Speech Processing in the Auditory System, pp. 231\u2013308. Springer, New York (2004). https:\/\/doi.org\/10.1007\/0-387-21575-1_5","DOI":"10.1007\/0-387-21575-1_5"},{"key":"53_CR2","unstructured":"Bai, S., Kolter, J.Z., Koltun, V.: An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271 (2018)"},{"key":"53_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Z., Luo, Y., Mesgarani, N.: Deep attractor network for single-microphone speaker separation. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 246\u2013250. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952155"},{"key":"53_CR4","unstructured":"Dauphin, Y.N., Fan, A., Auli, M., Grangier, D.: Language modeling with gated convolutional networks. In: International Conference on Machine Learning, pp. 933\u2013941 (2016)"},{"key":"53_CR5","unstructured":"F\u00e9votte, C., Gribonval, R., Vincent, E.: Bss$$\\_$$eval toolbox user guide-revision 2.0 (2005)"},{"key":"53_CR6","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Delving deep into rectifiers: surpassing human-level performance on imagenet classification. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1026\u20131034 (2015)","DOI":"10.1109\/ICCV.2015.123"},{"key":"53_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"53_CR8","doi-asserted-by":"crossref","unstructured":"Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S.: Deep clustering: discriminative embeddings for segmentation and separation. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 31\u201335. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"53_CR9","unstructured":"Howard, A.G., et al.: Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"issue":"1","key":"53_CR10","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1109\/TASL.2012.2215591","volume":"21","author":"K Hu","year":"2013","unstructured":"Hu, K., Wang, D.: An unsupervised approach to cochannel speech separation. IEEE Trans. Audio, Speech, Language Processing 21(1), 122\u2013131 (2013)","journal-title":"IEEE Trans. Audio, Speech, Language Processing"},{"key":"53_CR11","doi-asserted-by":"crossref","unstructured":"Isik, Y., Roux, J.L., Chen, Z., Watanabe, S., Hershey, J.R.: Single-channel multi-speaker separation using deep clustering. arXiv preprint arXiv:1607.02173 (2016)","DOI":"10.21437\/Interspeech.2016-1176"},{"issue":"10","key":"53_CR12","doi-asserted-by":"publisher","first-page":"1901","DOI":"10.1109\/TASLP.2017.2726762","volume":"25","author":"M Kolb\u00e6k","year":"2017","unstructured":"Kolb\u00e6k, M., et al.: Multitalker speech separation with utterance-level permutation invariant training of deep recurrent neural networks. IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP) 25(10), 1901\u20131913 (2017)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP)"},{"key":"53_CR13","unstructured":"Le Roux, J., Weninger, F.J., Hershey, J.R.: Sparse nmf half-baked or well done? Mitsubishi Electric Research Labs (MERL), Cambridge, MA, USA, Technical Report, no. TR2015-023 (2015)"},{"key":"53_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/978-3-319-49409-8_7","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"C Lea","year":"2016","unstructured":"Lea, C., Vidal, R., Reiter, A., Hager, G.D.: Temporal convolutional networks: a unified approach to action segmentation. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9915, pp. 47\u201354. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-49409-8_7"},{"key":"53_CR15","doi-asserted-by":"crossref","unstructured":"Li, C., Zhu, L., Xu, S., Gao, P., Xu, B.: CBLDNN-based speaker-independent speech separation via generative adversarial training (2018)","DOI":"10.1109\/ICASSP.2018.8462505"},{"issue":"4","key":"53_CR16","doi-asserted-by":"publisher","first-page":"787","DOI":"10.1109\/TASLP.2018.2795749","volume":"26","author":"Y Luo","year":"2018","unstructured":"Luo, Y., Chen, Z., Mesgarani, N.: Speaker-independent speech separation with deep attractor network. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(4), 787\u2013796 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"53_CR17","doi-asserted-by":"crossref","unstructured":"Luo, Y., Mesgarani, N.: Tasnet: time-domain audio separation network for real-time, single-channel speech separation. arXiv preprint arXiv:1711.00541 (2017)","DOI":"10.1109\/ICASSP.2018.8462116"},{"key":"53_CR18","doi-asserted-by":"crossref","unstructured":"Luo, Y., Mesgarani, N.: Tasnet: Surpassing ideal time-frequency masking for speech separation. arXiv preprint arXiv:1809.07454 (2018)","DOI":"10.1109\/TASLP.2019.2915167"},{"key":"53_CR19","doi-asserted-by":"crossref","unstructured":"Roux, J.L., Wisdom, S., Erdogan, H., Hershey, J.R.: SDR-half-baked or well done? arXiv preprint arXiv:1811.02508 (2018)","DOI":"10.1109\/ICASSP.2019.8683855"},{"issue":"1","key":"53_CR20","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1109\/TSA.2005.854106","volume":"14","author":"Y Shao","year":"2006","unstructured":"Shao, Y., Wang, D.: Model-based sequential organization in cochannel speech. IEEE Trans. Audio Speech Lang. Process. 14(1), 289\u2013298 (2006)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"53_CR21","doi-asserted-by":"crossref","unstructured":"Shi, Z., Lin, H., Liu, L., Liu, R., Hayakawa, S., Han, J.: Furcax: end-to-end monaural speech separation based on deep gated (de)convolutional neural networks with adversarial example training. In: Proceedings of the ICASSP (2019)","DOI":"10.1109\/ICASSP.2019.8682429"},{"key":"53_CR22","unstructured":"Shi, Z., et al.: Furcanet: An end-to-end deep gated convolutional, long short-term memory, deep neural networks for single channel speech separation. arXiv preprint arXiv:1902.00651 (2019)"},{"issue":"1","key":"53_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TASL.2006.876726","volume":"15","author":"P Smaragdis","year":"2007","unstructured":"Smaragdis, P., et al.: Convolutive speech bases and their application to supervised speech separation. IEEE Trans. Audio Speech Lang. Process. 15(1), 1 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"53_CR24","unstructured":"Srivastava, R.K., Greff, K., Schmidhuber, J.: Highway networks. arXiv preprint arXiv:1505.00387 (2015)"},{"key":"53_CR25","doi-asserted-by":"crossref","unstructured":"Taal, C.H., Hendriks, R.C., Heusdens, R., Jensen, J.: A short-time objective intelligibility measure for time-frequency weighted noisy speech. In: 2010 IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP), pp. 4214\u20134217. IEEE (2010)","DOI":"10.1109\/ICASSP.2010.5495701"},{"key":"53_CR26","unstructured":"Van Den Oord, A., et al.: Wavenet: a generative model for raw audio. CoRR abs\/1609.03499 (2016)"},{"key":"53_CR27","unstructured":"Venkataramani, S., Casebeer, J., Smaragdis, P.: Adaptive front-ends for end-to-end source separation. In: Proceedings of the NIPS (2017)"},{"issue":"4","key":"53_CR28","doi-asserted-by":"publisher","first-page":"1462","DOI":"10.1109\/TSA.2005.858005","volume":"14","author":"E Vincent","year":"2006","unstructured":"Vincent, E., Gribonval, R., F\u00e9votte, C.: Performance measurement in blind audio source separation. IEEE Trans. Audio Speech Lang. Process. 14(4), 1462\u20131469 (2006)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"53_CR29","doi-asserted-by":"crossref","unstructured":"Virtanen, T.: Speech recognition using factorial hidden markov models for separation in the feature space. In: Ninth International Conference on Spoken Language Processing (2006)","DOI":"10.21437\/Interspeech.2006-23"},{"key":"53_CR30","doi-asserted-by":"publisher","DOI":"10.1109\/9780470043387","volume-title":"Computational Auditory Scene Analysis: Principles, Algorithms, and Applications","author":"D Wang","year":"2006","unstructured":"Wang, D., Brown, G.J.: Computational Auditory Scene Analysis: Principles, Algorithms, and Applications. Wiley-IEEE Press, New York (2006)"},{"key":"53_CR31","doi-asserted-by":"crossref","unstructured":"Wang, Z.Q., Le Roux, J., Hershey, J.R.: Alternative objective functions for deep clustering. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2018)","DOI":"10.1109\/ICASSP.2018.8462507"},{"key":"53_CR32","doi-asserted-by":"crossref","unstructured":"Xu, C., et al.: Single channel speech separation with constrained utterance level permutation invariant training using grid LSTM (2018)","DOI":"10.1109\/ICASSP.2018.8462471"},{"key":"53_CR33","unstructured":"Yang, W., Benbouchta, M., Yantorno, R.: Performance of the modified bark spectral distortion as an objective speech quality measure. In: Proceedings of the 1998 IEEE International Conference on Acoustics, Speech and Signal Processing, vol. 1, pp. 541\u2013544. IEEE (1998)"},{"key":"53_CR34","unstructured":"Yousef, M., Hussain, K.F., Mohammed, U.S.: Accurate, data-efficient, unconstrained text recognition with convolutional neural networks. arXiv preprint arXiv:1812.11894 (2018)"},{"key":"53_CR35","doi-asserted-by":"crossref","unstructured":"Yu, D., Kolb\u00e6k, M., Tan, Z.H., Jensen, J.: Permutation invariant training of deep models for speaker-independent multi-talker speech separation. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 241\u2013245. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952154"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-37731-1_53","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,9]],"date-time":"2022-10-09T11:38:49Z","timestamp":1665315529000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-37731-1_53"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12,24]]},"ISBN":["9783030377304","9783030377311"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-37731-1_53","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,12,24]]},"assertion":[{"value":"24 December 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 January 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 January 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.mmm2020.kr\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"171","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Of the 171 submissions, 46 were accepted as poster papers; of the 49 special session paper submissions, 28 were accepted for oral presentation and 8 for poster presentation; 9 demo papers and 10 VBS papers were also accepted.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}