{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:15:23Z","timestamp":1740147323629,"version":"3.37.3"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61876208","61873094"],"award-info":[{"award-number":["61876208","61873094"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SOCA"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s11761-022-00340-w","type":"journal-article","created":{"date-parts":[[2022,6,13]],"date-time":"2022-06-13T17:03:52Z","timestamp":1655139832000},"page":"111-119","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Speaker extraction network with attention mechanism for speech dialogue system"],"prefix":"10.1007","volume":"16","author":[{"given":"Yun","family":"Hao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiaju","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangkang","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zijia","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingyao","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,13]]},"reference":[{"issue":"1","key":"340_CR1","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1109\/29.45616","volume":"38","author":"K-F Lee","year":"1990","unstructured":"Lee K-F, Hon H-W, Reddy R (1990) An overview of the sphinx speech recognition system. IEEE Trans Acoust Speech Signal Process 38(1):35\u201345","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"340_CR2","doi-asserted-by":"crossref","unstructured":"Lin Y-C, Chiang T-H, Wang H-M, Peng C-M, Chang C-H (1998) The design of a multi-domain mandarin Chinese spoken dialogue system. In: Fifth international conference on spoken language processing","DOI":"10.21437\/ICSLP.1998-62"},{"key":"340_CR3","doi-asserted-by":"crossref","unstructured":"Zibert J, Martincic-Ipsic S, Hajdinjak M, Ipsic I, Mihelic F (2003) Development of a bilingual spoken dialog system for weather information retrieval. In: Eighth European conference on speech communication and technology","DOI":"10.21437\/Eurospeech.2003-279"},{"key":"340_CR4","doi-asserted-by":"crossref","unstructured":"Huang C, Xu P, Zhang X, Zhao S, Huang T, Xu B (1999) Lodestar: a mandarin spoken dialogue system for travel information retrieval. In: Sixth European conference on speech communication and technology. Citeseer","DOI":"10.21437\/Eurospeech.1999-270"},{"key":"340_CR5","doi-asserted-by":"crossref","unstructured":"Liu J, Xu Y, Seneff S, Zue V (2008) Citybrowser II: a multimodal restaurant guide in mandarin. In: International symposium on Chinese spoken language processing","DOI":"10.1109\/CHINSL.2008.ECP.50"},{"issue":"1","key":"340_CR6","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1109\/TASL.2010.2045180","volume":"19","author":"PC Loizou","year":"2010","unstructured":"Loizou PC, Kim G (2010) Reasons why current speech-enhancement algorithms do not improve speech intelligibility and suggested solutions. IEEE Trans Audio Speech Lang Process 19(1):47\u201356","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"2","key":"340_CR7","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S Boll","year":"1979","unstructured":"Boll S (1979) Suppression of acoustic noise in speech using spectral subtraction. IEEE Trans Acoust Speech Signal Process 27(2):113\u2013120","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"issue":"12","key":"340_CR8","doi-asserted-by":"publisher","first-page":"1586","DOI":"10.1109\/PROC.1979.11540","volume":"67","author":"JS Lim","year":"1979","unstructured":"Lim JS, Oppenheim AV (1979) Enhancement and bandwidth compression of noisy speech. Proc IEEE 67(12):1586\u20131604","journal-title":"Proc IEEE"},{"issue":"3","key":"340_CR9","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1109\/TASL.2012.2226156","volume":"21","author":"S Liang","year":"2012","unstructured":"Liang S, Liu W, Jiang W (2012) A new Bayesian method incorporating with local correlation for IBM estimation. IEEE Trans Audio Speech Lang Process 21(3):476\u2013487","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"340_CR10","unstructured":"Roweis ST (2000) One microphone source separation. In: NIPS, vol 13"},{"issue":"4","key":"340_CR11","doi-asserted-by":"publisher","first-page":"1118","DOI":"10.1109\/TASL.2011.2172425","volume":"20","author":"A Ozerov","year":"2011","unstructured":"Ozerov A, Vincent E, Bimbot F (2011) A general flexible framework for the handling of prior information in audio source separation. IEEE Trans Audio Speech Lang Process 20(4):1118\u20131133","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"10","key":"340_CR12","doi-asserted-by":"publisher","first-page":"2140","DOI":"10.1109\/TASL.2013.2270369","volume":"21","author":"N Mohammadiha","year":"2013","unstructured":"Mohammadiha N, Smaragdis P, Leijon A (2013) Supervised and unsupervised speech enhancement using nonnegative matrix factorization. IEEE Trans Audio Speech Lang Process 21(10):2140\u20132151","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"3","key":"340_CR13","doi-asserted-by":"publisher","first-page":"1066","DOI":"10.1109\/TASL.2006.885253","volume":"15","author":"T Virtanen","year":"2007","unstructured":"Virtanen T (2007) Monaural sound source separation by nonnegative matrix factorization with temporal continuity and sparseness criteria. IEEE Trans Audio Speech Lang Process 15(3):1066\u20131074","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"1","key":"340_CR14","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1109\/TNN.2007.913988","volume":"19","author":"D Wang","year":"2008","unstructured":"Wang D, Brown G (2008) Computational auditory scene analysis: principles, algorithms and applications. IEEE Trans Neural Netw 19(1):199\u2013199","journal-title":"IEEE Trans Neural Netw"},{"key":"340_CR15","unstructured":"Jia X, Li D (2022) TFCN: temporal-frequential convolutional network for single-channel speech enhancement. arXiv:2201.00480"},{"key":"340_CR16","doi-asserted-by":"crossref","unstructured":"Hao Y, Huang X, Huang H, Wu Q (2021) Denoi-spex+: a speaker extraction network based speech dialogue system. In: The IEEE international conference on e-business engineering (ICEBE)","DOI":"10.1109\/ICEBE52470.2021.00030"},{"issue":"5","key":"340_CR17","doi-asserted-by":"publisher","first-page":"450","DOI":"10.1016\/j.specom.2010.02.004","volume":"52","author":"K Paliwal","year":"2010","unstructured":"Paliwal K, W\u00f3jcicki K, Schwerin B (2010) Single-channel speech enhancement using spectral subtraction in the short-time modulation domain. Speech Commun 52(5):450\u2013475","journal-title":"Speech Commun"},{"issue":"7","key":"340_CR18","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Osindero S, Teh Y-W (2006) A fast learning algorithm for deep belief nets. Neural Comput 18(7):1527\u20131554","journal-title":"Neural Comput"},{"key":"340_CR19","unstructured":"Nair V, Hinton GE (2010) Rectified linear units improve restricted Boltzmann machines. In: Icml"},{"key":"340_CR20","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid O, Mohamed A-r, Jiang H, Penn G (2012) Applying convolutional neural networks concepts to hybrid NN-HMM model for speech recognition. In: 2012 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 4277\u20134280","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"340_CR21","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid O, Deng L, Yu D (2013) Exploring convolutional neural network structures and optimization techniques for speech recognition. In: Interspeech, vol 11. Citeseer, pp 73\u201375","DOI":"10.21437\/Interspeech.2013-744"},{"issue":"1","key":"340_CR22","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/s11761-020-00304-y","volume":"15","author":"MI Smahi","year":"2021","unstructured":"Smahi MI, Hadjila F, Tibermacine C, Benamar A (2021) A deep learning approach for collaborative prediction of web service QoS. SOCA 15(1):5\u201320","journal-title":"SOCA"},{"issue":"10","key":"340_CR23","doi-asserted-by":"publisher","first-page":"1702","DOI":"10.1109\/TASLP.2018.2842159","volume":"26","author":"D Wang","year":"2018","unstructured":"Wang D, Chen J (2018) Supervised speech separation based on deep learning: an overview. IEEE\/ACM Trans Audio Speech Lang Process 26(10):1702-1726","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"340_CR24","doi-asserted-by":"crossref","unstructured":"Fan C, Liu B, Tao J, Wen Z, Yi J, Bai Y (2018) Utterance-level permutation invariant training with discriminative learning for single channel speech separation. In: 2018 11th international symposium on chinese spoken language processing (ISCSLP). IEEE, pp 26\u201330","DOI":"10.1109\/ISCSLP.2018.8706611"},{"key":"340_CR25","doi-asserted-by":"crossref","unstructured":"Yu D, Kolb\u00e6k M, Tan Z-H, Jensen J (2017) Permutation invariant training of deep models for speaker-independent multi-talker speech separation. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 241\u2013245","DOI":"10.1109\/ICASSP.2017.7952154"},{"issue":"10","key":"340_CR26","doi-asserted-by":"publisher","first-page":"1901","DOI":"10.1109\/TASLP.2017.2726762","volume":"25","author":"M Kolb\u00e6k","year":"2017","unstructured":"Kolb\u00e6k M, Yu D, Tan Z-H, Jensen J (2017) Multitalker speech separation with utterance-level permutation invariant training of deep recurrent neural networks. IEEE\/ACM Trans Audio Speech Lang Process 25(10):1901\u20131913","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"340_CR27","doi-asserted-by":"crossref","unstructured":"Hershey JR, Chen Z, Le\u00a0Roux J, Watanabe S (2016) Deep clustering: discriminative embeddings for segmentation and separation. In: 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 31\u201335","DOI":"10.1109\/ICASSP.2016.7471631"},{"issue":"3","key":"340_CR28","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1109\/TASLP.2015.2512042","volume":"24","author":"DS Williamson","year":"2016","unstructured":"Williamson DS, Wang Y, Wang DL (2016) Complex ratio masking for monaural speech separation. IEEE\/ACM Trans Audio Speech Lang Process 24(3):483","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"340_CR29","doi-asserted-by":"crossref","unstructured":"Lee Y-S, Wang C-Y, Wang S-F, Wang J-C, Wu C-H (2017) Fully complex deep neural network for phase-incorporating monaural source separation. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 281\u2013285","DOI":"10.1109\/ICASSP.2017.7952162"},{"key":"340_CR30","doi-asserted-by":"crossref","unstructured":"Pascual S, Bonafonte A, Serra J (2017) Segan: speech enhancement generative adversarial network. arXiv:1703.09452","DOI":"10.21437\/Interspeech.2017-1428"},{"issue":"6","key":"340_CR31","doi-asserted-by":"publisher","first-page":"4705","DOI":"10.1121\/1.4986931","volume":"141","author":"J Chen","year":"2017","unstructured":"Chen J, Wang D (2017) Long short-term memory for speaker generalization in supervised speech separation. J Acoust Soc Am 141(6):4705-4714","journal-title":"J Acoust Soc Am"},{"issue":"8","key":"340_CR32","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/TASLP.2019.2915167","volume":"27","author":"Y Luo","year":"2019","unstructured":"Luo Y, Mesgarani N (2019) CONV-TasNet: surpassing ideal time-frequency magnitude masking for speech separation. IEEE\/ACM Trans Audio Speech Lang Process 27(8):1256\u20131266","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"340_CR33","doi-asserted-by":"crossref","unstructured":"Xu C, Rao W, Chng ES, Li H (2019) Time-domain speaker extraction network. In: 2019 IEEE automatic speech recognition and understanding workshop (ASRU). IEEE, pp 327\u2013334","DOI":"10.1109\/ASRU46091.2019.9004016"},{"key":"340_CR34","doi-asserted-by":"crossref","unstructured":"Lea C, Vidal R, Reiter A, Hager GD (2016) Temporal convolutional networks: a unified approach to action segmentation. In: European conference on computer vision. Springer, pp 47\u201354","DOI":"10.1007\/978-3-319-49409-8_7"},{"key":"340_CR35","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"340_CR36","doi-asserted-by":"publisher","first-page":"1370","DOI":"10.1109\/TASLP.2020.2987429","volume":"28","author":"C Xu","year":"2020","unstructured":"Xu C, Rao W, Chng ES, Li H (2020) SpEx: Multi-scale time domain speaker extraction network. IEEE\/ACM Trans Audio Speech Lang Process 28:1370\u20131384","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"340_CR37","doi-asserted-by":"crossref","unstructured":"Delcroix M, Ochiai T, Zmolikova K, Kinoshita K, Tawara N, Nakatani T, Araki S (2020) Improving speaker discrimination of target speech extraction with time-domain speakerbeam. In: ICASSP 2020\u20142020 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 691\u2013695","DOI":"10.1109\/ICASSP40776.2020.9054683"},{"key":"340_CR38","doi-asserted-by":"crossref","unstructured":"Bu H, Du J, Na X, Wu B, Zheng H (2017) Aishell-1: an open-source mandarin speech corpus and a speech recognition baseline. In: 2017 20th conference of the oriental chapter of the international coordinating committee on speech databases and speech I\/O systems and assessment (O-COCOSDA). IEEE, pp 1\u20135","DOI":"10.1109\/ICSDA.2017.8384449"}],"container-title":["Service Oriented Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11761-022-00340-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11761-022-00340-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11761-022-00340-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,8]],"date-time":"2023-02-08T02:06:22Z","timestamp":1675821982000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11761-022-00340-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6]]},"references-count":38,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["340"],"URL":"https:\/\/doi.org\/10.1007\/s11761-022-00340-w","relation":{},"ISSN":["1863-2386","1863-2394"],"issn-type":[{"type":"print","value":"1863-2386"},{"type":"electronic","value":"1863-2394"}],"subject":[],"published":{"date-parts":[[2022,6]]},"assertion":[{"value":"27 March 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 May 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 May 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 June 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}