{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:29:49Z","timestamp":1775230189437,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1109\/asru.2017.8268969","type":"proceedings-article","created":{"date-parts":[[2018,1,25]],"date-time":"2018-01-25T21:43:53Z","timestamp":1516916633000},"page":"437-444","source":"Crossref","is-referenced-by-count":20,"title":["Cracking the cocktail party problem by multi-beam deep attractor network"],"prefix":"10.1109","author":[{"given":"Zhuo","family":"Chen","sequence":"first","affiliation":[]},{"given":"Jinyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xiong","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Takuya","family":"Yoshioka","sequence":"additional","affiliation":[]},{"given":"Huaming","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhenghao","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yifan","family":"Gong","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"131","author":"li","year":"2012","journal-title":"Improving wideband speech recognition using mixed-bandwidth training data in CD-DNN-HMM"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"2365","DOI":"10.21437\/Interspeech.2013-552","article-title":"Restructuring of deep neural network acoustic models with singular value decomposition","author":"xue","year":"2013","journal-title":"InterSpeech"},{"key":"ref33","first-page":"1021","author":"elko","year":"2008","journal-title":"Microphone Arrays"},{"key":"ref32","first-page":"11","author":"elko","year":"2004","journal-title":"Differential Microphone Arrays"},{"key":"ref31","author":"chen","year":"2017","journal-title":"Speaker-independent speech separation with deep attractor network"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952756"},{"key":"ref37","author":"miao","year":"2016","journal-title":"Simplifying long short-term memory acoustic models for fast training and decoding"},{"key":"ref36","first-page":"338","author":"sak","year":"2014","journal-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1121\/1.382599"},{"key":"ref34","article-title":"Differential microphone arrays","year":"0","journal-title":"Dipling Hannes Pessentheiner"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1985.1164550"},{"key":"ref40","author":"li","year":"2017","journal-title":"Large-scale domain adaptation via teacher-student learning"},{"key":"ref11","author":"wang","year":"2006","journal-title":"Computational Auditory Scene Analysis Principles Algorithms and Applications"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2008.11.001"},{"key":"ref13","first-page":"102","article-title":"Notes on nonnegative tensor factorization of the spectrogram for audio source separation: statistical insights and towards self-clustering of the spatial cues","author":"f\u00e9votte","year":"2010","journal-title":"Proc Int Symp Computer Music Modeling and Retrieval"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2270369"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2013.6701883"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/S0925-2312(98)00047-2"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2004.832994"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495994"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2051355"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472778"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-169","article-title":"Conversational speech transcription using context-dependent deep neural networks","author":"seide","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2672401"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1994.1016"},{"key":"ref6","article-title":"Application of pretrained deep neural networks to large vocabulary speech recognition","author":"jaitly","year":"2012","journal-title":"Proc Inter-speech"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471664"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163900"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639345"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/1486.001.0001","author":"bregman","year":"1990","journal-title":"Auditory Scene Analysis The Perceptual Organization of Sound"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2017.7510508"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1121\/1.1907229"},{"key":"ref46","author":"isik","year":"2016","journal-title":"Single-channel multi-speaker separation using deep clustering"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638256"},{"key":"ref45","article-title":"Integration of speech enhancement and recognition using long-short term memory recurrent neural network","author":"chen","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"ref21","article-title":"Speech recognition in the presence of highly non-stationary noise based on spatial, spectral and temporal speech\/noise modeling combined with dynamic variance adaptation","author":"delcroix","year":"2011","journal-title":"CHiMEWorkshop on Machine Listening in Multisource Environments"},{"key":"ref42","article-title":"Bss_eval toolbox user guide-revision 2.0","author":"f\u00e9votte","year":"2005"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952154"},{"key":"ref41","doi-asserted-by":"crossref","first-page":"1910","DOI":"10.21437\/Interspeech.2014-432","article-title":"Learning small-size DNN with output-distribution-based criteria","author":"li","year":"2014","journal-title":"InterSpeech"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952155"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639038"},{"key":"ref26","author":"chen","year":"2017","journal-title":"Improving mask learning based speech enhancement system with restoration layers and residual connection"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/53.665"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2352935"}],"event":{"name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Okinawa, Japan","start":{"date-parts":[[2017,12,16]]},"end":{"date-parts":[[2017,12,20]]}},"container-title":["2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8260578\/8268903\/08268969.pdf?arnumber=8268969","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:47:49Z","timestamp":1751244469000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8268969\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/asru.2017.8268969","relation":{},"subject":[],"published":{"date-parts":[[2017,12]]}}}