{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,17]],"date-time":"2026-06-17T16:19:39Z","timestamp":1781713179082,"version":"3.54.5"},"reference-count":65,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/taslp.2022.3162080","type":"journal-article","created":{"date-parts":[[2022,3,24]],"date-time":"2022-03-24T21:48:13Z","timestamp":1648158493000},"page":"1493-1507","source":"Crossref","is-referenced-by-count":57,"title":["Encoder-Decoder Based Attractors for End-to-End Neural Diarization"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3166-4956","authenticated-orcid":false,"given":"Shota","family":"Horiguchi","sequence":"first","affiliation":[{"name":"Hitachi, Ltd., Kokubunji-shi, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6523-8146","authenticated-orcid":false,"given":"Yusuke","family":"Fujita","sequence":"additional","affiliation":[{"name":"Hitachi, Ltd., Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5970-8631","authenticated-orcid":false,"given":"Shinji","family":"Watanabe","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yawen","family":"Xue","sequence":"additional","affiliation":[{"name":"Hitachi, Ltd., Kokubunji-shi, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7449-5726","authenticated-orcid":false,"given":"Paola","family":"Garcia","sequence":"additional","affiliation":[{"name":"Johns Hopkins University, Baltimore, MD, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101317"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-007-9040-x"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/CHiME.2020-1"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053426"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/CHiME.2018-8"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414998"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462628"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2019.2961071"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078610"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462646"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682572"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2899"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003959"},{"key":"ref14","article-title":"End-to-end neural diarization: Reformulating speaker diarization as simple multi-label classification","author":"Fujita","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1022"},{"key":"ref16","article-title":"Neural speaker diarization with speaker-wise chain rule","author":"Fujita","year":"2020"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383555"},{"key":"ref18","article-title":"The Hitachi-JHU DIHARD III system: Competitive end-to-end neural diarization and X-vector clustering systems combined by DOVER-Lap","volume-title":"Proc. 3rd DIHARD Speech Diarization Challenge Workshop","author":"Horiguchi","year":"2021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414371"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-708"},{"key":"ref21","first-page":"2834","article-title":"LSTM neural network-based speaker segmentation using acoustic and language modelling","volume-title":"Proc. Conf. Int. Speech Commun. Assoc.","author":"Massana","year":"2017"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1947"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2013.2285474"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1893"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2813"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413832"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683892"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2264673"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-166"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953094"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461546"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383602"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2955293"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101254"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383617"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053760"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1268"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1208"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1602"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952154"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952118"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462116"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2915167"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2795749"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3099291"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1550"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952155"},{"key":"ref49","first-page":"3744","article-title":"Set Transformer: A framework for attention-based permutation-invariant neural networks","volume-title":"Proc. Int. Conf. Mach. Learn","author":"Lee","year":"2019"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-99978-4_10"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054251"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413436"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383490"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004031"},{"key":"ref57","first-page":"7","article-title":"Corpus of spontaneous Japanese: Its design and evaluation","volume-title":"Proc. ISCA IEEE Workshop Spontaneous Speech Process. Recognit.","author":"Maekawa","year":"2003"},{"key":"ref58","first-page":"1248","article-title":"Guided source separation meets a strong ASR backend: Hitachi\/Paderborn University joint investigation for dinner party scenario","volume-title":"Proc. Conf. Int. Speech Commun. Assoc.","author":"Kanda","year":"2019"},{"key":"ref59","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","year":"2015"},{"key":"ref60","first-page":"5998","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Inf. Process. Syst","author":"Vaswani","year":"2017"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004009"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461778"},{"key":"ref63","article-title":"BUT system description for the third DIHARD speech diarization challenge","volume-title":"Proc. 3rd DIHARD Speech Diarization Challenge Workshop","author":"Landini","year":"2021"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414841"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9687875"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1909"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9657755\/09741374.pdf?arnumber=9741374","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,17]],"date-time":"2024-01-17T23:59:27Z","timestamp":1705535967000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9741374\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":65,"URL":"https:\/\/doi.org\/10.1109\/taslp.2022.3162080","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}