{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T17:29:48Z","timestamp":1776706188987,"version":"3.51.2"},"reference-count":76,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/taslp.2021.3093817","type":"journal-article","created":{"date-parts":[[2021,6,30]],"date-time":"2021-06-30T19:57:05Z","timestamp":1625083025000},"page":"2324-2338","source":"Crossref","is-referenced-by-count":12,"title":["Hybrid Speech and Text Analysis Methods for Speaker Change Detection"],"prefix":"10.1109","volume":"29","author":[{"given":"Or Haim","family":"Anidjar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Itshak","family":"Lapidot","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9940-5654","authenticated-orcid":false,"given":"Chen","family":"Hajaj","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3670-0784","authenticated-orcid":false,"given":"Amit","family":"Dvir","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Issachar","family":"Gilad","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00794-2_37"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078608"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2346315"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00054"},{"key":"ref76","first-page":"8778","article-title":"Generalized cross entropy loss for training deep neural networks with noisy labels","author":"zhang","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683892"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1388"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.3390\/econometrics8010007"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2018.05.026"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2018.8552933"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2017.8169991"},{"key":"ref31","article-title":"Siamese neural networks for one-shot image recognition","volume":"2","author":"koch","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref30","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"ICLR (Poster)"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682749"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054624"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/502628.502630"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-018-6992-3"},{"key":"ref60","first-page":"1685","article-title":"Speaker recognition using mfcc and improved weighted vector quantization algorithm","volume":"7","author":"sunitha","year":"2015","journal-title":"Int J Eng Technol"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1080\/10618600.2019.1647216"},{"key":"ref61","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.878256"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053122"},{"key":"ref64","doi-asserted-by":"crossref","first-page":"107299","DOI":"10.1016\/j.sigpro.2019.107299","article-title":"Selective review of offline change point detection methods","volume":"167","author":"truong","year":"2019","journal-title":"Signal Process"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpowsour.2020.228051"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462665"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1214\/20-EJS1710"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2010.2081790"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462628"},{"key":"ref68","first-page":"577","article-title":"Is word error rate a good indicator for spoken language understanding accuracy","author":"wang","year":"0","journal-title":"Proc IEEE Workshop Autom Speech Recognit Understanding"},{"key":"ref69","first-page":"867","article-title":"A widely applicable Bayesian information criterion","volume":"14","author":"watanabe","year":"2013","journal-title":"J Mach Learn Res"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.4103\/0972-6748.62274"},{"key":"ref1","article-title":"A thousand words are worth more than one recording: NLP based speaker change point detection","author":"anidjar","year":"2021","journal-title":"Proc INTERSPEECH"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/COLI_a_00137"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472652"},{"key":"ref21","first-page":"715","article-title":"Multi-domain joint semantic frame parsing using bi-directional RNN-LSTM","author":"hakkani-t\u00fcr","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1022"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639630"},{"key":"ref26","first-page":"2834","article-title":"LSTM neural network-based speaker segmentation using acoustic and language modelling","author":"massana","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953097"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1268"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-13-9042-5_25"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053317"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2015.7362757"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2020-4"},{"key":"ref55","first-page":"945","article-title":"Exploiting intra-conversation variability for speaker diarization","author":"shum","year":"2011","journal-title":"Proc 12th Annu Conf Int Speech Commun Assoc"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(00)00028-5"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1943"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1893"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"73","DOI":"10.4310\/CMS.2021.v19.n1.a4","article-title":"An MBO scheme for clustering and semi-supervised clustering of signed networks","volume":"19","author":"cucuringu","year":"2021","journal-title":"Commun Math Sci"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67220-5_10"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/641007.641127"},{"key":"ref12","author":"das","year":"2017","journal-title":"Int J Sci Res Comput Sci Eng Inf Technol"},{"key":"ref13","first-page":"4171","article-title":"Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"0","journal-title":"Proc NAACL-HLT"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960529"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICODSE.2017.8285868"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2018.02.003"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053477"},{"key":"ref18","article-title":"End-to-end neural diarization: Reformulating speaker diarization as simple multi-label classification","author":"fujita","year":"0","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"ref19","first-page":"1243","article-title":"Convolutional sequence to sequence learning","author":"gehring","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2161079"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054106"},{"key":"ref6","first-page":"7124","article-title":"Pyannote. Audio: Neural building blocks for speaker diarization","author":"bredin","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2009.5306205"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IWAENC.2018.8521259"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683640"},{"key":"ref49","article-title":"Softmax is not an artificial trick: An information-theoretic view of softmax in neural networks","author":"qin","year":"0"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324916000334"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2007.1077"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.21437\/INTERSPEECH.2017-950"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1947"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1364"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.3607820"},{"key":"ref41","first-page":"466","article-title":"Segmentation-based method for text-dependent speaker recognition in embedded applications","author":"luo","year":"0","journal-title":"Proc Second APSIPA Ann Summit Conf"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133110"},{"key":"ref43","article-title":"Links: A high-dimensional online clustering method","author":"mansfield","year":"0"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9289074\/09468954.pdf?arnumber=9468954","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:53:57Z","timestamp":1652194437000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9468954\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":76,"URL":"https:\/\/doi.org\/10.1109\/taslp.2021.3093817","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}