{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T18:49:18Z","timestamp":1768070958633,"version":"3.49.0"},"reference-count":36,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1109\/slt.2016.7846261","type":"proceedings-article","created":{"date-parts":[[2017,2,10]],"date-time":"2017-02-10T15:58:30Z","timestamp":1486742310000},"page":"171-178","source":"Crossref","is-referenced-by-count":95,"title":["End-to-End attention based text-dependent speaker verification"],"prefix":"10.1109","author":[{"given":"Shi-Xiong","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Zhuo","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Jinyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yifan","family":"Gong","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"Theano: A Python framework for fast computation of mathematical expressions","volume":"abs 1605 2688","year":"2016","journal-title":"ArXiv e-prints"},{"key":"ref32","first-page":"130","article-title":"Kronecker products and matrix calculus: With applications","author":"graham","year":"1982"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref30","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"arXiv preprint arXiv 1502 01032"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"ref35","article-title":"Feature warping for robust speaker verification","author":"pelecanos","year":"2001","journal-title":"InterSpeech"},{"key":"ref34","article-title":"Keras","author":"chollet","year":"2015"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2010.2090893"},{"key":"ref11","first-page":"1","article-title":"Text-dependent speaker recognition using plda with uncertainty propagation","volume":"500","author":"stafylakis","year":"2013","journal-title":"Matrix"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853887"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2016-2"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2015.2420092"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2016-4"},{"key":"ref16","first-page":"15","article-title":"Cosine similarity scoring without score normalization techniques","author":"najim","year":"2010","journal-title":"Odyssey"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639151"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1033"},{"key":"ref19","article-title":"The IBM 2015 english conversational telephone speech recognition system","author":"george","year":"2015","journal-title":"arXiv preprint arXiv 1505 03561"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2007.1081"},{"key":"ref4","first-page":"56","article-title":"Text-dependent speaker verification: Classifiers, databases and rsr2015","volume":"60","author":"haizhou","year":"0","journal-title":"Speech Communication"},{"key":"ref27","first-page":"223","article-title":"Tandem connectionist feature extraction for conversational speech recognition","author":"zhu","year":"2004","journal-title":"5th International Workshop on Machine Learning for Multimodal Interaction"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1991.150355"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"ref29","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"ICLRE"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472652"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.881693"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(95)00009-D"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1999.758125"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853692"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/5.628714"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1007\/BF02187718","article-title":"An O(nlogn) algorithm for the all-nearest-neighbors problem","volume":"4","author":"pravin","year":"1989","journal-title":"Discrete & Computational Geometry"},{"key":"ref22","article-title":"A unified deep neural network for speaker and language recognition","author":"richardson","year":"2015","journal-title":"arXiv preprint arXiv 1504 00923"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","article-title":"Front-end factor analysis for speaker verification","volume":"19","author":"najim","year":"2011","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854370"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","article-title":"Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition","volume":"20","author":"george","year":"2012","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367023"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"1327","DOI":"10.21437\/Interspeech.2014-329","article-title":"Tandem deep features for text-dependent speaker verification","author":"fu","year":"2014","journal-title":"InterSpeech"}],"event":{"name":"2016 IEEE Spoken Language Technology Workshop (SLT)","location":"San Diego, CA","start":{"date-parts":[[2016,12,13]]},"end":{"date-parts":[[2016,12,16]]}},"container-title":["2016 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7836849\/7846230\/07846261.pdf?arnumber=7846261","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T07:19:49Z","timestamp":1749971989000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7846261\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/slt.2016.7846261","relation":{},"subject":[],"published":{"date-parts":[[2016,12]]}}}