{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T20:01:49Z","timestamp":1760385709618,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.1109\/iscslp.2018.8706589","type":"proceedings-article","created":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T18:58:35Z","timestamp":1558378715000},"page":"200-204","source":"Crossref","is-referenced-by-count":7,"title":["Exploring a Unified Attention-Based Pooling Framework for Speaker Verification"],"prefix":"10.1109","author":[{"given":"Yi","family":"Liu","sequence":"first","affiliation":[]},{"given":"Liang","family":"He","sequence":"additional","affiliation":[]},{"given":"Weiwei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jia","family":"Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"arXiv preprint arXiv 1502 03500"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref12","first-page":"6000","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref14","first-page":"171","article-title":"End-to-end attention based text-dependent speaker verification","author":"zhang","year":"2016","journal-title":"Proc IEEE Spoken Language Technology Workshop (SLT)"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-993"},{"key":"ref16","first-page":"5359","article-title":"Attention-based models for text-dependent speaker verification","author":"chowdhury","year":"2017","journal-title":"Proc IEEE ICASSP"},{"key":"ref17","first-page":"256","article-title":"Analysis of i-vector length normalization in speaker recognition systems","author":"garcia-romero","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8269006"},{"key":"ref19","first-page":"69","article-title":"The Fisher corpus: a resource for the next generations of speech-to-text","volume":"4","author":"cieri","year":"2004","journal-title":"LREC"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-452"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"ref6","article-title":"Deep speaker: An end-to-end neural speaker embedding system","author":"li","year":"2017","journal-title":"arXiv preprint arXiv 1705 02304"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472652"},{"key":"ref8","first-page":"854","article-title":"End-to-end text-independent speaker verification with flexibility in utterance duration","author":"zhang","year":"2017","journal-title":"Proc IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-620"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21236\/ADA613971"},{"key":"ref1","first-page":"28","article-title":"Joint factor analysis of speaker and session variability: Theory and algorithms","volume":"14","author":"kenny","year":"2005","journal-title":"Montreal (Report) CRIM-06\/08-13"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952536"},{"key":"ref20","first-page":"2726","article-title":"The NIST 2010 speaker recognition evaluation","author":"martin","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref22","article-title":"The kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"Proc IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"},{"key":"ref21","first-page":"2579","article-title":"NIST 2008 speaker recognition evaluation: Performance across telephone and room microphone channels","author":"martin","year":"2009","journal-title":"Proc INTERSPEECH"},{"key":"ref24","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","volume":"16","author":"abadi","year":"2016","journal-title":"OSDI"},{"key":"ref23","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"peddinti","year":"2015","journal-title":"Proc INTERSPEECH"}],"event":{"name":"2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)","start":{"date-parts":[[2018,11,26]]},"location":"Taipei City, Taiwan","end":{"date-parts":[[2018,11,29]]}},"container-title":["2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8701133\/8706262\/08706589.pdf?arnumber=8706589","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,23]],"date-time":"2020-08-23T20:19:00Z","timestamp":1598213940000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8706589\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/iscslp.2018.8706589","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}