{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:06:59Z","timestamp":1740100019427,"version":"3.37.3"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6,6]]},"DOI":"10.1109\/icassp39728.2021.9414094","type":"proceedings-article","created":{"date-parts":[[2021,5,13]],"date-time":"2021-05-13T19:53:45Z","timestamp":1620935625000},"page":"6708-6712","source":"Crossref","is-referenced-by-count":4,"title":["Short-Time Spectral Aggregation for Speaker Embedding"],"prefix":"10.1109","author":[{"given":"Youzhi","family":"Tu","sequence":"first","affiliation":[]},{"given":"Man-Wai","family":"Mak","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref11","first-page":"4700","article-title":"Deep residual learning for image recognition","author":"huang","year":"2017","journal-title":"Proc IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683120"},{"key":"ref13","first-page":"6794","article-title":"Frequency and temporal convolutional attention for text-independent speaker recognition","author":"yadav","year":"2019","journal-title":"Proc International Conference on Acoustics Speech and Signal Processing"},{"key":"ref14","first-page":"2449","article-title":"Spectral representations for convolutional neural networks","author":"rippel","year":"2015","journal-title":"Advances in neural information processing systems"},{"journal-title":"Discrete-Time Speech Signal Processing Principles and Practice","year":"2002","author":"quatieri","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1977.1162950"},{"article-title":"Deep speaker: An end-to-end neural speaker embedding system","year":"2017","author":"li","key":"ref17"},{"key":"ref18","first-page":"74","article-title":"Exploring the encoding layer and loss function in end-to-end speaker and language recognition system","author":"chien","year":"2018","journal-title":"Proc Odyssey The Speaker and Language Recognition Workshop"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-993"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2019.101027"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1417"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2979"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2020-30"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054134"},{"key":"ref7","first-page":"2448","article-title":"Analysis of BUT submission in far-field scenarios of VOiCES 2019 challenge","author":"mat?jka","year":"2019","journal-title":"Proc Annual Conference of the International Speech Communication Association"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3004760"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1158"},{"article-title":"The VOiCES from a distance challenge 2019 evaluation plan","year":"2019","author":"nandwana","key":"ref22"},{"journal-title":"Discrete-Time Signal Processing","year":"1999","author":"oppenheim","key":"ref21"},{"key":"ref24","first-page":"1567","article-title":"Analysis of score normalization in multilingual speaker recognition","author":"mat?jka","year":"2017","journal-title":"Proc Annual Conference of the International Speech Communication Association"},{"key":"ref23","first-page":"235","article-title":"Short term spectral analysis, synthesis, and modification by discrete Fourier transform","volume":"25","author":"wang","year":"2018","journal-title":"IEEE Signal Processing Letters"}],"event":{"name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2021,6,6]]},"location":"Toronto, ON, Canada","end":{"date-parts":[[2021,6,11]]}},"container-title":["ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9413349\/9413350\/09414094.pdf?arnumber=9414094","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:40:47Z","timestamp":1652197247000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9414094\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,6]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/icassp39728.2021.9414094","relation":{},"subject":[],"published":{"date-parts":[[2021,6,6]]}}}