{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:49:52Z","timestamp":1774540192730,"version":"3.50.1"},"reference-count":84,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2016,8,1]],"date-time":"2016-08-01T00:00:00Z","timestamp":1470009600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100000266","name":"EPSRC","doi-asserted-by":"publisher","award":["EP\/I031022\/1"],"award-info":[{"award-number":["EP\/I031022\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Speech Technology (NST)"},{"DOI":"10.13039\/501100000780","name":"European Union","doi-asserted-by":"crossref","award":["H2020"],"award-info":[{"award-number":["H2020"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"crossref"}]},{"name":"SUMMA","award":["688139"],"award-info":[{"award-number":["688139"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2016,8]]},"DOI":"10.1109\/taslp.2016.2560534","type":"journal-article","created":{"date-parts":[[2016,4,29]],"date-time":"2016-04-29T01:03:11Z","timestamp":1461891791000},"page":"1450-1463","source":"Crossref","is-referenced-by-count":83,"title":["Learning Hidden Unit Contributions for Unsupervised Acoustic Model Adaptation"],"prefix":"10.1109","volume":"24","author":[{"given":"Pawel","family":"Swietojanski","sequence":"first","affiliation":[]},{"given":"Jinyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Steve","family":"Renals","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960445"},{"key":"ref72","first-page":"887","article-title":"A novel loss function for the overall risk criterion based discriminative training of HMM models","author":"kaiser","year":"0","journal-title":"Proc Int Conf Spoken Lang Process"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(99)01294-2"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225837"},{"key":"ref76","first-page":"5275","article-title":"On combining i-vectors and discriminative adaptation methods for unsupervised speaker normalization in dnn acoustic models","author":"samarakoon","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref77","first-page":"2047","article-title":"Human vs machine spoofing detection on wideband and narrowband data","author":"wester","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref74","article-title":"The NICT ASR system for IWSLT 2013","author":"huang","year":"0","journal-title":"Proc Int Workshop Spoken Lang Translation"},{"key":"ref39","first-page":"1081","article-title":"Regularized sequence-level deep neural network model adaptation","author":"huang","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404807"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639201"},{"key":"ref78","article-title":"2000 NIST evaluation of conversational speech recognition over the telephone: English and Mandarin performance results","author":"fiscus","year":"0","journal-title":"Proc Speech Transcription Workshop"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854662"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854663"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2457612"},{"key":"ref31","first-page":"2180","article-title":"Adaptation of deep neural network acoustic models using factorised i-vectors","author":"karanasou","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854823"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639212"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"1713","DOI":"10.1109\/TASLP.2014.2346313","article-title":"Fast adaptation of deep neural network based on discriminant codes for speech recognition","volume":"22","author":"xue","year":"2014","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639211"},{"key":"ref34","first-page":"234","article-title":"Recnorm: Simultaneous normalisation and classification applied to speech recognition","author":"bridle","year":"0","journal-title":"Proc Adv Neural Inf Process Sys"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2007.4430116"},{"key":"ref62","first-page":"26","article-title":"The UEDIN ASR systems for the IWSLT 2014 evaluation","author":"bell","year":"0","journal-title":"Proc Int Workshop Spoken Lang Translation"},{"key":"ref61","first-page":"12","article-title":"Overview of the IWSLT 2012 evaluation campaign","author":"federico","year":"0","journal-title":"Proc Int Workshop Spoken Lang Translation"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707744"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163922"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707749"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref65","doi-asserted-by":"crossref","DOI":"10.1007\/3-540-49430-8_2","article-title":"Efficient backprop","author":"lecun","year":"1998","journal-title":"Neural Networks Tricks of the Trade"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707705"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2014.2325781"},{"key":"ref68","first-page":"2345","article-title":"Sequence-discriminative training of deep neural networks","author":"vesely","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref69","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"0","journal-title":"Proc IEEE Workshop Autom Speech Recog and Understanding"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3210-1"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947494"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288833"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2163395"},{"key":"ref24","first-page":"2171","article-title":"Speaker adaptation for hybrid HMM&#x2013;ANN continuous speech recognition system","author":"neto","year":"0","journal-title":"Proc 4th Eur Conf Speech Commun Technol"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639014"},{"key":"ref26","article-title":"Comparison of discriminative input and output transformations for speaker adaptation in the hybrid nn\/hmm systems","author":"li","year":"0","journal-title":"Proc 11th Annu Conf Int Speech Commun Assoc"},{"key":"ref25","first-page":"2183","article-title":"Connectionist speaker normalization and adaptation","author":"abrash","year":"0","journal-title":"Proc 4th Eur Conf Speech Commun Technol"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078566"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/0893-6080(89)90020-8"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607807"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178829"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178787"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178785"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(01)00028-4"},{"key":"ref54","first-page":"3224","article-title":"Parameterised sigmoid and ReLU hidden activation functions for DNN acoustic modelling","author":"zhang","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/18.256500"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/0893-6080(91)90009-T"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404856"},{"key":"ref11","first-page":"11","article-title":"Speaker adaptation for continuous density HMMs: A review","author":"woodland","year":"0","journal-title":"Proc ISCA ITR Workshop Adaptation Methods Speech Recognition"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854828"},{"key":"ref12","first-page":"1248","article-title":"Rapid and effective speaker adaptation of convolutional neural network based models for speech recognition","author":"abdel-hamid","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078569"},{"key":"ref14","first-page":"5010","article-title":"SAT-LHUC: Speaker adaptive training for learning hidden unit contributions","author":"swietojanski","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref15","first-page":"261","article-title":"Wit $^3$: Web inventory of transcribed and translated talks","author":"cettolo","year":"0","journal-title":"Proc 16th Conf Eur Assoc Mach Translat"},{"key":"ref82","first-page":"1319","article-title":"Maxout networks","author":"goodfellow","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-007-9040-x"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078567"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"ref84","first-page":"2579","article-title":"Visualizing high-dimensional data using t-sne","volume":"9","author":"van der maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref18","first-page":"553","article-title":"Performance analysis of the Aurora large vocabulary baseline system","author":"parihar","year":"0","journal-title":"Proc 12th Eur Signal Process Conf"},{"key":"ref83","first-page":"1929","article-title":"Dropout: 2014 A simple way to prevent neural networks from overfitting","author":"srivastava","year":"0","journal-title":"J Mach Learn Res"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1998.0043"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163899"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/89.260359"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.862024"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367023"},{"key":"ref49","first-page":"3605","article-title":"Structured output layer with auxiliary targets for context-dependent acoustic modelling","author":"swietojanski","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638967"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2270370"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178783"},{"key":"ref48","first-page":"3625","article-title":"Rapid adaptation for deep neural networks through multi-task learning","author":"huang","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref47","first-page":"1076","article-title":"Maximum a posteriori adaptation of network parameters in deep models","author":"huang","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854826"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404787"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178784"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424251"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/7480453\/07462247.pdf?arnumber=7462247","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:11:41Z","timestamp":1642003901000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7462247\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,8]]},"references-count":84,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2016.2560534","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,8]]}}}