{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T15:30:49Z","timestamp":1777390249509,"version":"3.51.4"},"reference-count":72,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["313970"],"award-info":[{"award-number":["313970"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Finnish Scientific Advisory Board for Defence","award":["2500M-0106"],"award-info":[{"award-number":["2500M-0106"]}]},{"name":"ARAP"},{"name":"ARAP grant from the Institute for Infocomm Research, A*STAR"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/taslp.2020.2964953","type":"journal-article","created":{"date-parts":[[2020,1,8]],"date-time":"2020-01-08T21:23:48Z","timestamp":1578518628000},"page":"682-695","source":"Crossref","is-referenced-by-count":14,"title":["Maximal Figure-of-Merit Framework to Detect Multi-Label Phonetic Features for Spoken Language Recognition"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4052-2754","authenticated-orcid":false,"given":"Ivan","family":"Kukanov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Trung Ngo","family":"Trong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ville","family":"Hautamaki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0770-0507","authenticated-orcid":false,"given":"Sabato Marco","family":"Siniscalchi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Valerio Mario","family":"Salerno","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9133-3000","authenticated-orcid":false,"given":"Kong Aik","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref72","first-page":"2546","article-title":"Algorithms for hyper-parameter optimization","author":"bergstra","year":"2011","journal-title":"Advances in Neural Information Processing Systems 24"},{"key":"ref71","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proc 3rd Int Conf Learn Representations"},{"key":"ref70","article-title":"Measuring, refining and calibrating speaker and language information extracted from speech","author":"br\u00fcmmer","year":"2010"},{"key":"ref39","first-page":"408","article-title":"Boosting universal speech attributes classification with deep neural network for foreign accent characterization","author":"hautam\u00e4ki","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.5772\/17600"},{"key":"ref33","article-title":"Recurrent neural network and maximal figure of merit for acoustic event detection","author":"kukanov","year":"2017"},{"key":"ref32","first-page":"489","article-title":"Deep learning with maximal figure-of-merit cost to advance multi-label speech attribute detection","author":"kukanov","year":"2016","journal-title":"Proc IEEE Spoken Lang Technol Workshop"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854454"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1148020.1148022"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1121\/1.4890284"},{"key":"ref36","first-page":"1829","article-title":"Detection-based ASR in the automatic speech attribute transcription project","author":"bromberg","year":"2007","journal-title":"Proc 8th Annu Conf Int Speech Commun Assoc"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2013.2238591"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461396"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.3390\/app6060162"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.3390\/app6060162"},{"key":"ref61","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume":"9","author":"glorot","year":"2010","journal-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2690575"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/5.726793"},{"key":"ref64","article-title":"Fast and accurate deep network learning by exponential linear units (ELUs)","author":"clevert","year":"0","journal-title":"Proc Intl Conf on Learning Representations"},{"key":"ref27","author":"duda","year":"2000","journal-title":"Pattern Classification"},{"key":"ref65","article-title":"Nist 2017 language recognition evaluation plan","year":"2017"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-624"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1997.0026"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2005.06.003"},{"key":"ref68","first-page":"1471","article-title":"Within-class covariance normalization for SVM-based speaker recognition","author":"hatch","year":"2006","journal-title":"Proc INTERSPEECH"},{"key":"ref69","doi-asserted-by":"crossref","first-page":"330","DOI":"10.1007\/978-3-540-74200-5_19","article-title":"An introduction to application-independent evaluation of speaker recognition systems","author":"leeuwen","year":"2007","journal-title":"Speaker Classification I"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2012.2237151"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1948.tb00917.x"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2489558"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2006.162"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"978","DOI":"10.1126\/science.247.4945.978","article-title":"Regularization algorithms for learning that are equivalent to multilayer networks","volume":"247","author":"poggio","year":"1990","journal-title":"Sci"},{"key":"ref24","first-page":"437","article-title":"Large-scale multi-label text classification - revisiting neural networks","author":"nam","year":"0","journal-title":"Proc ECML PKDD"},{"key":"ref23","first-page":"895","article-title":"The OGI multi-language telephone speech corpus","author":"muthusamy","year":"0","journal-title":"Proc Int Conf Spoken Lang Process"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/NNSP.1991.239512"},{"key":"ref25","article-title":"A literature survey on algorithms for multi-label learning","author":"sorower","year":"2010"},{"key":"ref50","article-title":"Classification assessment methods","author":"tharwat","year":"2018","journal-title":"Inform Comput and Appl"},{"key":"ref51","article-title":"A study on cost behaviors of binary classification measures in class-imbalanced problems","author":"hu","year":"2014","journal-title":"CoRR"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.39"},{"key":"ref58","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Proceedings of the 32nd Intl Conf on Machine Learning"},{"key":"ref57","author":"hu","year":"2000","journal-title":"Handbook for Neural Network Signal Processing"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-011-3030-1_17"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-018-5736-y"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2550"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref52","first-page":"832","article-title":"Scalable Learning of Non-Decomposable Objectives","volume":"54","author":"eban","year":"2017","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref10","first-page":"299","article-title":"Neural network bottleneck features for language identification","author":"mat?jka","year":"0","journal-title":"Proc Odyssey"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/0885-2308(89)90020-X"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511808852"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472744"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2012.05.001"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.540282"},{"key":"ref15","first-page":"891","article-title":"Combining articulatory and acoustic information for speech recognition in noisy and reverberant environments","author":"kirchhoff","year":"1998","journal-title":"Proc Int Conf Spoken Lang Process"},{"key":"ref16","first-page":"16","article-title":"A flexible stream architecture for ASR using articulatory features","author":"metze","year":"2002","journal-title":"Proc Int Conf Spoken Lang Process"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2014.12.008"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2000.0148"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.05.004"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.543236"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2005.06.003"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref5","first-page":"2237","article-title":"Phonotactic language identification using high quality phoneme recognition","author":"mat?jka","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref8","first-page":"109","article-title":"Deep language: A comprehensive deep learning approach to end-to-end language recognition","author":"trong","year":"0","journal-title":"Proc Odyssey"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-624"},{"key":"ref49","first-page":"485","article-title":"Towards an organizing principle for a layered perceptual network","author":"linsker","year":"1988","journal-title":"Neural Information Processing Systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0100795"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/2906.001.0001"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/78.175747"},{"key":"ref48","first-page":"1033","article-title":"Beyond Fano's inequality: Bounds on the optimal F-score, BER, and cost-sensitive risk and their implications","volume":"14","author":"zhao","year":"2013","journal-title":"J Mach Learn Res"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.05.008"},{"key":"ref42","author":"katamba","year":"1989","journal-title":"An Introduction to Japanese Phonology"},{"key":"ref41","first-page":"4261","article-title":"Toward a detector-based universal phone recognition","author":"siniscalchi","year":"2008","journal-title":"Proc IEEE Int Conf on Acoust Speech Signal Process"},{"key":"ref44","author":"virtanen","year":"0","journal-title":"Proc Workshop Detection Classif Acoust Scenes Events"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339736"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/8938144\/08952610.pdf?arnumber=8952610","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T19:55:27Z","timestamp":1643313327000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8952610\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":72,"URL":"https:\/\/doi.org\/10.1109\/taslp.2020.2964953","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}