{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T05:47:11Z","timestamp":1751348831558,"version":"3.37.3"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319995786"},{"type":"electronic","value":"9783319995793"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-99579-3_30","type":"book-chapter","created":{"date-parts":[[2018,8,24]],"date-time":"2018-08-24T07:36:09Z","timestamp":1535096169000},"page":"281-290","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Optimized Active Learning Strategy for Audiovisual Speaker Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5307-6186","authenticated-orcid":false,"given":"Stamatis","family":"Karlos","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Konstantinos","family":"Kaleris","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nikos","family":"Fazakis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vasileios G.","family":"Kanas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sotiris","family":"Kotsiantis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,25]]},"reference":[{"key":"30_CR1","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1109\/79.671130","volume":"15","author":"BH Juang","year":"1998","unstructured":"Juang, B.H., Chen, T.: The past, present, and future of speech processing. IEEE Signal Process. Mag. 15, 24\u201348 (1998)","journal-title":"IEEE Signal Process. Mag."},{"key":"30_CR2","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/S0167-9260(02)00045-7","volume":"32","author":"J-C Wang","year":"2002","unstructured":"Wang, J.-C., Wang, J.-F., Weng, Y.-S.: Chip design of MFCC extraction for speech recognition. Integr. VLSI J. 32, 111\u2013131 (2002)","journal-title":"Integr. VLSI J."},{"key":"30_CR3","doi-asserted-by":"publisher","first-page":"1979","DOI":"10.1109\/TASL.2007.902499","volume":"15","author":"L Burget","year":"2007","unstructured":"Burget, L., Matejka, P., Schwarz, P., Glembek, O., Cernock\u00fd, J.: Analysis of feature extraction and channel compensation in a GMM speaker recognition system. IEEE Trans. Audio Speech Lang. Process. 15, 1979\u20131986 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"30_CR4","first-page":"193","volume":"6","author":"V Srinivas","year":"2013","unstructured":"Srinivas, V., Santhi Rani, C., Madhu, T.: Investigation of decision tree induction, probabilistic technique and SVM for speaker identification. Int. J. Signal Process. Image Process. Pattern Recognit. 6, 193\u2013204 (2013)","journal-title":"Int. J. Signal Process. Image Process. Pattern Recognit."},{"key":"30_CR5","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1109\/MCI.2007.353420","volume":"2","author":"J-C Wang","year":"2007","unstructured":"Wang, J.-C., Yang, C.-H., Wang, J.-F., Lee, H.-P.: Robust speaker identification and verification. IEEE Comput. Intell. Mag. 2, 52\u201359 (2007)","journal-title":"IEEE Comput. Intell. Mag."},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Lei, Y., Scheffer, N., Ferrer, L., McLaren, M.: A novel scheme for speaker recognition using a phonetically-aware deep neural network. In: ICASSP, pp. 1695\u20131699. IEEE (2014)","DOI":"10.21236\/ADA613971"},{"key":"30_CR7","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1109\/LSP.2006.870086","volume":"13","author":"WM Campbell","year":"2006","unstructured":"Campbell, W.M., Sturim, D., Reynolds, D.A.: Support vector machines using GMM supervectors for speaker verification. IEEE Signal Process. Lett. 13, 308\u2013311 (2006)","journal-title":"IEEE Signal Process. Lett."},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Syed, A.R., Rosenberg, A., Kislal, E.: Supervised and unsupervised active learning for automatic speech recognition of low-resource languages. In: ICASSP, pp. 5320\u20135324. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472693"},{"key":"30_CR9","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01560-1","volume-title":"Active Learning","author":"B Settles","year":"2012","unstructured":"Settles, B.: Active Learning. Morgan & Claypool Publishers, San Rafael (2012)"},{"issue":"4","key":"30_CR10","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1109\/MSP.2017.2699358","volume":"34","author":"Z Zhang","year":"2017","unstructured":"Zhang, Z., Cummins, N., Schuller, B.: Advanced data exploitation in speech analysis: an overview. IEEE Signal Process. Mag. 34(4), 107\u2013129 (2017)","journal-title":"IEEE Signal Process. Mag."},{"key":"30_CR11","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1155\/2017\/1945630","volume":"2017","author":"G Wen","year":"2017","unstructured":"Wen, G., Li, H., Huang, J., Li, D., Xun, E.: Random deep belief networks for recognizing emotions from speech signals. Comput. Intell. Neurosci. 2017, 9 (2017)","journal-title":"Comput. Intell. Neurosci."},{"key":"30_CR12","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1007\/978-3-319-66429-3_22","volume-title":"Speech and Computer","author":"I Szekr\u00e9nyes","year":"2017","unstructured":"Szekr\u00e9nyes, I., Kov\u00e1cs, G.: Classification of formal and informal dialogues based on turn-taking and intonation using deep neural networks. In: Karpov, A., Potapova, R., Mporas, I. (eds.) SPECOM 2017. LNCS (LNAI), vol. 10458, pp. 233\u2013243. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-66429-3_22"},{"issue":"2","key":"30_CR13","doi-asserted-by":"publisher","first-page":"398","DOI":"10.1111\/coin.12169","volume":"34","author":"E Granell","year":"2018","unstructured":"Granell, E., Romero, V., Mart\u00ednez-Hinarejos, C.D.: Multimodality, interactivity, and crowdsourcing for document transcription. Comput. Intell. 34(2), 398\u2013419 (2018)","journal-title":"Comput. Intell."},{"key":"30_CR14","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/978-3-319-66429-3_13","volume-title":"Speech and Computer","author":"J Matou\u0161ek","year":"2017","unstructured":"Matou\u0161ek, J., Tihelka, D.: Annotation error detection: anomaly detection vs. classification. In: Karpov, A., Potapova, R., Mporas, I. (eds.) SPECOM 2017. LNCS (LNAI), vol. 10458, pp. 141\u2013151. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-66429-3_13"},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Safavi, S., Gan, H., Mporas, I.: Improving speaker verification performance under spoofing attacks by fusion of different operational modes. In: Proceedings of the 2017 IEEE 13th International Colloquium Signal Processing and its Applications, CSPA 2017, pp. 219\u2013223 (2017)","DOI":"10.1109\/CSPA.2017.8064954"},{"key":"30_CR16","doi-asserted-by":"publisher","first-page":"1650017","DOI":"10.1142\/S0218213016500172","volume":"25","author":"H Ahn","year":"2016","unstructured":"Ahn, H., Kim, H.: Enhanced spoken sentence retrieval using a conventional automatic speech recognizer in smart home. Int. J. Artif. Intell. Tools 25, 1650017 (2016)","journal-title":"Int. J. Artif. Intell. Tools"},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Lux, M.: Content based image retrieval with LIRe. In: Proceeding of the 19th ACM International Conference on Multimedia, pp. 735\u2013738. ACM (2011)","DOI":"10.1145\/2072298.2072432"},{"key":"30_CR18","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1007\/978-3-319-43958-7_79","volume-title":"Speech and Computer","author":"S Karlos","year":"2016","unstructured":"Karlos, S., Fazakis, N., Karanikola, K., Kotsiantis, S., Sgarbas, K.: Speech recognition combining MFCCs and image features. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS (LNAI), vol. 9811, pp. 651\u2013658. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-43958-7_79"},{"issue":"1","key":"30_CR19","doi-asserted-by":"publisher","first-page":"014008","DOI":"10.1088\/1749-4699\/8\/1\/014008","volume":"8","author":"J Bergstra","year":"2015","unstructured":"Bergstra, J., Komer, B., Eliasmith, C., Yamins, D., Cox, D.D.: Hyperopt: a Python library for model selection and hyperparameter optimization. Comput. Sci. Discov. 8(1), 014008 (2015)","journal-title":"Comput. Sci. Discov."},{"key":"30_CR20","unstructured":"Cummins, F., Grimaldi, M., Leonard, T., Simko, J.: The CHAINS speech corpus: CHAracterizing INdividual Speakers. In: Proceedings of SPECOM, pp. 1\u20136 (2006)"},{"key":"30_CR21","unstructured":"Kamaris, G., Karlos, S., Terpinas, S., Koutsaidis, D., Mourjopoulos, J.: Audio system spatial image evaluation via binaural feature classification. In: Audio Engineering Society Convention 142. Audio Engineering Society (2017)"},{"key":"30_CR22","unstructured":"Yang, Y.-Y., Lee, S.-C., Chung, Y.-A., Wu, T.-E., Chen, S.-A., Lin, H.-T.: libact: Pool-based active learning in Python. CoRR abs\/1710.00379 (2017)"},{"key":"30_CR23","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., et al.: Machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR24","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.specom.2018.02.009","volume":"99","author":"V Vestman","year":"2018","unstructured":"Vestman, V., Gowda, D., Sahidullah, M., Alku, P., Kinnunen, T.: Speaker recognition from whispered speech: a tutorial survey and an application of time-varying linear prediction. Speech Commun. 99, 62\u201379 (2018)","journal-title":"Speech Commun."},{"key":"30_CR25","doi-asserted-by":"publisher","first-page":"2188","DOI":"10.1109\/TASLP.2017.2747097","volume":"25","author":"C Yu","year":"2017","unstructured":"Yu, C., Hansen, J.H.L.: Active learning based constrained clustering for speaker diarization. IEEE\/ACM Trans. Audio Speech Lang. Process. 25, 2188\u20132198 (2017)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"30_CR26","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1155\/2017\/1320780","volume":"2017","author":"D Xie","year":"2017","unstructured":"Xie, D., Zhang, L., Bai, L.: Deep learning in visual computing and signal processing. Appl. Comput. Intell. Soft Comput. 2017, 13 (2017)","journal-title":"Appl. Comput. Intell. Soft Comput."},{"key":"30_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2012\/650818","volume":"2012","author":"X Guo","year":"2012","unstructured":"Guo, X., Toyoda, Y., Li, H., Huang, J., Ding, S., Liu, Y.: Environmental sound recognition using time-frequency intersection patterns. Appl. Comput. Intell. Soft Comput. 2012, 1\u20136 (2012)","journal-title":"Appl. Comput. Intell. Soft Comput."},{"key":"30_CR28","unstructured":"Bernard, J., Zeppelzauer, M., Sedlmair, M., Aigner, W.: A unified process for visual-interactive labeling. In: Proceedings of EuroVis Workshop on Visual Analytics (EuroVA), pp. 10\u201314 (2017)"},{"key":"30_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1142\/s0218213015500104","volume":"24","author":"Y Ben Ayed","year":"2015","unstructured":"Ben Ayed, Y.: A new SVM kernel for keyword spotting using confidence measures. Int. J. Artif. Intell. Tools 24, 1\u201322 (2015)","journal-title":"Int. J. Artif. Intell. Tools"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99579-3_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T04:57:34Z","timestamp":1661835454000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319995786","9783319995793"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99579-3_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}