{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T15:25:15Z","timestamp":1773156315852,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031209796","type":"print"},{"value":"9783031209802","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20980-2_34","type":"book-chapter","created":{"date-parts":[[2022,11,12]],"date-time":"2022-11-12T19:03:09Z","timestamp":1668279789000},"page":"391-403","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Multi-level Fusion of\u00a0Fisher Vector Encoded BERT and\u00a0Wav2vec 2.0 Embeddings for\u00a0Native Language Identification"],"prefix":"10.1007","author":[{"given":"Dani","family":"Krebbers","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7947-5508","authenticated-orcid":false,"given":"Heysem","family":"Kaya","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3424-652X","authenticated-orcid":false,"given":"Alexey","family":"Karpov","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,10]]},"reference":[{"key":"34_CR1","doi-asserted-by":"publisher","unstructured":"Abad, A., Ribeiro, E., Kepler, F., Astudillo, R., Trancoso, I.: Exploiting phone log-likelihood ratio features for the detection of the native language of non-native English speakers. In: Proceedings of Interspeech 2016, pp. 2413\u20132417 (2016). https:\/\/doi.org\/10.21437\/Interspeech.2016-1491","DOI":"10.21437\/Interspeech.2016-1491"},{"key":"34_CR2","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inf. Process. Syst. 33, 12449\u201312460 (2020)"},{"key":"34_CR3","doi-asserted-by":"publisher","unstructured":"Chowdhury, S.A., Ali, A., Shon, S., Glass, J.: What does an end-to-end dialect identification model learn about non-dialectal information? In: Proceedings of Interspeech 2020, pp. 462\u2013466 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2235","DOI":"10.21437\/Interspeech.2020-2235"},{"key":"34_CR4","doi-asserted-by":"crossref","unstructured":"Conneau, A., Baevski, A., Collobert, R., Mohamed, A., Auli, M.: Unsupervised cross-lingual representation learning for speech recognition. arXiv preprint arXiv:2006.13979 (2020)","DOI":"10.21437\/Interspeech.2021-329"},{"key":"34_CR5","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2019)"},{"key":"34_CR6","doi-asserted-by":"crossref","unstructured":"Eyben, F., Weninger, F., Gro\u00df, F., Schuller, B.: Recent developments in opensmile, the Munich open-source multimedia feature extractor. In: Proceedings of the 21st ACM International Conference on Multimedia, pp. 835\u2013838. ACM (2013)","DOI":"10.1145\/2502081.2502224"},{"key":"34_CR7","doi-asserted-by":"publisher","unstructured":"Fan, Z., Li, M., Zhou, S., Xu, B.: Exploring wav2vec 2.0 on speaker verification and language identification. In: Proceedings of Interspeech 2021, pp. 1509\u20131513 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-1280","DOI":"10.21437\/Interspeech.2021-1280"},{"key":"34_CR8","doi-asserted-by":"publisher","unstructured":"Gosztolya, G., Gr\u00f3sz, T., Busa-Fekete, R., T\u00f3th, L.: Determining native language and deception using phonetic features and classifier combination. In: Proceedings of Interspeech 2016, pp. 2418\u20132422 (2016). https:\/\/doi.org\/10.21437\/Interspeech.2016-962","DOI":"10.21437\/Interspeech.2016-962"},{"key":"34_CR9","doi-asserted-by":"crossref","unstructured":"Hao, Y., Dong, L., Wei, F., Xu, K.: Visualizing and understanding the effectiveness of bert. arXiv preprint arXiv:1908.05620 (2019)","DOI":"10.18653\/v1\/D19-1424"},{"key":"34_CR10","doi-asserted-by":"crossref","unstructured":"Hermansky, H.: Perceptual linear predictive (PLP) analysis of speech. the J. Acoust. Soc. Am. 87(4), 1738\u20131752 (1990)","DOI":"10.1121\/1.399423"},{"issue":"4","key":"34_CR11","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hermansky","year":"1994","unstructured":"Hermansky, H., Morgan, N.: Rasta processing of speech. IEEE Trans. Speech Audio Process. 2(4), 578\u2013589 (1994)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"2","key":"34_CR12","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1109\/TSMCB.2011.2168604","volume":"42","author":"GB Huang","year":"2012","unstructured":"Huang, G.B., Zhou, H., Ding, X., Zhang, R.: Extreme learning machine for regression and multiclass classification. IEEE Trans. Syst. Man Cybern. Part B Cybern. 42(2), 513\u2013529 (2012)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B Cybern."},{"issue":"1","key":"34_CR13","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1016\/j.neucom.2005.12.126","volume":"70","author":"GB Huang","year":"2006","unstructured":"Huang, G.B., Zhu, Q.Y., Siew, C.K.: Extreme learning machine: theory and applications. Neurocomputing 70(1), 489\u2013501 (2006)","journal-title":"Neurocomputing"},{"key":"34_CR14","doi-asserted-by":"crossref","unstructured":"Kaya, H., Gurpinar, F., Ali Salah, A.: Multi-modal score fusion and decision trees for explainable automatic job candidate screening from video CVS. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 1\u20139, July 2017","DOI":"10.1109\/CVPRW.2017.210"},{"key":"34_CR15","doi-asserted-by":"publisher","unstructured":"Kaya, H., Karpov, A.A.: Fusing acoustic feature representations for computational paralinguistics tasks. In: Proceedings of Interspeech 2016, pp. 2046\u20132050 (2016). https:\/\/doi.org\/10.21437\/Interspeech.2016-995","DOI":"10.21437\/Interspeech.2016-995"},{"key":"34_CR16","doi-asserted-by":"publisher","unstructured":"Kaya, H., Karpov, A.A.: Introducing weighted kernel classifiers for handling imbalanced paralinguistic corpora: snoring, addressee and cold. In: Proceedings of Interspeech 2017, pp. 3527\u20133531 (2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-653","DOI":"10.21437\/Interspeech.2017-653"},{"key":"34_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/978-3-319-40663-3_14","volume-title":"Advances in Neural Networks","author":"H Kaya","year":"2016","unstructured":"Kaya, H., Karpov, A.A., Salah, A.A.: Robust acoustic emotion recognition based on cascaded normalization and extreme learning machines. In: Cheng, L., Liu, Q., Ronzhin, A. (eds.) ISNN 2016. LNCS, vol. 9719, pp. 115\u2013123. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-40663-3_14"},{"issue":"6","key":"34_CR18","doi-asserted-by":"publisher","first-page":"671","DOI":"10.1109\/LSP.2014.2365393","volume":"22","author":"H Kaya","year":"2015","unstructured":"Kaya, H., \u00d6zkaptan, T., Salah, A.A., G\u00fcrgen, F.: Random discriminative projection based feature selection with application to conflict recognition. IEEE Sig. Process. Lett. 22(6), 671\u2013675 (2015). https:\/\/doi.org\/10.1109\/LSP.2014.2365393","journal-title":"IEEE Sig. Process. Lett."},{"key":"34_CR19","doi-asserted-by":"crossref","unstructured":"Malmasi, S., Dras, M.: Language transfer hypotheses with linear SVM weights. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1385\u20131390 (2014)","DOI":"10.3115\/v1\/D14-1144"},{"key":"34_CR20","doi-asserted-by":"crossref","unstructured":"Malmasi, S., et al.: A report on the 2017 native language identification shared task. In: Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications, pp. 62\u201375 (2017)","DOI":"10.18653\/v1\/W17-5007"},{"key":"34_CR21","doi-asserted-by":"crossref","unstructured":"Pepino, L., Riera, P., Ferrer, L.: Emotion recognition from speech using wav2vec 2.0 embeddings. arXiv preprint arXiv:2104.03502 (2021)","DOI":"10.21437\/Interspeech.2021-703"},{"key":"34_CR22","doi-asserted-by":"crossref","unstructured":"Perkins, R.: Native language identification (NLID) for forensic authorship analysis of weblogs. In: New threats and Countermeasures in Digital Crime and Cyber Terrorism, pp. 213\u2013234. IGI Global (2015)","DOI":"10.4018\/978-1-4666-8345-7.ch012"},{"key":"34_CR23","doi-asserted-by":"crossref","unstructured":"Perronnin, F., Dance, C.: Fisher kernels on visual vocabularies for image categorization. In: 2007 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20138. IEEE (2007)","DOI":"10.1109\/CVPR.2007.383266"},{"key":"34_CR24","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Kordas, P., Kiapour, M.H., Zheng, S., Piramuthu, R., Lazebnik, S.: Conditional image-text embedding networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 249\u2013264 (2018)","DOI":"10.1007\/978-3-030-01258-8_16"},{"key":"34_CR25","doi-asserted-by":"publisher","unstructured":"Qian, Y., et al.: Improving sub-phone modeling for better native language identification with non-native English speech. In: Proceedings of Interspeech 2017, pp. 2586\u20132590 (2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-245","DOI":"10.21437\/Interspeech.2017-245"},{"key":"34_CR26","doi-asserted-by":"publisher","unstructured":"Ramesh, G., Kumar, C.S., Murty, K.S.R.: Self-supervised phonotactic representations for language identification. In: Proceedings of Interspeech 2021, pp. 1514\u20131518 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-1310","DOI":"10.21437\/Interspeech.2021-1310"},{"key":"34_CR27","unstructured":"Rozovskaya, A., Roth, D.: Algorithm selection and model adaptation for ESL correction tasks. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 924\u2013933 (2011)"},{"key":"34_CR28","doi-asserted-by":"publisher","unstructured":"Schuller, B., et al.: The interspeech 2015 computational paralinguistics challenge: Nativeness, parkinson\u2019s & eating condition. In: Proceedings of Interspeech 2015, pp. 478\u2013482 (2015). https:\/\/doi.org\/10.21437\/Interspeech.2015-179","DOI":"10.21437\/Interspeech.2015-179"},{"key":"34_CR29","doi-asserted-by":"publisher","unstructured":"Schuller, B., et al.: The interspeech 2016 computational paralinguistics challenge: deception, sincerity & native language. In: Proceedings of Interspeech 2016, pp. 2001\u20132005 (2016). https:\/\/doi.org\/10.21437\/Interspeech.2016-129","DOI":"10.21437\/Interspeech.2016-129"},{"key":"34_CR30","doi-asserted-by":"publisher","unstructured":"Schuller, B., et al.: The interspeech 2013 computational paralinguistics challenge: Social signals, conflict, emotion, autism. In: Proceedings of Interspeech 2013, pp. 148\u2013152 (2013). https:\/\/doi.org\/10.21437\/Interspeech.2013-56","DOI":"10.21437\/Interspeech.2013-56"},{"key":"34_CR31","doi-asserted-by":"publisher","unstructured":"Shivakumar, P.G., Chakravarthula, S.N., Georgiou, P.: Multimodal fusion of multirate acoustic, prosodic, and lexical speaker characteristics for native language identification. In: Proceedings of Interspeech 2016, pp. 2408\u20132412 (2016). https:\/\/doi.org\/10.21437\/Interspeech.2016-1312","DOI":"10.21437\/Interspeech.2016-1312"},{"key":"34_CR32","doi-asserted-by":"publisher","unstructured":"So\u011fanc\u0131o\u011flu, G., et al.: Is everything fine, grandma? acoustic and linguistic modeling for robust elderly speech emotion recognition. In: Proceedings of Interspeech 2020, pp. 2097\u20132101 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-3160","DOI":"10.21437\/Interspeech.2020-3160"},{"key":"34_CR33","first-page":"581","volume-title":"Encyclopedia of Statistical Sciences","author":"H Wold","year":"1985","unstructured":"Wold, H.: Partial least squares. In: Kotz, S., Johnson, N.L. (eds.) Encyclopedia of Statistical Sciences, pp. 581\u2013591. Wiley, New York (1985)"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20980-2_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,12]],"date-time":"2022-11-12T19:07:35Z","timestamp":1668280055000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20980-2_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031209796","9783031209802"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20980-2_34","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"10 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Gurugram","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 November 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 November 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.specom.co.in","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"99","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"60","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"61% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}