{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T21:20:07Z","timestamp":1743110407405,"version":"3.40.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031779602"},{"type":"electronic","value":"9783031779619"}],"license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-77961-9_7","type":"book-chapter","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T13:53:48Z","timestamp":1732197228000},"page":"92-103","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["AutoMode-ASR: Learning to\u00a0Select ASR Systems for\u00a0Better Quality and\u00a0Cost"],"prefix":"10.1007","author":[{"given":"Ahmet","family":"G\u00fcnd\u00fcz","sequence":"first","affiliation":[]},{"given":"Yunsu","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Kamer","family":"Ali Yuksel","sequence":"additional","affiliation":[]},{"given":"Mohamed","family":"Al-Badrashiny","sequence":"additional","affiliation":[]},{"given":"Thiago","family":"Castro Ferreira","sequence":"additional","affiliation":[]},{"given":"Hassan","family":"Sawaf","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Abdi, L., Hashemi, S.: To combat multi-class imbalanced problems by means of over-sampling techniques. IEEE Trans. Knowl. Data Eng. 28(1) (2015)","DOI":"10.1109\/TKDE.2015.2458858"},{"key":"7_CR2","unstructured":"Ardila, R., et al.: Common voice: a massively-multilingual speech corpus. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 4218\u20134222 (2020)"},{"key":"7_CR3","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inform. Process. Syst. 33, 12449\u201312460 (2020)"},{"issue":"10\u201311","key":"7_CR4","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1016\/j.specom.2007.02.006","volume":"49","author":"M Benzeghiba","year":"2007","unstructured":"Benzeghiba, M., et al.: Automatic speech recognition and speech variability: a review. Speech Commun. 49(10\u201311), 763\u2013786 (2007)","journal-title":"Speech Commun."},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: A neural network for large vocabulary conversational speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"7_CR6","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: Smote: synthetic minority over-sampling technique. J. Artifi. Intell. Res. 16, 321\u2013357 (2002)","journal-title":"J. Artifi. Intell. Res."},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Chen, T., Guestrin, C.: Xgboost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (2016)","DOI":"10.1145\/2939672.2939785"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Conneau, A., Baevski, A., Collobert, R., Mohamed, A., Auli, M.: Unsupervised cross-lingual representation learning for speech recognition. arXiv preprint arXiv:2006.13979 (2020)","DOI":"10.21437\/Interspeech.2021-329"},{"key":"7_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2019.103456","volume":"114","author":"M Ebrahimi","year":"2019","unstructured":"Ebrahimi, M., Mohammadi-Dehcheshmeh, M., Ebrahimie, E., Petrovski, K.R.: Comprehensive analysis of machine learning models for prediction of sub-clinical mastitis: deep learning and gradient-boosted trees outperform other models. Comput. Biol. Med. 114, 103456 (2019)","journal-title":"Comput. Biol. Med."},{"key":"7_CR10","unstructured":"Fiscus, J.G.: A post-processing system to yield reduced word error rates: recognizer output voting error reduction (rover). In: 1997 IEEE Workshop on Automatic Speech Recognition and Understanding Proceedings, pp. 347\u2013354. IEEE (1997)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Friedman, J.H.: Greedy function approximation: a gradient boosting machine. Annals Stat. 1189\u20131232 (2001)","DOI":"10.1214\/aos\/1013203451"},{"key":"7_CR12","doi-asserted-by":"publisher","unstructured":"Gitman, I., Lavrukhin, V., Laptev, A., Ginsburg, B.: Confidence-based Ensembles of End-to-End Speech Recognition Models. In: Proc. INTERSPEECH 2023 (2023). https:\/\/doi.org\/10.21437\/Interspeech2023-1281","DOI":"10.21437\/Interspeech2023-1281"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A.r., Hinton, G.: Speech recognition with deep recurrent neural networks. In: IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Gulati, A., et\u00a0al.: Conformer: convolution-augmented transformer for speech recognition. Interspeech (2020)","DOI":"10.21437\/Interspeech.2020-3015"},{"issue":"16\u201317","key":"7_CR15","doi-asserted-by":"publisher","first-page":"1897","DOI":"10.1016\/j.artint.2008.08.002","volume":"172","author":"E H\u00fcllermeier","year":"2008","unstructured":"H\u00fcllermeier, E., F\u00fcrnkranz, J., Cheng, W., Brinker, K.: Label ranking by learning pairwise preferences. Artif. Intell. 172(16\u201317), 1897\u20131916 (2008)","journal-title":"Artif. Intell."},{"key":"7_CR16","doi-asserted-by":"publisher","unstructured":"Javadi, G., Yuksel, K.A., Kim, Y., Ferreira, T.C., Al-Badrashiny, M.: Word-level asr quality estimation for efficient corpus sampling and post-editing through analyzing attentions of a reference-free metric. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 14-19 April. IEEE (2024). https:\/\/doi.org\/10.48550\/arXiv.2401.11268","DOI":"10.48550\/arXiv.2401.11268"},{"key":"7_CR17","unstructured":"Ke, G., et al.: Lightgbm: a highly efficient gradient boosting decision tree. Adv. Neural Inform. Process. Syst. 30 (2017)"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Liu, T.Y., et\u00a0al.: Learning to rank for information retrieval. Foundat. Trends\u00ae Inform. Retrieval 3(3) (2009)","DOI":"10.1561\/1500000016"},{"issue":"4","key":"7_CR19","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1006\/csla.2000.0152","volume":"14","author":"L Mangu","year":"2000","unstructured":"Mangu, L., Brill, E., Stolcke, A.: Finding consensus in speech recognition: word error minimization and other applications of confusion networks. Comput. Speech Lang. 14(4), 373\u2013400 (2000)","journal-title":"Comput. Speech Lang."},{"key":"7_CR20","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.patcog.2006.04.041","volume":"40","author":"G Ou","year":"2007","unstructured":"Ou, G., Murphey, Y.L.: Multi-class pattern classification using neural networks. Pattern Recogn. 40, 4\u201318 (2007)","journal-title":"Pattern Recogn."},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech: an asr corpus based on public domain audio books. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5206\u20135210. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"7_CR22","unstructured":"Pratap, V., et\u00a0al.: Scaling speech technology to 1,000+ languages. arXiv preprint arXiv:2305.13516 (2023)"},{"key":"7_CR23","unstructured":"Prokhorenkova, L., Gusev, G., Vorobev, A., Dorogush, A.V., Gulin, A.: Catboost: unbiased boosting with categorical features. Adv. Neural Inform. Process. Syst. 31 (2018)"},{"key":"7_CR24","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., McLeavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision. In: International Conference on Machine Learning, pp. 28492\u201328518. PMLR (2023)"},{"key":"7_CR25","unstructured":"Schmitt, M.: Deep learning vs. gradient boosting: Benchmarking state-of-the-art machine learning algorithms for credit scoring. arXiv preprint arXiv:2205.10535 (2022)"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Schwenk, H.: Using boosting to improve a hybrid hmm\/neural network speech recognizer. In: 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings, ICASSP, vol.\u00a02, pp. 1009\u20131012. IEEE (1999)","DOI":"10.1109\/ICASSP.1999.759874"},{"issue":"199","key":"7_CR27","first-page":"1","volume":"18","author":"NB Shah","year":"2018","unstructured":"Shah, N.B., Wainwright, M.J.: Simple, robust and optimal ranking from pairwise comparisons. J. Mach. Learn. Res. 18(199), 1\u201338 (2018)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Siohan, O., Ramabhadran, B., Kingsbury, B.: Constructing ensembles of asr systems using randomized decision trees. In: Proc. IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2005, vol.\u00a01, pp. I\u2013197. IEEE (2005)","DOI":"10.1109\/ICASSP.2005.1415084"},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Tanha, J., Abdi, Y., Samadi, N., Razzaghi, N., Asadpour, M.: Boosting methods for multi-class imbalanced data classification: an experimental review. J. Big Data 7 (2020)","DOI":"10.1186\/s40537-020-00349-y"},{"key":"7_CR30","unstructured":"Wang, C., Wu, Q., Weimer, M., Zhu, E.: Flaml: A fast and lightweight automl library (2021)"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Wang, S., Yao, X.: Multiclass imbalance problems: analysis and potential solutions. IEEE Trans. Syst. Man Cybernet. Part B (Cybernet.) 42(4) (2012)","DOI":"10.1109\/TSMCB.2012.2187280"},{"key":"7_CR32","unstructured":"Wauthier, F., Jordan, M., Jojic, N.: Efficient ranking from pairwise comparisons. In: International Conference on Machine Learning, pp. 109\u2013117. PMLR (2013)"},{"key":"7_CR33","doi-asserted-by":"crossref","unstructured":"Wu, Q., Wang, C., Huang, S.: Frugal optimization for cost-related hyperparameters. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 10347\u201310354 (2021)","DOI":"10.1609\/aaai.v35i12.17239"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Yijing, L., Haixiang, G., Xiao, L., Yanan, L., Jinling, L.: Adapted ensemble classification algorithm based on multiple classifier system and feature selection for classifying multi-class imbalanced data. Knowl.-Based Syst. 94 (2016)","DOI":"10.1016\/j.knosys.2015.11.013"},{"key":"7_CR35","doi-asserted-by":"publisher","unstructured":"Yuksel, K.A., Ferreira, T.C., Gunduz, A., Al-Badrashiny, M., Javadi, G.: A reference-less quality metric for automatic speech recognition via contrastive-learning of a multi-language model with self-supervision. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP, Rhodes Island, Greece, 4-10 June 2023, pp.\u00a01\u20135. IEEE (2023). https:\/\/doi.org\/10.1109\/ICASSPW59220.2023.10193003","DOI":"10.1109\/ICASSPW59220.2023.10193003"},{"key":"7_CR36","doi-asserted-by":"publisher","unstructured":"Yuksel, K.A., Ferreira, T.C., Javadi, G., Al-Badrashiny, M., Gunduz, A.: Norefer: a referenceless quality metric for automatic speech recognition via semi-supervised language model fine-tuning with contrastive learning. In: Proc. INTERSPEECH 2023 pp. 466\u2013470 (2023). https:\/\/doi.org\/10.21437\/Interspeech.2023-643","DOI":"10.21437\/Interspeech.2023-643"},{"key":"7_CR37","unstructured":"Zhang, Y., et\u00a0al.: Google usm: Scaling automatic speech recognition beyond 100 languages. arXiv preprint arXiv:2303.01037 (2023)"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-77961-9_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,9]],"date-time":"2025-01-09T16:04:16Z","timestamp":1736438656000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-77961-9_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"ISBN":["9783031779602","9783031779619"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-77961-9_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"22 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belgrade","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Serbia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom2024.ftn.uns.ac.rs\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}