{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:04:38Z","timestamp":1759104278848,"version":"3.44.0"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032061171","type":"print"},{"value":"9783032061188","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06118-8_3","type":"book-chapter","created":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T11:22:54Z","timestamp":1759058574000},"page":"36-54","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Talk is Cheap, Energy is Not: Towards a\u00a0Green, Context-Aware Metrics Framework for\u00a0Automatic Speech Recognition"],"prefix":"10.1007","author":[{"given":"Maria","family":"Ulan","sequence":"first","affiliation":[]},{"given":"Erik Johannes","family":"Husom","sequence":"additional","affiliation":[]},{"given":"Jeriek Van den","family":"Abeele","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,29]]},"reference":[{"key":"3_CR1","unstructured":"Anthony, L.F.W., et\u00a0al.: Carbontracker: tracking and predicting the carbon footprint of training deep learning models. arXiv preprint arXiv:2007.03051 (2020)"},{"key":"3_CR2","doi-asserted-by":"publisher","unstructured":"Ao, J., et\u00a0al.: SpeechT5: unified-modal encoder-decoder pre-training for spoken language processing. ACL Anthol. 1, 5723\u20135738 (2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.393","DOI":"10.18653\/v1\/2022.acl-long.393"},{"key":"3_CR3","unstructured":"Ardila, R., et\u00a0al.: Common voice: a massively-multilingual speech corpus. ACL Anthol., 4218\u20134222 (2020). https:\/\/aclanthology.org\/2020.lrec-1.520"},{"key":"3_CR4","doi-asserted-by":"publisher","first-page":"80194","DOI":"10.1109\/ACCESS.2024.3409745","volume":"12","author":"MF Argerich","year":"2024","unstructured":"Argerich, M.F., et al.: Measuring and improving the energy efficiency of large language models inference. IEEE Access 12, 80194\u201380207 (2024). https:\/\/doi.org\/10.1109\/ACCESS.2024.3409745","journal-title":"IEEE Access"},{"key":"3_CR5","unstructured":"Baevski, A., et\u00a0al.: wav2vec 2.0: a framework for self-supervised learning of speech representations. NeurIPS 33, 12449\u201312460 (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/92d1e1eb1cd6f9fba3227870bb6d7f07-Abstract.html"},{"key":"3_CR6","doi-asserted-by":"publisher","unstructured":"Barker, J., et\u00a0al.: The fifth \u2018CHiME\u2019 speech separation and recognition challenge: dataset, task and baselines. In: Interspeech 2018, pp. 1561\u20131565 (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1768","DOI":"10.21437\/Interspeech.2018-1768"},{"key":"3_CR7","unstructured":"Chakravarty, A.: Deep learning models in speech recognition: measuring GPU energy consumption, impact of noise and model quantization for edge deployment. arXiv preprint arXiv:2405.01004 (2024)"},{"issue":"6","key":"3_CR8","doi-asserted-by":"publisher","first-page":"1505","DOI":"10.1109\/JSTSP.2022.3188113","volume":"16","author":"S Chen","year":"2022","unstructured":"Chen, S., et al.: WavLM: large-scale self-supervised pre-training for full stack speech processing. IEEE J. Sel. Top. Sig. Process. 16(6), 1505\u20131518 (2022). https:\/\/doi.org\/10.1109\/JSTSP.2022.3188113","journal-title":"IEEE J. Sel. Top. Sig. Process."},{"key":"3_CR9","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1038\/d41586-025-00616-z","volume":"639","author":"S Chen","year":"2025","unstructured":"Chen, S.: How much energy will AI really consume? The good, the bad and the unknown. Nature 639, 22\u201324 (2025). https:\/\/doi.org\/10.1038\/d41586-025-00616-z","journal-title":"Nature"},{"key":"3_CR10","doi-asserted-by":"publisher","unstructured":"Courty, B., et\u00a0al.: mlco2\/codecarbon: v2.4.1 (2024). https:\/\/doi.org\/10.5281\/zenodo.11171501","DOI":"10.5281\/zenodo.11171501"},{"key":"3_CR11","unstructured":"Gandhi, S., et\u00a0al.: Distil-whisper: robust knowledge distillation via large-scale pseudo labelling. arXiv preprint arXiv:2311.00430 (2023)"},{"issue":"1","key":"3_CR12","doi-asserted-by":"publisher","first-page":"10039","DOI":"10.5555\/3455716.3455964","volume":"21","author":"P Henderson","year":"2020","unstructured":"Henderson, P., et al.: Towards the systematic reporting of the energy and carbon footprints of machine learning. J. Mach. Learn. Res. 21(1), 10039\u201310081 (2020). https:\/\/doi.org\/10.5555\/3455716.3455964","journal-title":"J. Mach. Learn. Res."},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Hernandez, F., et\u00a0al.: TED-LIUM 3: twice as much data and corpus repartition for experiments on speaker adaptation. In: Speech and Computer, pp. 198\u2013208. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-99579-3_21","DOI":"10.1007\/978-3-319-99579-3_21"},{"key":"3_CR14","doi-asserted-by":"publisher","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"WN Hsu","year":"2021","unstructured":"Hsu, W.N., et al.: HuBERT: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 3451\u20133460 (2021). https:\/\/doi.org\/10.1109\/TASLP.2021.3122291","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"3_CR15","unstructured":"Husom, E.J., et\u00a0al.: The price of prompting: profiling energy use in large language models inference. arXiv preprint arXiv:2407.16893 (2024)"},{"key":"3_CR16","doi-asserted-by":"publisher","unstructured":"Jay, M., et\u00a0al.: An experimental comparison of software-based power meters: focus on CPU and GPU. In: Proceedings of the IEEE\/ACM CCGrid 2023, pp. 106\u2013118. IEEE (2023). https:\/\/doi.org\/10.1109\/CCGrid57682.2023.00020","DOI":"10.1109\/CCGrid57682.2023.00020"},{"key":"3_CR17","unstructured":"Jitsi: JiWER (2025). https:\/\/github.com\/jitsi\/jiwer"},{"key":"3_CR18","unstructured":"Lacoste, A., et\u00a0al.: Quantifying the carbon emissions of machine learning. arXiv preprint arXiv:1910.09700 (2019)"},{"issue":"12","key":"3_CR19","doi-asserted-by":"publisher","first-page":"2100707","DOI":"10.1002\/advs.202100707","volume":"8","author":"L Lannelongue","year":"2021","unstructured":"Lannelongue, L., et al.: Green algorithms: quantifying the carbon footprint of computation. Adv. Sci. 8(12), 2100707 (2021)","journal-title":"Adv. Sci."},{"key":"3_CR20","unstructured":"Luccioni, A.S., et\u00a0al.: Counting carbon: a survey of factors influencing the emissions of machine learning. arXiv preprint arXiv:2302.08476 (2023)"},{"key":"3_CR21","doi-asserted-by":"publisher","unstructured":"Luccioni, S., et\u00a0al.: Power hungry processing: watts driving the cost of AI deployment? In: FAccT \u201924 Proceedings, pp. 85\u201399. ACM (2024). https:\/\/doi.org\/10.1145\/3630106.3658542","DOI":"10.1145\/3630106.3658542"},{"key":"3_CR22","unstructured":"Luccioni, S., et\u00a0al.: AI energy score leaderboard - February 2025 (2025). https:\/\/huggingface.co\/spaces\/AIEnergyScore\/Leaderboard"},{"key":"3_CR23","unstructured":"Microsoft Corporation: Measuring the real-time factor on your device. https:\/\/learn.microsoft.com\/en-us\/azure\/ai-services\/speech-service\/embedded-speech-performance-evaluations"},{"key":"3_CR24","doi-asserted-by":"publisher","unstructured":"Morris, A.C., et\u00a0al.: From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition. In: Interspeech 2004, pp. 2765\u20132768 (2004). https:\/\/doi.org\/10.21437\/Interspeech.2004-668","DOI":"10.21437\/Interspeech.2004-668"},{"key":"3_CR25","unstructured":"Naidu, R., et\u00a0al.: Towards quantifying the carbon emissions of differentially private machine learning. arXiv preprint arXiv:2107.06946 (2021)"},{"key":"3_CR26","unstructured":"NVIDIA Corporation: NVIDIA DRIVE OS 5.2 Linux SDK developer guide: tegrastats utility (2023). https:\/\/docs.nvidia.com\/drive\/drive-os-5.2.0.0L\/drive-os\/index.html#page\/DRIVE_OS_Linux_SDK_Development_Guide\/Utilities\/util_tegrastats.html"},{"key":"3_CR27","doi-asserted-by":"publisher","unstructured":"Panayotov, V., et\u00a0al.: Librispeech: an ASR corpus based on public domain audio books. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 19\u201324. IEEE (2015). https:\/\/doi.org\/10.1109\/ICASSP.2015.7178964","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"3_CR28","doi-asserted-by":"publisher","unstructured":"Parcollet, T., et\u00a0al.: The energy and carbon footprint of training end-to-end speech recognizers. In: Interspeech 2021, pp. 4583\u20134587 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-456","DOI":"10.21437\/Interspeech.2021-456"},{"key":"3_CR29","unstructured":"Patterson, D., et\u00a0al.: Carbon emissions and large neural network training. arXiv preprint arXiv:2104.10350 (2021)"},{"issue":"7","key":"3_CR30","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/MC.2022.3148714","volume":"55","author":"D Patterson","year":"2022","unstructured":"Patterson, D., et al.: The carbon footprint of machine learning training will plateau. Then Shrink. Comput. 55(7), 18\u201328 (2022). https:\/\/doi.org\/10.1109\/MC.2022.3148714","journal-title":"Then Shrink. Comput."},{"key":"3_CR31","unstructured":"Povey, D., et\u00a0al.: The Kaldi speech recognition toolkit. In: Proceedings of the IEEE ASRU 2011. IEEE Signal Processing Society, Catalog No.: CFP11SRW-USB. IEEE (2011)"},{"key":"3_CR32","unstructured":"Radford, A., et\u00a0al.: Robust speech recognition via large-scale weak supervision. In: ICML, pp. 28492\u201328518. PMLR (2023). https:\/\/proceedings.mlr.press\/v202\/radford23a.html"},{"key":"3_CR33","doi-asserted-by":"publisher","unstructured":"Samsi, S., et\u00a0al.: From words to Watts: benchmarking the energy costs of large language model inference. In: 2023 IEEE High Performance Extreme Computing Conference (HPEC), pp. 25\u201329. IEEE (2023). https:\/\/doi.org\/10.1109\/HPEC58863.2023.10363447","DOI":"10.1109\/HPEC58863.2023.10363447"},{"issue":"12","key":"3_CR34","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1145\/3381831","volume":"63","author":"R Schwartz","year":"2020","unstructured":"Schwartz, R., et al.: Green AI. Commun. ACM 63(12), 54\u201363 (2020). https:\/\/doi.org\/10.1145\/3381831","journal-title":"Commun. ACM"},{"key":"3_CR35","doi-asserted-by":"crossref","unstructured":"Shalavi, N., et\u00a0al.: Accurate calibration of power measurements from internal power sensors on NVIDIA Jetson devices. In: Proceedings IEEE EDGE 2023, pp. 166\u2013170. IEEE (2023)","DOI":"10.1109\/EDGE60047.2023.00034"},{"key":"3_CR36","unstructured":"Srivastav, V., et\u00a0al.: Open automatic speech recognition leaderboard (2023). https:\/\/huggingface.co\/spaces\/hf-audio\/open_asr_leaderboard"},{"issue":"09","key":"3_CR37","doi-asserted-by":"publisher","first-page":"13693","DOI":"10.1609\/aaai.v34i09.7123","volume":"34","author":"E Strubell","year":"2020","unstructured":"Strubell, E., et al.: Energy and policy considerations for modern deep learning research. AAAI 34(09), 13693\u201313696 (2020). https:\/\/doi.org\/10.1609\/aaai.v34i09.7123","journal-title":"AAAI"},{"key":"3_CR38","unstructured":"Texas Instruments: INA3221 data sheet, product information and support $$\\vert $$ TI.com (2016). https:\/\/www.ti.com\/product\/INA3221"},{"issue":"4","key":"3_CR39","doi-asserted-by":"publisher","first-page":"e1507","DOI":"10.1002\/widm.1507","volume":"13","author":"R Verdecchia","year":"2023","unstructured":"Verdecchia, R., et al.: A systematic review of Green AI. WIREs Data Min. Knowl. Discov. 13(4), e1507 (2023). https:\/\/doi.org\/10.1002\/widm.1507","journal-title":"WIREs Data Min. Knowl. Discov."},{"issue":"10","key":"3_CR40","doi-asserted-by":"publisher","first-page":"2191","DOI":"10.1016\/j.joule.2023.09.004","volume":"7","author":"A Vries","year":"2023","unstructured":"Vries, A.: The growing energy footprint of artificial intelligence. Joule 7(10), 2191\u20132194 (2023). https:\/\/doi.org\/10.1016\/j.joule.2023.09.004","journal-title":"Joule"},{"key":"3_CR41","doi-asserted-by":"publisher","unstructured":"Wang, C., et\u00a0al.: VoxPopuli: a large-scale multilingual speech corpus for representation learning, semi-supervised learning and interpretation. ACL Anthol., 993\u20131003 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.80","DOI":"10.18653\/v1\/2021.acl-long.80"},{"key":"3_CR42","unstructured":"Wang, C., et\u00a0al.: UniSpeech: unified speech representation learning with labeled and unlabeled data. In: ICML, pp. 10937\u201310947. PMLR (2021). https:\/\/proceedings.mlr.press\/v139\/wang21y.html"},{"key":"3_CR43","unstructured":"Woodard, J., et\u00a0al.: An information theoretic measure of speech recognition performance. In: Workshop on Standardisation for Speech I\/O Technology, Naval Air Development Center, Warminster, PA (1982)"},{"key":"3_CR44","first-page":"795","volume":"4","author":"CJ Wu","year":"2022","unstructured":"Wu, C.J., et al.: Sustainable AI: environmental implications, challenges and opportunities. PMLS 4, 795\u2013813 (2022)","journal-title":"PMLS"},{"key":"3_CR45","doi-asserted-by":"publisher","unstructured":"Yang, Z., et\u00a0al.: Accurate and convenient energy measurements for GPUs: a detailed study of NVIDIA GPU\u2019s built-in power sensor. In: SC \u201924 Proceedings, pp. 1\u201317 (2024). https:\/\/doi.org\/10.1109\/SC41406.2024.00028","DOI":"10.1109\/SC41406.2024.00028"},{"key":"3_CR46","unstructured":"Yang, Z., et\u00a0al.: Quality at the tail of machine learning inference. arXiv preprint arXiv:2212.13925 (2022)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06118-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T11:23:06Z","timestamp":1759058586000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06118-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,29]]},"ISBN":["9783032061171","9783032061188"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06118-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,29]]},"assertion":[{"value":"29 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}