{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T22:15:30Z","timestamp":1773612930535,"version":"3.50.1"},"publisher-location":"Cham","reference-count":62,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031920882","type":"print"},{"value":"9783031920899","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-92089-9_24","type":"book-chapter","created":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T12:49:21Z","timestamp":1748090961000},"page":"388-405","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Watt for\u00a0What: Rethinking Deep Learning\u2019s Energy-Performance Relationship"],"prefix":"10.1007","author":[{"given":"Shreyank N.","family":"Gowda","sequence":"first","affiliation":[]},{"given":"Xinyue","family":"Hao","sequence":"additional","affiliation":[]},{"given":"Gen","family":"Li","sequence":"additional","affiliation":[]},{"given":"Shashank Narayana","family":"Gowda","sequence":"additional","affiliation":[]},{"given":"Xiaobo","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Laura","family":"Sevilla-Lara","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"24_CR1","unstructured":"Anthony, L.F.W., Kanding, B., Selvan, R.: Carbontracker: Tracking and predicting the carbon footprint of training deep learning models. arXiv preprint arXiv:2007.03051 (2020)"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Bannour, N., Ghannay, S., N\u00e9v\u00e9ol, A., Ligozat, A.L.: Evaluating the carbon footprint of nlp methods: a survey and analysis of existing tools. In: Proceedings of the Second Workshop on Simple and Efficient Natural Language Processing, pp. 11\u201321 (2021)","DOI":"10.18653\/v1\/2021.sustainlp-1.2"},{"key":"24_CR3","unstructured":"Bao, H., Dong, L., Piao, S., Wei, F.: Beit: Bert pre-training of image transformers. arXiv preprint arXiv:2106.08254 (2021)"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Bender, E.M., Gebru, T., McMillan-Major, A., Shmitchell, S.: On the dangers of stochastic parrots: Can language models be too big? In: Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency, pp. 610\u2013623 (2021)","DOI":"10.1145\/3442188.3445922"},{"key":"24_CR5","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: Proceedings of the International Conference on Machine Learning (ICML) (July 2021)"},{"key":"24_CR6","first-page":"129","volume":"2","author":"D Blalock","year":"2020","unstructured":"Blalock, D., Gonzalez Ortiz, J.J., Frankle, J., Guttag, J.: What is the state of neural network pruning? Proc. Mach. Learn. Syst. 2, 129\u2013146 (2020)","journal-title":"Proc. Mach. Learn. Syst."},{"key":"24_CR7","unstructured":"BritishGas: What\u2019s the average gas and electricity bill in great britain? (2023). https:\/\/www.britishgas.co.uk\/energy\/guides\/average-bill.html"},{"key":"24_CR8","doi-asserted-by":"publisher","first-page":"S118","DOI":"10.1134\/S1064562422060230","volume":"106","author":"SA Budennyy","year":"2022","unstructured":"Budennyy, S.A., et al.: Eco2ai: carbon emissions tracking of machine learning models as the first step towards sustainable ai. Doklady Mathem. 106, S118\u2013S128 (2022)","journal-title":"Doklady Mathem."},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"24_CR10","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.195"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"24_CR14","unstructured":"Dehghani, M., Arnab, A., Beyer, L., Vaswani, A., Tay, Y.: The efficiency misnomer. arXiv preprint arXiv:2110.12894 (2021)"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"24_CR16","unstructured":"Desislavov, R., Mart\u00ednez-Plumed, F., Hern\u00e1ndez-Orallo, J.: Compute and energy consumption trends in deep learning inference. arXiv preprint arXiv:2109.05472 (2021)"},{"key":"24_CR17","doi-asserted-by":"publisher","unstructured":"Ding, M., Xiao, B., Codella, N., Luo, P., Wang, J., Yuan, L.: Davit: Dual attention vision transformers. In: European Conference on Computer Vision, pp. 74\u201392. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20053-3_5","DOI":"10.1007\/978-3-031-20053-3_5"},{"key":"24_CR18","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"24_CR19","unstructured":"Getzner, J., Charpentier, B., G\u00fcnnemann, S.: Accuracy is not the only metric that matters: Estimating the energy consumption of deep learning models. arXiv preprint arXiv:2304.00897 (2023)"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Gowda, S.N., Rohrbach, M., Sevilla-Lara, L.: Smart frame selection for action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 1451\u20131459 (2021)","DOI":"10.1609\/aaai.v35i2.16235"},{"key":"24_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/978-3-030-20870-7_36","volume-title":"Computer Vision \u2013 ACCV 2018","author":"SN Gowda","year":"2019","unstructured":"Gowda, S.N., Yuan, C.: ColorNet: investigating the importance of color spaces for image classification. In: Jawahar, C.V., Li, H., Mori, G., Schindler, K. (eds.) ACCV 2018. LNCS, vol. 11364, pp. 581\u2013596. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-20870-7_36"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Goyal, R., et\u00a0al.: The \u201csomething something\u201d video database for learning and evaluating visual common sense. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5842\u20135850 (2017)","DOI":"10.1109\/ICCV.2017.622"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"24_CR24","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"24_CR25","unstructured":"Henderson, P., Hu, J., Jong, W.S., Peyton, J., Prabhu, V., Rahtu, E., Xiao, R.: Codecarbon. https:\/\/github.com\/mlco2\/codecarbon (2021)"},{"key":"24_CR26","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Howard, A., et\u00a0al.: Searching for mobilenetv3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"24_CR28","unstructured":"Howard, A.G., et al.: Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"24_CR30","unstructured":"Kay, W., et al.: The kinetics human action video dataset. CoRR abs\/ arxiv: 1705.06950 (2017)"},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Khowaja, S.A., Khuwaja, P., Dev, K.: Chatgpt needs spade (sustainability, privacy, digital divide, and ethics) evaluation: A review. arXiv preprint arXiv:2305.03123 (2023)","DOI":"10.36227\/techrxiv.22619932.v2"},{"key":"24_CR32","doi-asserted-by":"crossref","unstructured":"Kondratyuk, D., Yuan, L., Li, Y., Zhang, L., Tan, M., Brown, M., Gong, B.: Movinets: Mobile video networks for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16020\u201316030 (2021)","DOI":"10.1109\/CVPR46437.2021.01576"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Korbar, B., Tran, D., Torresani, L.: Scsampler: sampling salient clips from video for efficient action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6232\u20136242 (2019)","DOI":"10.1109\/ICCV.2019.00633"},{"key":"24_CR34","unstructured":"Lacoste, A., Luccioni, A., Schmidt, V., Dandres, T.: Quantifying the carbon emissions of machine learning. arXiv preprint arXiv:1910.09700 (2019)"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Li, K., et al.: Uniformerv2: unlocking the potential of image vits for video understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1632\u20131643 (October 2023)","DOI":"10.1109\/ICCV51070.2023.00157"},{"key":"24_CR36","first-page":"12934","volume":"35","author":"Y Li","year":"2022","unstructured":"Li, Y., et al.: Efficientformer: vision transformers at mobilenet speed. Adv. Neural. Inf. Process. Syst. 35, 12934\u201312949 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"9","key":"24_CR37","doi-asserted-by":"publisher","first-page":"5172","DOI":"10.3390\/su14095172","volume":"14","author":"AL Ligozat","year":"2022","unstructured":"Ligozat, A.L., Lefevre, J., Bugeau, A., Combaz, J.: Unraveling the hidden environmental impacts of ai solutions for environment life cycle assessment of ai solutions. Sustainability 14(9), 5172 (2022)","journal-title":"Sustainability"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Lin, J., Gan, C., Han, S.: Tsm: temporal shift module for efficient video understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7083\u20137093 (2019)","DOI":"10.1109\/ICCV.2019.00718"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convnet for the 2020s. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11976\u201311986 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"24_CR41","unstructured":"Patterson, D., et al.: Carbon emissions and large neural network training. arXiv preprint arXiv:2104.10350 (2021)"},{"key":"24_CR42","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Radosavovic, I., Kosaraju, R.P., Girshick, R., He, K., Doll\u00e1r, P.: Designing network design spaces. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10428\u201310436 (2020)","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"24_CR44","unstructured":"Ridnik, T., Ben-Baruch, E., Noy, A., Zelnik-Manor, L.: Imagenet-21k pretraining for the masses. arXiv preprint arXiv:2104.10972 (2021)"},{"key":"24_CR45","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"issue":"12","key":"24_CR46","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1145\/3381831","volume":"63","author":"R Schwartz","year":"2020","unstructured":"Schwartz, R., Dodge, J., Smith, N.A., Etzioni, O.: Green ai. Commun. ACM 63(12), 54\u201363 (2020)","journal-title":"Commun. ACM"},{"key":"24_CR47","doi-asserted-by":"publisher","unstructured":"Selvan, R., Bhagwat, N., Wolff\u00a0Anthony, L.F., Kanding, B., Dam, E.B.: Carbon footprint of selecting and training deep learning models for medical image analysis. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 506\u2013516. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-16443-9_49","DOI":"10.1007\/978-3-031-16443-9_49"},{"key":"24_CR48","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"24_CR49","doi-asserted-by":"crossref","unstructured":"Strubell, E., Ganesh, A., McCallum, A.: Energy and policy considerations for deep learning in nlp. arXiv preprint arXiv:1906.02243 (2019)","DOI":"10.18653\/v1\/P19-1355"},{"key":"24_CR50","doi-asserted-by":"crossref","unstructured":"Strudel, R., Garcia, R., Laptev, I., Schmid, C.: Segmenter: transformer for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7262\u20137272 (2021)","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"24_CR51","doi-asserted-by":"crossref","unstructured":"Sun, C., Shrivastava, A., Singh, S., Gupta, A.: Revisiting unreasonable effectiveness of data in deep learning era. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 843\u2013852 (2017)","DOI":"10.1109\/ICCV.2017.97"},{"key":"24_CR52","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"24_CR53","unstructured":"Tan, M., Le, Q.: Efficientnet: rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning, pp. 6105\u20136114. PMLR (2019)"},{"key":"24_CR54","unstructured":"Tan, M., Le, Q.: Efficientnetv2: Smaller models and faster training. In: International Conference on Machine Learning, pp. 10096\u201310106. PMLR (2021)"},{"key":"24_CR55","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"24_CR56","unstructured":"Valeye, F.: Tracarbon (2023). https:\/\/github.com\/fvaleye\/tracarbon"},{"key":"24_CR57","unstructured":"Weng, O.: Neural network quantization for efficient inference: A survey. arXiv preprint arXiv:2112.06126 (2021)"},{"key":"24_CR58","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: Bisenet: bilateral segmentation network for real-time semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341 (2018)","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"24_CR59","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"24_CR60","doi-asserted-by":"crossref","unstructured":"Zheng, S., et\u00a0al.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6881\u20136890 (2021)","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"24_CR61","doi-asserted-by":"crossref","unstructured":"Zhou, B., Andonian, A., Oliva, A., Torralba, A.: Temporal relational reasoning in videos. In: European Conference on Computer Vision (2018)","DOI":"10.1007\/978-3-030-01246-5_49"},{"key":"24_CR62","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ade20k dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 633\u2013641 (2017)","DOI":"10.1109\/CVPR.2017.544"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-92089-9_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T12:49:35Z","timestamp":1748090975000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-92089-9_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031920882","9783031920899"],"references-count":62,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-92089-9_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}