{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T04:48:20Z","timestamp":1776401300438,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,4]]},"DOI":"10.1145\/3793853.3795760","type":"proceedings-article","created":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T14:25:49Z","timestamp":1775485549000},"page":"214-225","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["ELLMPEG: An Edge-based Agentic LLM Video Processing Tool"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5405-0643","authenticated-orcid":false,"given":"Zoha","family":"Azimi Ourimi","sequence":"first","affiliation":[{"name":"Alpen-Adria-Universit\u00e4t Klagenfurt, Austria, Klagenfurt, Austria and Christian Doppler Laboratory ATHENA, Klagenfurt, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2376-5802","authenticated-orcid":false,"given":"Reza","family":"Fahrani","sequence":"additional","affiliation":[{"name":"Alpen-Adria-Universit\u00e4t Klagenfurt, Austria, Klagenfurt, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8247-5426","authenticated-orcid":false,"given":"Radu","family":"Prodan","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Innsbruck, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0031-5243","authenticated-orcid":false,"given":"Christian","family":"Timmerer","sequence":"additional","affiliation":[{"name":"Alpen-Adria-Universit\u00e4t Klagenfurt, Austria, Klagenfurt, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,6]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"2024 Global Internet Phenomena Report","author":"Networks AppLogic","year":"2024","unstructured":"AppLogic Networks, \u201c2024 Global Internet Phenomena Report.\u201d https:\/\/www.sandvine.com\/, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICC45041.2023.10279262"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510450.3517290"},{"key":"e_1_3_2_1_4_1","unstructured":"FFmpeg \u201cA Complete Cross-Platform Solution to Record Convert and Stream Audio and Video\u201d. https:\/\/www.ffmpeg.org\/ 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP59821.2023.10402699"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588444.3591012"},{"key":"e_1_3_2_1_7_1","volume-title":"Towards AI-Assisted Sustainable Adaptive Video Streaming Systems: Tutorial and Survey","author":"Farahani R.","year":"2024","unstructured":"R. Farahani, Z. Azimi, C. Timmerer, and R. Prodan, \u201cTowards AI-Assisted Sustainable Adaptive Video Streaming Systems: Tutorial and Survey,\u201d arXiv preprint arXiv:2406.02302, 2024."},{"key":"e_1_3_2_1_8_1","volume-title":"Videodrafter: Content-Consistent Multi-scene Video Generation With LLM","author":"Long F.","year":"2024","unstructured":"F. Long, Z. Qiu, T. Yao, and T. Mei, \u201cVideodrafter: Content-Consistent Multi-scene Video Generation With LLM,\u201d arXiv preprint arXiv:2401.01256, 2024."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01353"},{"key":"e_1_3_2_1_10_1","volume-title":"Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis","author":"Fu C.","year":"2024","unstructured":"C. Fu, Y. Dai, Y. Luo, L. Li, S. Ren, R. Zhang, Z. Wang, C. Zhou, Y. Shen, M. Zhang, et al., \u201cVideo-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis,\u201d arXiv preprint arXiv:2405.21075, 2024."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3715675.3715835"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640543.3645143"},{"key":"e_1_3_2_1_13_1","volume-title":"Advances in Neural Information Processing Systems","author":"Huang H.","year":"2024","unstructured":"H. Huang, Y. Feng, C. Shi, L. Xu, J. Yu, and S. Yang, \u201cFree-Bloom: Zero-Shot Text-to-Video Generator with LLM Director and LDM Animator,\u201d Advances in Neural Information Processing Systems, 2024."},{"key":"e_1_3_2_1_14_1","volume-title":"StreamingBench: Assessing the Gap for MLLMs to Achieve Streaming Video Understanding","author":"Lin J.","year":"2024","unstructured":"J. Lin, Z. Fang, C. Chen, Z. Wan, F. Luo, P. Li, Y. Liu, and M. Sun, \u201cStreamingBench: Assessing the Gap for MLLMs to Achieve Streaming Video Understanding,\u201d arXiv preprint arXiv:2411.03628, 2024."},{"key":"e_1_3_2_1_15_1","volume-title":"A Simple LLM Framework for Long-range Video Question-Answering","author":"Zhang C.","year":"2023","unstructured":"C. Zhang, T. Lu, M.M. Islam, Z. Wang, S. Yu, M. Bansal, and G. Bertasius, \u201cA Simple LLM Framework for Long-range Video Question-Answering,\u201d arXiv preprint arXiv:2312.17235, 2023."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00035"},{"key":"e_1_3_2_1_17_1","volume-title":"LLMPEG","author":"Strenge Garrit","year":"2024","unstructured":"Garrit Strenge, \u201cLLMPEG.\u201d https:\/\/github.com\/gstrenge\/llmpeg, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_18_1","volume-title":"The Most Powerful Platform for Building AI Products","author":"Open","year":"2024","unstructured":"Open AI, \u201cThe Most Powerful Platform for Building AI Products\u201d https:\/\/openaicom\/api\/, 2024 Accessed: 12 Feb 2026"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4222-0"},{"key":"e_1_3_2_1_20_1","volume-title":"Advances in Neural Information Processing Systems","author":"Shinn N.","year":"2023","unstructured":"N. Shinn, F. Cassano, A. Gopinath, K. Narasimhan, and S. Yao, \u201cReflexion: Language Agents with Verbal Reinforcement Learning,\u201d Advances in Neural Information Processing Systems, 2023."},{"key":"e_1_3_2_1_21_1","volume-title":"Critic: Large language models can self-correct with tool-interactive critiquing","author":"Gou Z.","year":"2023","unstructured":"Z. Gou, Z. Shao, Y. Gong, Y. Shen, Y. Yang, N. Duan, and W. Chen, \u201cCritic: Large language models can self-correct with tool-interactive critiquing\u201d arXiv preprint arXiv:2305.11738, 2023."},{"key":"e_1_3_2_1_22_1","volume-title":"Advances in Neural Information Processing Systems","author":"Madaan A.","year":"2023","unstructured":"A. Madaan, N. Tandon, P. Gupta, S. Hallinan, L. Gao, S. Wiegreffe, U. Alon, N. Dziri, S. Prabhumoye, Y. Yang, et al., \u201cSelf-refine: Iterative Refinement with Self-feedback,\u201d Advances in Neural Information Processing Systems, 2023."},{"key":"e_1_3_2_1_23_1","volume-title":"Small Language Models are the Future of Agentic AI","author":"Belcak P.","year":"2025","unstructured":"P. Belcak, G. Heinrich, S. Diao, Y. Fu, X. Dong, S. Muralidharan, Y. C. Lin, and P. Molchanov, \u201cSmall Language Models are the Future of Agentic AI,\u201d arXiv preprint arXiv:2506.02153, 2025."},{"key":"e_1_3_2_1_24_1","volume-title":"Advances in Neural Information Processing Systems","author":"Liu H.","year":"2023","unstructured":"H. Liu, C. Li, Q. Wu, and Y. J. Lee, \u201cVisual Instruction Tuning,\u201d Advances in Neural Information Processing Systems, 2023."},{"key":"e_1_3_2_1_25_1","volume-title":"Reframe Anything: LLM Agent for Open World Video Reframing","author":"Cao J.","year":"2024","unstructured":"J. Cao, Y. Wu, W. Chi, W. Zhu, Z. Su, and J. Wu, \u201cReframe Anything: LLM Agent for Open World Video Reframing,\u201d arXiv preprint arXiv:2403.06070, 2024."},{"key":"e_1_3_2_1_26_1","volume-title":"Towards Agentic AI for Multimodal-Guided Video Object Segmentation","author":"Tran T.","year":"2025","unstructured":"T. Tran, T. M. Le, and T. Tran, \u201cTowards Agentic AI for Multimodal-Guided Video Object Segmentation,\u201d arXiv preprint arXiv:2508.10572, 2025."},{"key":"e_1_3_2_1_27_1","volume-title":"Prompt-Driven Agentic Video Editing System: Autonomous Comprehension of Long-Form, Story-Driven Media","author":"Ding Z.","year":"2025","unstructured":"Z. Ding, X. Wang, J. Chen, P. O. Kristensson, and J. Shen, \u201cPrompt-Driven Agentic Video Editing System: Autonomous Comprehension of Long-Form, Story-Driven Media,\u201d arXiv preprint arXiv:2509.16811, 2025."},{"key":"e_1_3_2_1_28_1","volume-title":"Gemini 2.0 Flash","author":"Google","year":"2025","unstructured":"Google, \u201cGemini 2.0 Flash.\u201d https:\/\/ai.google.dev\/gemini-api\/docs\/models#gemini-2.0-flash, 2025. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_29_1","volume-title":"GLM: General Language Model Pretraining With Autoregressive Blank Infilling","author":"Du Z.","year":"2021","unstructured":"Z. Du, Y. Qian, X. Liu, M. Ding, J. Qiu, Z. Yang, and J. Tang, \u201cGLM: General Language Model Pretraining With Autoregressive Blank Infilling,\u201d arXiv preprint arXiv:2103.10360, 2021."},{"key":"e_1_3_2_1_30_1","volume-title":"Videodirectorgpt: Consistent Multi-scene Video Generation via LLM-guided Planning","author":"Lin H.","year":"2023","unstructured":"H. Lin, A. Zala, J. Cho, and M. Bansal, \u201cVideodirectorgpt: Consistent Multi-scene Video Generation via LLM-guided Planning,\u201d arXiv preprint arXiv:2309.15091, 2023."},{"key":"e_1_3_2_1_31_1","volume-title":"Video-RAG: Visually-aligned Retrieval-Augmented Long Video Comprehension","author":"Luo Y.","year":"2024","unstructured":"Y. Luo, X. Zheng, X. Yang, G. Li, H. Lin, J. Huang, J. Ji, F. Chao, J. Luo, and R. Ji, \u201cVideo-RAG: Visually-aligned Retrieval-Augmented Long Video Comprehension,\u201d arXiv preprint arXiv:2411.13093, 2024."},{"key":"e_1_3_2_1_32_1","volume-title":"Video-LLaVA: Learning United Visual Representation by Alignment Before Projection","author":"Lin B.","year":"2023","unstructured":"B. Lin, Y. Ye, B. Zhu, J. Cui, M. Ning, P. Jin, and L. Yuan, \u201cVideo-LLaVA: Learning United Visual Representation by Alignment Before Projection,\u201d arXiv preprint arXiv:2311.10122, 2023."},{"key":"e_1_3_2_1_33_1","volume-title":"Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at any Resolution","author":"Wang P.","year":"2024","unstructured":"P. Wang, S. Bai, S. Tan, S. Wang, Z. Fan, J. Bai, K. Chen, X. Liu, J. Wang, W. Ge, et al., \u201cQwen2-VL: Enhancing Vision-Language Model's Perception of the World at any Resolution,\u201d arXiv preprint arXiv:2409.12191, 2024."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00232"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3680088"},{"key":"e_1_3_2_1_36_1","volume-title":"Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality","author":"Chiang W.-L.","year":"2023","unstructured":"W.-L. Chiang, Z. Li, Z. Lin, Y. Sheng, Z. Wu, H. Zhang, L. Zheng, S. Zhuang, Y. Zhuang, J. E. Gonzalez, I. Stoica, and E. P. Xing, \u201cVicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality.\u201d https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/, 2023."},{"key":"e_1_3_2_1_37_1","volume-title":"AVA: Towards Agentic Video Analytics with Vision Language Models","author":"Yan Y.","year":"2025","unstructured":"Y. Yan, S. Jiang, T. Cao, Y. Yang, Q. Yang, Y. Shu, Y. Yang, and L. Qiu, \u201cAVA: Towards Agentic Video Analytics with Vision Language Models,\u201d 2025."},{"key":"e_1_3_2_1_38_1","volume-title":"Deep Video Discovery: Agentic Search with Tool Use for Long-form Video Understanding","author":"Zhang X.","year":"1807","unstructured":"X. Zhang, Z. Jia, Z. Guo, J. Li, B. Li, H. Li, and Y. Lu, \u201cDeep Video Discovery: Agentic Search with Tool Use for Long-form Video Understanding,\u201d arXiv preprint arXiv:2505.18079, 2025."},{"key":"e_1_3_2_1_39_1","volume-title":"Qwen2.5","author":"Qwen","year":"2024","unstructured":"Qwen, \u201cQwen2.5.\u201d https:\/\/huggingface.co\/Qwen\/Qwen2.5-7B, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_40_1","volume-title":"Gemma 2: Improving Open Language Models at a Practical Size","author":"Team G.","year":"2024","unstructured":"G. Team, M. Riviere, S. Pathak, P. G. Sessa, C. Hardin, S. Bhupatiraju, L. Hussenot, T. Mesnard, B. Shahriari, A. Ram\u00e9, et al., \u201cGemma 2: Improving Open Language Models at a Practical Size,\u201d arXiv preprint arXiv:2408.00118, 2024."},{"key":"e_1_3_2_1_41_1","volume-title":"ICLR 2025 Third Workshop on Deep Learning for Code","author":"Aggarwal P.","year":"2025","unstructured":"P. Aggarwal and S. Welleck, \u201cProgramming with Pixels: Towards Generalist Software Engineering Agents,\u201d in ICLR 2025 Third Workshop on Deep Learning for Code, 2025."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/NILES63360.2024.10753267"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TALE62452.2024.10834365"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639187"},{"key":"e_1_3_2_1_45_1","volume-title":"Meta Llama 3.3 Multilingual Large Language Model","year":"2024","unstructured":"Meta-llama, \u201cMeta Llama 3.3 Multilingual Large Language Model.\u201d https:\/\/huggingface.co\/meta-llama\/Llama-3.3-70B-Instruct, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_46_1","volume-title":"European Conference on Computer Vision","author":"Liu S.","year":"2024","unstructured":"S. Liu, Z. Zeng, T. Ren, F. Li, H. Zhang, J. Yang, Q. Jiang, C. Li, J. Yang, H. Su, et al., \u201cGrounding Dino: Marrying Dino with Grounded Pre-training for Open-set Object Detection,\u201d in European Conference on Computer Vision, Springer, 2024."},{"key":"e_1_3_2_1_47_1","volume-title":"Sam 2: Segment Anything in Images and Videos","author":"Ravi N.","year":"2024","unstructured":"N. Ravi, V. Gabeur, Y.-T. Hu, R. Hu, C. Ryali, T. Ma, H. Khedr, R. R\u00e4dle, C. Rolland, L. Gustafson, et al., \u201cSam 2: Segment Anything in Images and Videos,\u201d arXiv preprint arXiv:2408.00714, 2024."},{"key":"e_1_3_2_1_48_1","volume-title":"Beats: Audio Pre-training with Acoustic Tokenizers","author":"Chen S.","year":"2022","unstructured":"S. Chen, Y. Wu, C. Wang, S. Liu, D. Tompkins, Z. Chen, and F. Wei, \u201cBeats: Audio Pre-training with Acoustic Tokenizers,\u201d arXiv preprint arXiv:2212.09058, 2022."},{"key":"e_1_3_2_1_49_1","volume-title":"FFmpeg","year":"2024","unstructured":"FFmpeg, \u201cFFmpeg.\u201d https:\/\/github.com\/FFmpeg\/FFmpeg, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_50_1","volume-title":"FFmpeg Integration","author":"Fraunhofer","year":"2024","unstructured":"Fraunhofer HHI, \u201cFFmpeg Integration.\u201d https:\/\/github.com\/fraunhoferhhi\/vvenc\/wiki\/FFmpeg-Integration, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_51_1","volume-title":"VVenC","author":"Fraunhofer","year":"2024","unstructured":"Fraunhofer HHI, \u201cVVenC.\u201d https:\/\/github.com\/fraunhoferhhi\/vvenc, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_52_1","volume-title":"FFmprovisr","year":"2024","unstructured":"AMIAopensource, \u201cFFmprovisr.\u201d https:\/\/amiaopensource.github.io\/ffmprovisr\/, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_53_1","first-page":"12","author":"Gianni","year":"2024","unstructured":"Rosato, Gianni and others, \u201cVVenC.\u201d https:\/\/wiki.x266.mov\/docs\/encoders\/VVen C, 2024. Accessed: 12 Feb. 2026.","journal-title":"VVen"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2003.815165"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221191"},{"key":"e_1_3_2_1_56_1","volume-title":"Llama: Open and Efficient Foundation Language Models","author":"Touvron H.","year":"2023","unstructured":"H. Touvron, T. Lavril, G. Izacard, X. Martinet, M.-A. Lachaux, T. Lacroix, B. Rozi\u00e8re, N. Goyal, E. Hambro, F. Azhar, et al., \u201cLlama: Open and Efficient Foundation Language Models,\u201d arXiv preprint arXiv:2302.13971, 2023."},{"key":"e_1_3_2_1_57_1","volume-title":"F. Bressand, G. Lengyel, G. Lample, L. Saulnier, et al., \u201cMistral 7B","author":"Jiang A. Q.","year":"2023","unstructured":"A. Q. Jiang, A. Sablayrolles, A. Mensch, C. Bamford, D. S. Chaplot, D. d. l. Casas, F. Bressand, G. Lengyel, G. Lample, L. Saulnier, et al., \u201cMistral 7B,\u201d arXiv preprint arXiv:2310.06825, 2023."},{"key":"e_1_3_2_1_58_1","volume-title":"FFmpeg Documentation","year":"2024","unstructured":"FFmpeg, \u201cFFmpeg Documentation.\u201d https:\/\/ffmpeg.org\/documentation.html, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_59_1","volume-title":"VVenC","author":"Fraunhofer","year":"2024","unstructured":"Fraunhofer HHI, \u201cVVenC.\u201d https:\/\/github.com\/fraunhoferhhi\/vvenc\/wiki, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_60_1","volume-title":"Langchain Text Splitters","year":"2024","unstructured":"LangChain, \u201cLangchain Text Splitters.\u201d https:\/\/python.langchain.com\/api_refer ence\/text_splitters\/index.html, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_61_1","volume-title":"What are Tokens and How to Count Them?","year":"2024","unstructured":"OpenAI, \u201cWhat are Tokens and How to Count Them?.\u201d https:\/\/help.openai.com\/en\/articles\/4936856-what-are-tokens-and-how-to-count-them, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_62_1","volume-title":"C-Pack: Packaged Resources To Advance General Chinese Embedding","author":"Xiao S.","year":"2023","unstructured":"S. Xiao, Z. Liu, P. Zhang, and N. Muennighoff, \u201cC-Pack: Packaged Resources To Advance General Chinese Embedding,\u201d 2023."},{"key":"e_1_3_2_1_63_1","volume-title":"The FAISS Library","author":"Douze M.","year":"2024","unstructured":"M. Douze, A. Guzhva, C. Deng, J. Johnson, G. Szilvasy, P.-E. Mazar\u00e9, M. Lomeli, L. Hosseini, and H. J\u00e9gou, \u201cThe FAISS Library,\u201d arXiv preprint arXiv:2401.08281, 2024."},{"key":"e_1_3_2_1_64_1","author":"Schaffer R.","year":"1993","unstructured":"R. Schaffer and R. Sedgewick, \u201cThe Analysis of Heapsort,\u201d Journal of Algorithms, 1993.","journal-title":"Journal of Algorithms"},{"key":"e_1_3_2_1_65_1","volume-title":"Open LLM Leaderboard","author":"Beeching E.","year":"2023","unstructured":"E. Beeching, C. Fourrier, N. Habib, S. Han, N. Lambert, N. Rajani, O. Sanseviero, L. Tunstall, and T. Wolf, \u201cOpen LLM Leaderboard.\u201d https:\/\/huggingface.co\/spaces\/open-llm-leaderboard-old\/open_llm_leaderboard, 2023. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_66_1","volume-title":"Advances in Neural Information Processing Systems","author":"Zheng L.","year":"2023","unstructured":"L. Zheng, W.-L. Chiang, Y. Sheng, S. Zhuang, Z. Wu, Y. Zhuang, Z. Lin, Z. Li, D. Li, E. Xing, et al., \u201cJudging LLM-as-a-Judge with MT-Bench and Chatbot Arena,\u201d Advances in Neural Information Processing Systems, 2023."},{"key":"e_1_3_2_1_67_1","volume-title":"GPT-4o","year":"2023","unstructured":"OpenAI, \u201cGPT-4o.\u201d https:\/\/openai.com\/index\/hello-gpt-4o\/, 2023. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_68_1","volume-title":"Track and Reduce CO2 Emissions From Your Computing","author":"Development Team CodeCarbon","year":"2024","unstructured":"CodeCarbon Development Team, \u201cTrack and Reduce CO2 Emissions From Your Computing.\u201d https:\/\/codecarbon.io\/, 2024. Accessed: 12 Feb. 2026."},{"key":"e_1_3_2_1_69_1","volume-title":"Bertscore: Evaluating Text Generation with Bert","author":"Zhang T.","year":"1904","unstructured":"T. Zhang, V. Kishore, F. Wu, K. Q. Weinberger, and Y. Artzi, \u201cBertscore: Evaluating Text Generation with Bert,\u201d arXiv preprint arXiv:1904.09675, 2019."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.3115\/1557690.1557747"},{"key":"e_1_3_2_1_71_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","author":"Papineni K.","year":"2002","unstructured":"K. Papineni, S. Roukos, T. Ward, and W.-J. Zhu, \u201cBLEU: A Method For Automatic Evaluation of Machine Translation,\u201d in Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, 2002."},{"key":"e_1_3_2_1_72_1","author":"Stergiou A.","year":"2022","unstructured":"A. Stergiou and R. Poppe, \u201cAdapool: Exponential adaptive pooling for information-retaining downsampling,\u201d IEEE Transactions on Image Processing, 2022.","journal-title":"IEEE Transactions on Image Processing"}],"event":{"name":"MMSys '26: ACM Multimedia Systems Conference 2026","location":"Hong Kong Hong Kong","acronym":"MMSys '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the ACM Multimedia Systems Conference 2026"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3793853.3795760","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T15:23:48Z","timestamp":1776353028000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3793853.3795760"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,4]]},"references-count":72,"alternative-id":["10.1145\/3793853.3795760","10.1145\/3793853"],"URL":"https:\/\/doi.org\/10.1145\/3793853.3795760","relation":{},"subject":[],"published":{"date-parts":[[2026,4,4]]},"assertion":[{"value":"2026-04-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}