{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T22:50:46Z","timestamp":1768517446733,"version":"3.49.0"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T00:00:00Z","timestamp":1765670400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T00:00:00Z","timestamp":1765670400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFC2607400"],"award-info":[{"award-number":["2024YFC2607400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62432008,62472248,62302259"],"award-info":[{"award-number":["62432008,62472248,62302259"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,14]]},"DOI":"10.1109\/icpads67057.2025.11323053","type":"proceedings-article","created":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T20:36:54Z","timestamp":1768423014000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["SwiftReTaKe: Quick and Accurate Redundancy Reduction for Cloud-Edge Collaborative Video-Language Understanding"],"prefix":"10.1109","author":[{"given":"Xinqi","family":"Jin","sequence":"first","affiliation":[{"name":"School of Software & BNRist, Tsinghua University,Beijing,China"}]},{"given":"Fan","family":"Dang","sequence":"additional","affiliation":[{"name":"School of Software, Beijing Jiaotong University,Beijing,China"}]},{"given":"Kebin","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University,Global Innovation Exchange,Beijing,China"}]},{"given":"Jiangchuan","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computing Science, Simon Fraser University,Burnaby,Canada"}]},{"given":"Jingao","family":"Xu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University,Computer Science Department,Pittsburgh,United States"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3458305.3463377"},{"key":"ref2","first-page":"119","article-title":"Ekya: Continuous learning of video analytics models on edge compute servers","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Bhardwaj"},{"key":"ref3","first-page":"377","article-title":"Live video analytics at scale with approximation and Delay-Tolerance","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Zhang"},{"key":"ref4","volume-title":"NVIDIA-AI-Blueprints\/video-search-and-summarization: Blueprint for Ingesting massive volumes of live or archived videos and extract insights for summarization and interactive Q&A","year":"2025"},{"key":"ref5","first-page":"459","article-title":"Video analytics with zerostreaming cameras","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Xu"},{"key":"ref6","article-title":"Qwen2.5-vl technical report","author":"Bai","year":"2025","journal-title":"arXiv preprint"},{"key":"ref7","article-title":"Seed1.5-vl technical report","author":"Team","year":"2025","journal-title":"arXiv preprint"},{"key":"ref8","article-title":"Internvl3: Exploring advanced training and testtime recipes for open-source multimodal models","author":"Zhu","year":"2025","journal-title":"arXiv preprint"},{"key":"ref9","article-title":"Llavavideo: Video instruction tuning with synthetic data","author":"Zhang","year":"2025","journal-title":"arXiv preprint"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.283"},{"key":"ref11","article-title":"Pyramiddrop: Accelerating your large vision-language models via pyramid visual redundancy reduction","author":"Xing","year":"2024","journal-title":"arXiv preprint"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.235"},{"key":"ref13","article-title":"Video-mme: The first-ever comprehensive evaluation benchmark of multi-modal llms in video analysis","author":"Fu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386743"},{"key":"ref15","first-page":"23716","article-title":"Flamingo: a visual language model for fewshot learning","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Alayrac","year":"2022"},{"key":"ref16","first-page":"12888","article-title":"BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proceedings of the 39th International Conference on Machine Learning","volume":"162","author":"Li"},{"key":"ref17","first-page":"19730","article-title":"BLIP-2: Bootstrapping languageimage pre-training with frozen image encoders and large language models","volume-title":"Proceedings of the 40th International Conference on Machine Learning","volume":"202","author":"Li"},{"key":"ref18","article-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models","author":"Zhu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref19","volume-title":"Live encoding with VP9 using FFmpeg","year":"2025"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.4324\/9781410605337-29"},{"key":"ref22","article-title":"What do vision transformers learn? a visual exploration","author":"Ghiasi","year":"2022","journal-title":"arXiv preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.deelio-1.1"},{"key":"ref24","article-title":"FlashAttention: Fast and memory-efficient exact attention with IO-awareness","author":"Dao","year":"2022","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref25","article-title":"Flashattention-2: Faster attention with better parallelism and work partitioning","author":"Dao","year":"2023","journal-title":"arXiv preprint"},{"key":"ref26","volume-title":"Speedtest Global Index - Internet Speed around the world","author":"Ookla","year":"2025"},{"key":"ref27","volume-title":"Chat with ai: The surprising turn of real-time video communication from human to ai","author":"Wu","year":"2025"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.01843"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3754839"},{"key":"ref30","volume-title":"Deepseek-v3.2-exp: Boosting long-context efficiency with deepseek sparse attention","year":"2025"},{"key":"ref31","article-title":"Deepseek-v3 technical report","year":"2025","journal-title":"arXiv preprint"},{"issue":"1","key":"ref32","article-title":"Switch transformers: scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus","year":"2022","journal-title":"J. Mach. Learn. Res."},{"key":"ref33","article-title":"Gshard: Scaling giant models with conditional computation and automatic sharding","author":"Lepikhin","year":"2020","journal-title":"arXiv preprint"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.936"},{"key":"ref35","first-page":"87","article-title":"Awq: Activation-aware weight quantization for on-device llm compression and acceleration","volume-title":"Proceedings of Machine Learning and Systems","volume":"6","author":"Lin","year":"2024"},{"key":"ref36","first-page":"21702","article-title":"Llm-pruner: On the structural pruning of large language models","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Ma","year":"2023"},{"key":"ref37","first-page":"19274","article-title":"Fast inference from transformers via speculative decoding","volume-title":"Proceedings of the 40th International Conference on Machine Learning","volume":"202","author":"Leviathan"},{"key":"ref38","article-title":"Accelerating large language model decoding with speculative sampling","author":"Chen","year":"2023","journal-title":"arXiv preprint"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref40","first-page":"521","article-title":"Orca: A distributed serving system for Transformer-Based generative models","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2003.815165"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221191"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01126"},{"key":"ref44","article-title":"Deep contextual video compression","volume":"34","author":"Li","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419185"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544218"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3712676.3714434"},{"key":"ref48","first-page":"22185","article-title":"Video-lavit: Unified video-language pre-training with decoupled visual-motional tokenization","volume-title":"International Conference on Machine Learning","author":"Jin","year":"2024"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02250"}],"event":{"name":"2025 IEEE 31th International Conference on Parallel and Distributed Systems (ICPADS)","location":"Hefei, China","start":{"date-parts":[[2025,12,14]]},"end":{"date-parts":[[2025,12,18]]}},"container-title":["2025 IEEE 31th International Conference on Parallel and Distributed Systems (ICPADS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11322805\/11322871\/11323053.pdf?arnumber=11323053","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T07:09:22Z","timestamp":1768460962000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11323053\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,14]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icpads67057.2025.11323053","relation":{},"subject":[],"published":{"date-parts":[[2025,12,14]]}}}