{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:51:17Z","timestamp":1765309877899,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","funder":[{"name":"Nanjing University-China Mobile Communications Group Co.,Ltd. Joint Institute"},{"name":"Nanjing Key S&T Special Projects","award":["202309006"],"award-info":[{"award-number":["202309006"]}]},{"name":"NSFC","award":["62202233"],"award-info":[{"award-number":["62202233"]}]},{"name":"Grant from State Key Laboratory for Novel Software Technology, Nanjing University","award":["KFKT2024B18"],"award-info":[{"award-number":["KFKT2024B18"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755254","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:38Z","timestamp":1761377198000},"page":"12025-12034","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["VidIQ: Inference-Aware Neural Codecs for Quality-Enhanced, Real-Time Video Analytics"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8233-329X","authenticated-orcid":false,"given":"Andong","family":"Zhu","sequence":"first","affiliation":[{"name":"Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6581-6399","authenticated-orcid":false,"given":"Sheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9796-238X","authenticated-orcid":false,"given":"Xiaohang","family":"Shi","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5246-6018","authenticated-orcid":false,"given":"Hesheng","family":"Sun","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9251-4337","authenticated-orcid":false,"given":"Yu","family":"Liang","sequence":"additional","affiliation":[{"name":"Nanjing Normal University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1625-7575","authenticated-orcid":false,"given":"Zhuzhong","family":"Qian","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3130-5651","authenticated-orcid":false,"given":"Han","family":"Zheng","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7114-1417","authenticated-orcid":false,"given":"Xiaokun","family":"Wang","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7496-2137","authenticated-orcid":false,"given":"Ning","family":"Jiang","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.150"},{"key":"e_1_3_2_2_2_1","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision (ICCV). 5836-5844","author":"Barroso-Laguna Axel","year":"2019","unstructured":"Axel Barroso-Laguna, Edgar Riba, Daniel Ponsa, and Krystian Mikolajczyk. 2019. Key.Net: Keypoint Detection by Handcrafted and Learned CNN Filters. In Proceedings of IEEE\/CVF International Conference on Computer Vision (ICCV). 5836-5844."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3101953"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458306.3458874"},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of USENIX Symposium on Networked Systems Design and Implementation (NSDI). 103-118","author":"Dasari Mallesham","year":"2022","unstructured":"Mallesham Dasari, Kumara Kahatapitiya, Samir R Das, Aruna Balasubramanian, and Dimitris Samaras. 2022. Swift: Adaptive Video Streaming with Layered Neural Codecs. In Proceedings of USENIX Symposium on Networked Systems Design and Implementation (NSDI). 103-118."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2439281"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3387514.3405887"},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of Conference on Machine Learning and Systems (MLSys).","author":"Du Kuntai","year":"2022","unstructured":"Kuntai Du, Qizheng Zhang, Anton Arapin, Haodong Wang, Zhengxu Xia, and Junchen Jiang. 2022. AccMPEG: Optimizing Video Encoding for Video Analytics. In Proceedings of Conference on Machine Learning and Systems (MLSys)."},{"key":"e_1_3_2_2_9_1","unstructured":"FCC. 2025. Measuring Broadband Raw Data Releases - Fixed. https:\/\/www.fcc.gov\/oet\/mba\/raw-data-releases."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3558754"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00713"},{"key":"e_1_3_2_2_12_1","volume-title":"Mask R-CNN. In Proceedings of IEEE International Conference on Computer Vision (ICCV). 2961-2969","author":"He Kaiming","year":"2017","unstructured":"Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross Girshick. 2017. Mask R-CNN. In Proceedings of IEEE International Conference on Computer Vision (ICCV). 2961-2969."},{"key":"e_1_3_2_2_13_1","volume-title":"End-Edge Coordinated Joint Encoding and Neural Enhancement for Low-Light Video Analytics. arXiv preprint arXiv:2308.16418","author":"He Yuanyi","year":"2023","unstructured":"Yuanyi He, Peng Yang, Tian Qin, and Ning Zhang. 2023. End-Edge Coordinated Joint Encoding and Neural Enhancement for Low-Light Video Analytics. arXiv preprint arXiv:2308.16418 (2023)."},{"key":"e_1_3_2_2_14_1","volume-title":"Proceedings of Conference on Innovative Data Systems Research (CIDR).","author":"Kang Daniel","year":"2019","unstructured":"Daniel Kang, Peter Bailis, and Matei Zaharia. 2019. Challenges and Opportunities in DNN-Based Video Analytics: A Demonstration of the BlazeIt Video Query Engine. In Proceedings of Conference on Innovative Data Systems Research (CIDR)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00448"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524273.3532906"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.10.007"},{"key":"e_1_3_2_2_18_1","volume-title":"Proceedings of Conference on Neural Information Processing Systems (NeurIPS). 18114-18125","author":"Li Jiahao","year":"2021","unstructured":"Jiahao Li, Bin Li, and Yan Lu. 2021. Deep Contextual Video Compression. In Proceedings of Conference on Neural Information Processing Systems (NeurIPS). 18114-18125."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.151"},{"volume-title":"Microsoft COCO: Common Objects in Context","author":"Lin Tsung-Yi","key":"e_1_3_2_2_20_1","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In Proceedings of Springer Computer Vision-ECCV. 740-755."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00076"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548033"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2024.3375108"},{"key":"e_1_3_2_2_24_1","unstructured":"Alessandro Prest Christian Leistner Javier Civera Cordelia Schmid and Vittorio Ferrari. 2012. Youtube-Objects dataset: A Large-scale Database of Object Videos from YouTube. https:\/\/data.vision.ee.ethz.ch\/cvl\/youtube-objects\/."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01421"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2725241"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2022.109392"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.207"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530423"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2014.03.003"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475698"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3592523"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155524"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS59052.2023.00020"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00200"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2982166"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796657"},{"key":"e_1_3_2_2_39_1","volume-title":"Edge Video Analytics: A Survey on Applications, Systems and Enabling Techniques","author":"Xu Renjie","year":"2023","unstructured":"Renjie Xu, Saiedeh Razavi, and Rong Zheng. 2023. Edge Video Analytics: A Survey on Applications, Systems and Enabling Techniques. IEEE Communications Surveys & Tutorials (2023)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01747"},{"key":"e_1_3_2_2_41_1","unstructured":"YouTube. 2022. 15 minutes of heavy traffic noise in India. https:\/\/www.youtube.com\/watch?v=iJZcjZD0fw0\/."},{"key":"e_1_3_2_2_42_1","unstructured":"YouTube. 2023a. Dash Cam Owners Australia On the Road Compilation. https:\/\/www.youtube.com\/watch?v=fci0__FKp9Q."},{"key":"e_1_3_2_2_43_1","unstructured":"YouTube. 2023b. Surfing Massive Waves Waimea Bay 4K. https:\/\/www.youtube.com\/watch?v=zZqxPozl2Ec."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10228906"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796875"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10229059"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3448628"},{"key":"e_1_3_2_2_48_1","volume-title":"CaDM: Codec-aware Diffusion Modeling for Neural-enhanced Video Streaming. arXiv preprint arXiv:2211.08428","author":"Zhou Qihua","year":"2022","unstructured":"Qihua Zhou, Ruibin Li, Song Guo, Peiran Dong, Yi Liu, Jingcai Guo, and Zhenda Xu. 2022. CaDM: Codec-aware Diffusion Modeling for Neural-enhanced Video Streaming. arXiv preprint arXiv:2211.08428 (2022)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755254","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:47:33Z","timestamp":1765309653000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755254"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":48,"alternative-id":["10.1145\/3746027.3755254","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755254","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}