{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T08:57:11Z","timestamp":1773392231466,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1145\/3514221.3526181","type":"proceedings-article","created":{"date-parts":[[2022,6,12]],"date-time":"2022-06-12T02:33:49Z","timestamp":1655001229000},"page":"545-558","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Zeus: Efficiently Localizing Actions in Videos using Reinforcement Learning"],"prefix":"10.1145","author":[{"given":"Pramod","family":"Chunduri","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Jaeho","family":"Bang","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Yao","family":"Lu","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]},{"given":"Joy","family":"Arulraj","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. PyTorch Torchvision. https:\/\/pytorch.org\/vision\/stable\/index.html."},{"key":"e_1_3_2_1_2_1","volume-title":"MIRIS: Fast Object Track Queries in Video. In SIGMOD. 1907--1921.","author":"Bastani Favyen","year":"2020","unstructured":"Favyen Bastani, Songtao He, Arjun Balasingam, Karthik Gopalakrishnan, Mohammad Alizadeh, Hari Balakrishnan, Michael Cafarella, Tim Kraska, and Sam Madden. 2020. MIRIS: Fast Object Track Queries in Video. In SIGMOD. 1907--1921."},{"key":"e_1_3_2_1_3_1","unstructured":"Greg Brockman Vicki Cheung Ludwig Pettersson Jonas Schneider John Schulman Jie Tang and Wojciech Zaremba. 2016. OpenAI Gym. arXiv:1606.01540 [cs.LG]"},{"key":"e_1_3_2_1_4_1","volume-title":"Activitynet: A large-scale video benchmark for human activity understanding. In CVPR. 961--970.","author":"Heilbron Fabian Caba","year":"2015","unstructured":"Fabian Caba Heilbron, Victor Escorcia, Bernard Ghanem, and Juan Carlos Niebles. 2015. Activitynet: A large-scale video benchmark for human activity understanding. In CVPR. 961--970."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Joao Carreira and Andrew Zisserman. 2017. Quo vadis action recognition? a new model and the kinetics dataset. In CVPR. 6299--6308.","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Yu-Wei Chao Sudheendra Vijayanarasimhan Bryan Seybold David A Ross Jia Deng and Rahul Sukthankar. 2018. Rethinking the faster r-cnn architecture for temporal action localization. In CVPR. 1130--1139.","DOI":"10.1109\/CVPR.2018.00124"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Christoph Feichtenhofer Axel Pinz and Andrew Zisserman. 2016. Convolutional two-stream network fusion for video action recognition. In CVPR. 1933--1941.","DOI":"10.1109\/CVPR.2016.213"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/66926.66962"},{"key":"e_1_3_2_1_12_1","unstructured":"Ronald A Howard. 1960. Dynamic programming and markov processes. (1960)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"e_1_3_2_1_14_1","unstructured":"Y.-G. Jiang J. Liu A. Roshan Zamir G. Toderici I. Laptev M. Shah and R. Sukthankar. 2014. THUMOS Challenge: Action Recognition with a Large Number of Classes. http:\/\/crcv.ucf.edu\/THUMOS14\/."},{"key":"e_1_3_2_1_15_1","first-page":"533","article-title":"BlazeIt: optimizing declarative aggregation and limit queries for neural network-based video analytics","volume":"13","author":"Kang Daniel","year":"2019","unstructured":"Daniel Kang, Peter Bailis, and Matei Zaharia. 2019. BlazeIt: optimizing declarative aggregation and limit queries for neural network-based video analytics. VLDB 13, 4 (2019), 533--546.","journal-title":"VLDB"},{"key":"e_1_3_2_1_16_1","first-page":"1586","article-title":"NoScope: Optimizing Deep CNN-Based Queries over Video Streams at Scale","volume":"10","author":"Kang Daniel","year":"2017","unstructured":"Daniel Kang, John Emmons, Firas Abuzaid, Peter Bailis, and Matei Zaharia. 2017. NoScope: Optimizing Deep CNN-Based Queries over Video Streams at Scale. VLDB 10, 11 (2017), 1586--1597.","journal-title":"VLDB"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Andrej Karpathy George Toderici Sanketh Shetty Thomas Leung Rahul Sukthankar and Li Fei-Fei. 2014. Large-scale video classification with convolutional neural networks. In CVPR. 1725--1732.","DOI":"10.1109\/CVPR.2014.223"},{"key":"e_1_3_2_1_18_1","volume-title":"Ffnet: Video fast-forwarding via reinforcement learning. In CVPR. 6771--6780.","author":"Lan Shuyue","year":"2018","unstructured":"Shuyue Lan, Rameswar Panda, Qi Zhu, and Amit K Roy-Chowdhury. 2018. Ffnet: Video fast-forwarding via reinforcement learning. In CVPR. 6771--6780."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1999.790410"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Yao Lu Aakanksha Chowdhery Srikanth Kandula and Surajit Chaudhuri. 2018. Accelerating machine learning inference with probabilistic predicates. In SIGMOD. 1493--1508.","DOI":"10.1145\/3183713.3183751"},{"key":"e_1_3_2_1_21_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_22_1","volume-title":"Leandro Soriano Marcolino, and Erickson Nascimento","author":"Ramos Washington","year":"2020","unstructured":"Washington Ramos, Michel Silva, Edson Araujo, Leandro Soriano Marcolino, and Erickson Nascimento. 2020. Straight to the Point: Fast-forwarding Videos via Reinforcement Learning Using Textual Data. In CVPR. 10931--10940."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Zheng Shou Dongang Wang and Shih-Fu Chang. 2016. Temporal action localization in untrimmed videos via multi-stage cnns. In CVPR. 1049--1058.","DOI":"10.1109\/CVPR.2016.119"},{"key":"e_1_3_2_1_25_1","volume-title":"Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199 (2014)."},{"key":"e_1_3_2_1_26_1","first-page":"2453","article-title":"ODIN: automated drift detection and recovery in video analytics","volume":"13","author":"Suprem Abhijit","year":"2020","unstructured":"Abhijit Suprem, Joy Arulraj, Calton Pu, and Joao Ferreira. 2020. ODIN: automated drift detection and recovery in video analytics. VLDB 13, 12 (2020), 2453--2465.","journal-title":"VLDB"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Du Tran Heng Wang Lorenzo Torresani Jamie Ray Yann LeCun and Manohar Paluri. 2018. A closer look at spatiotemporal convolutions for action recognition. In CVPR. 6450--6459.","DOI":"10.1109\/CVPR.2018.00675"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2986861"},{"key":"e_1_3_2_1_29_1","unstructured":"Fisher Yu Haofeng Chen Xin Wang Wenqi Xian Yingying Chen Fangchen Liu Vashisht Madhavan and Trevor Darrell. 2020. BDD100K: A diverse driving dataset for heterogeneous multitask learning. In CVPR. 2636--2645."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Kaiyang Zhou Yu Qiao and Tao Xiang. 2018. Deep Reinforcement Learning for Unsupervised Video Summarization With Diversity-Representativeness Reward. In AAAI.","DOI":"10.1609\/aaai.v32i1.12255"}],"event":{"name":"SIGMOD\/PODS '22: International Conference on Management of Data","location":"Philadelphia PA USA","acronym":"SIGMOD\/PODS '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2022 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526181","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3526181","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:10:13Z","timestamp":1750183813000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526181"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":29,"alternative-id":["10.1145\/3514221.3526181","10.1145\/3514221"],"URL":"https:\/\/doi.org\/10.1145\/3514221.3526181","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]},"assertion":[{"value":"2022-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}