{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T18:23:45Z","timestamp":1773512625823,"version":"3.50.1"},"reference-count":126,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001459","name":"Singapore Ministry of Education","doi-asserted-by":"crossref","award":["MOE2017-T3-1-007"],"award-info":[{"award-number":["MOE2017-T3-1-007"]}],"id":[{"id":"10.13039\/501100001459","id-type":"DOI","asserted-by":"crossref"}]},{"name":"National Key Research and Development Program of China","award":["2020YFB1708100"],"award-info":[{"award-number":["2020YFB1708100"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/tkde.2022.3155196","type":"journal-article","created":{"date-parts":[[2022,3,1]],"date-time":"2022-03-01T20:39:04Z","timestamp":1646167144000},"page":"1-1","source":"Crossref","is-referenced-by-count":30,"title":["A Survey on Deep Reinforcement Learning for Data Processing and Analytics"],"prefix":"10.1109","author":[{"given":"Qingpeng","family":"Cai","sequence":"first","affiliation":[]},{"given":"Can","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Yiyuan","family":"Xiong","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhongle","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Meihui","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref1","volume-title":"Big Data: The Next Frontier for Innovation, Competition, and Productivity","author":"Manyika","year":"2011"},{"issue":"1","key":"ref2","first-page":"1235","article-title":"MLlib: Machine learning in apache spark","volume":"17","author":"Meng","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.14778\/3282495.3282499"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3341302.3342080"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3341302.3342221"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1851275.1851208"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8485947"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.14778\/3352063.3352129"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3300085"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.14778\/2850583.2850594"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3211954.3211957"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jctc.0c00971.s001"},{"key":"ref13","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389770"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330868"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389704"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4095-0"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2916583"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-57321-8_5"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3003665.3003669"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1126\/science.153.3731.34"},{"key":"ref25","first-page":"1","article-title":"Variance reduction for reinforcement learning in input-driven environments","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mao"},{"key":"ref26","volume-title":"On-Line Q-Learning Using Connectionist Systems","volume":"37","author":"Rummery","year":"1994"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"ref28","article-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref29","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Int. Conf. Neural Informat. Process. Syst.","author":"Sutton"},{"key":"ref30","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Silver"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"issue":"Nov","key":"ref32","first-page":"397","article-title":"Using confidence bounds for exploitation-exploration trade-offs","volume":"3","author":"Auer","year":"2002","journal-title":"J. Mach. Learn. Res."},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.2307\/2332286"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_2"},{"key":"ref36","article-title":"Prioritized experience replay","author":"Schaul","year":"2015"},{"key":"ref37","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih"},{"key":"ref38","article-title":"Emergence of locomotion behaviours in rich environments","author":"Heess","year":"2017"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.36.823"},{"key":"ref40","article-title":"Noisy networks for exploration","author":"Fortunato","year":"2017"},{"key":"ref41","article-title":"Parameter space noise for exploration","author":"Plappert","year":"2017"},{"key":"ref42","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref43","article-title":"Deep recurrent Q-learning for partially observable MDPs","author":"Hausknecht","year":"2015"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref46","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref47","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref48","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","author":"Kulkarni","year":"2016"},{"key":"ref49","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ng"},{"key":"ref50","article-title":"Generative adversarial imitation learning","author":"Ho","year":"2016"},{"key":"ref51","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3211954.3211956"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-30278-8_16"},{"key":"ref54","first-page":"1262","article-title":"Lachesis: Automated generation of persistent partitionings for big data applications","volume-title":"Proc. VLDB Endowment","volume":"14","author":"Zou"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE48307.2020.00119"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.14778\/3184470.3184474"},{"key":"ref57","article-title":"The case for automatic database administration using deep reinforcement learning","author":"Sharma","year":"2018"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3588917"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE48307.2020.00116"},{"key":"ref60","article-title":"Join query optimization with deep reinforcement learning algorithms","author":"Heitz","year":"2019"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE48307.2020.00133"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.14778\/1687553.1687609"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/1376616.1376726"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2742797"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM42002.2020.9322560"},{"key":"ref67","first-page":"1","article-title":"SageDB: A learned database system","volume-title":"Proc. Conf. Innov. Data Syst. Res.","author":"Kraska"},{"key":"ref68","first-page":"629","article-title":"Inductive-bias-driven reinforcement learning for efficient schedules in heterogeneous clusters","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Banerjee"},{"key":"ref69","first-page":"1676","article-title":"Spotlight: Optimizing device placement for training deep neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gao"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00072"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3064029"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1145\/3410566.3410603"},{"key":"ref73","article-title":"Learning index selection with structured action spaces","author":"Welborn","year":"2019"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412106"},{"issue":"2","key":"ref75","doi-asserted-by":"crossref","first-page":"367","DOI":"10.1145\/276305.276337","article-title":"Autoadmin \u201cwhat-if","volume":"27","author":"Chaudhuri","year":"1998","journal-title":"ACM SIGMOD Rec."},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196909"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889473"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1145\/1963405.1963487"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476255"},{"key":"ref80","article-title":"Towards similarity graphs constructed by deep reinforcement learning","author":"Baranchuk","year":"2019"},{"issue":"2","key":"ref81","first-page":"46","article-title":"Learning data structure alchemy","volume":"42","author":"Idreos","year":"2019","journal-title":"Bull. IEEE Comput. Soc. Tech. Committee Data Eng."},{"key":"ref82","article-title":"Neural architecture search with reinforcement learning","author":"Zoph","year":"2016"},{"key":"ref83","article-title":"Progressive neural index search for database system","author":"Wu","year":"2019"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3300088"},{"key":"ref85","article-title":"A reinforcement learning approach for adaptive query processing","author":"Tzoumas","journal-title":"DB Technical Report"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1007\/11871842_29"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3452838"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2014.6816678"},{"key":"ref89","article-title":"Opportunistic view materialization with deep reinforcement learning","author":"Liang","year":"2019"},{"key":"ref90","article-title":"Phoebe: Reuse-aware online caching with reinforcement learning for emerging storage models","author":"Wu","year":"2020"},{"key":"ref91","first-page":"1","article-title":"Learning caching policies with subsampling","volume-title":"Proc. NeurIPS Mach. Learn. Syst. Workshop","author":"Wang"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1245"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313517"},{"key":"ref94","article-title":"Seq2SQL: Generating structured queries from natural language using reinforcement learning","author":"Zhong"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1004"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389779"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330932"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403261"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219961"},{"key":"ref100","first-page":"271","article-title":"Diagnostic inferencing via improving clinical concept extraction with deep reinforcement learning: A preliminary study","volume-title":"Proc. 2nd Mach. Learn. Healthcare Conf.","author":"Ling"},{"key":"ref101","first-page":"2848","article-title":"Student-teacher curriculum learning via reinforcement learning: Predicting hospital inpatient admission location","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"El-Bouri"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3543852"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2018.07.056"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1080\/14697688.2015.1011684"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219918"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220122"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313455"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403128"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403135"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330933"},{"key":"ref111","article-title":"SQLNet: Generating structured queries from natural language without reinforcement learning","author":"Xu","year":"2017"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1261"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467417"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-58280-1_2"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1080\/14697688.2019.1622302"},{"key":"ref116","article-title":"Machine learning in finance: From theory to practice","author":"Matthew","year":"2021"},{"key":"ref117","first-page":"19","article-title":"Nonlinear inverse reinforcement learning with gaussian processes","volume-title":"Proc. Int. Conf. Neural Informat. Process. Syst.","author":"Levine"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186165"},{"key":"ref119","first-page":"2494","article-title":"Park: An open platform for learning-augmented computer systems","volume-title":"Proc. Int. Conf. Neural Informat. Process. Syst.","author":"Mao"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00146"},{"key":"ref122","first-page":"6412","article-title":"Graph convolutional policy network for goal-directed molecular graph generation","volume-title":"Proc. Int. Conf. Neural Informat. Process. Syst.","author":"You"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3058954"},{"key":"ref124","first-page":"198","article-title":"Polyjuice: High-performance transactions via learned concurrency control","volume-title":"Proc. 15th USENIX Symp. Oper. Syst. Des. Implementation","author":"Wang"},{"key":"ref125","article-title":"Assessing generalization in deep reinforcement learning","author":"Packer","year":"2018"},{"key":"ref126","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2021.103535","article-title":"Reward is enough","volume":"299","author":"Silver","year":"2021","journal-title":"Artif. Intell."}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/69\/4358933\/09723570.pdf?arnumber=9723570","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,17]],"date-time":"2024-01-17T23:17:23Z","timestamp":1705533443000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9723570\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":126,"URL":"https:\/\/doi.org\/10.1109\/tkde.2022.3155196","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"value":"1041-4347","type":"print"},{"value":"1558-2191","type":"electronic"},{"value":"2326-3865","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}