{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T16:46:26Z","timestamp":1779900386503,"version":"3.53.1"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072185"],"award-info":[{"award-number":["62072185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1711262"],"award-info":[{"award-number":["U1711262"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1109\/tkde.2025.3528219","type":"journal-article","created":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T20:53:45Z","timestamp":1736542425000},"page":"1542-1555","source":"Crossref","is-referenced-by-count":2,"title":["Adversarial Conservative Alternating Q-Learning for Credit Card Debt Collection"],"prefix":"10.1109","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7766-8802","authenticated-orcid":false,"given":"Wenhui","family":"Liu","sequence":"first","affiliation":[{"name":"School of Data Science and Engineering, East China Normal University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5957-1661","authenticated-orcid":false,"given":"Jiapeng","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Data Science and Engineering, East China Normal University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9612-5024","authenticated-orcid":false,"given":"Lyu","family":"Ni","sequence":"additional","affiliation":[{"name":"School of Data Science and Engineering, East China Normal University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5318-4911","authenticated-orcid":false,"given":"Jingyu","family":"Bi","sequence":"additional","affiliation":[{"name":"School of Data Science and Engineering, East China Normal University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3040-3287","authenticated-orcid":false,"given":"Zhijian","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Data Science and Engineering, East China Normal University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiajie","family":"Long","sequence":"additional","affiliation":[{"name":"Bank of Shanghai, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mengyao","family":"Gao","sequence":"additional","affiliation":[{"name":"Bank of Shanghai, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0144-7344","authenticated-orcid":false,"given":"Dingjiang","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Data Science and Engineering, East China Normal University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1949-2768","authenticated-orcid":false,"given":"Shuigeng","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Computer Science, Shanghai Key Lab of Intelligent Information Processing, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i11.21528"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.3025588"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380159"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE55515.2023.00268"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref9","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref10","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2023.3302804"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3579355"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3115477"},{"key":"ref14","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020"},{"key":"ref15","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM50108.2020.00016"},{"issue":"93","key":"ref17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10462-023-10620-2","article-title":"Deep reinforcement learning based on balanced stratified prioritized experience replay for customer credit scoring in peer-to-peer lending","volume":"57","author":"Wang","year":"2024","journal-title":"Artif. Intell. Rev."},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.117013"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/639"},{"key":"ref20","first-page":"11 194","article-title":"GradientDICE: Rethinking generalized offline estimation of stationary values","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang"},{"key":"ref21","first-page":"2318","article-title":"Dualdice: Behavior-agnostic estimation of discounted stationary distribution corrections","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Nachum"},{"key":"ref22","first-page":"11784","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref23","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019"},{"key":"ref24","first-page":"1711","article-title":"Mildly conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lyu"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2024.3385910"},{"key":"ref26","first-page":"40 587","article-title":"Supported value regularization for offline reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Mao"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3285029"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.3012346"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"ref30","article-title":"Deep reinforcement learning based recommendation with explicit user-item interactions modeling","author":"Liu","year":"2018"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16580"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3222070"},{"key":"ref38","article-title":"Prioritized experience replay","author":"Schaul","year":"2015"},{"key":"ref39","article-title":"Gaussian error linear units (GELUs)","author":"Hendrycks","year":"2016"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3530811"},{"key":"ref44","article-title":"Soft actor-critic for discrete action settings","author":"Christodoulou","year":"2019"},{"key":"ref45","first-page":"4033","article-title":"Deep exploration via bootstrapped DQN","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Osband"},{"key":"ref46","first-page":"104","article-title":"An optimistic perspective on offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Agarwal"},{"key":"ref47","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1037\/h0037350"}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/69\/10918320\/10836919.pdf?arnumber=10836919","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,8]],"date-time":"2025-03-08T08:30:36Z","timestamp":1741422636000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10836919\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":47,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tkde.2025.3528219","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"value":"1041-4347","type":"print"},{"value":"1558-2191","type":"electronic"},{"value":"2326-3865","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4]]}}}