{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T06:13:58Z","timestamp":1774419238602,"version":"3.50.1"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10890751","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:19Z","timestamp":1741799719000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["Improved Techniques for Offline Reinforcement Learning: Advantage Value Estimation and Layernorm"],"prefix":"10.1109","author":[{"given":"Xiaosong","family":"Liu","sequence":"first","affiliation":[{"name":"Soochow University,Computer Science and Technology,Soochow,China"}]},{"given":"Quan","family":"Liu","sequence":"additional","affiliation":[{"name":"Soochow University,Computer Science and Technology,Soochow,China"}]},{"given":"Lan","family":"Wu","sequence":"additional","affiliation":[{"name":"Soochow University,Computer Science and Technology,Soochow,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eng.2022.05.017"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3390\/s23073762"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3250269"},{"key":"ref4","article-title":"Reflexion: Language agents with verbal reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Shinn"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1142\/S2301385023310027"},{"key":"ref6","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"International Conference on machine learning","author":"Fujimoto"},{"key":"ref7","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Advances in neural information processing systems","volume":"34","author":"Fujimoto"},{"key":"ref8","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International Conference on machine learning","author":"Fujimoto"},{"key":"ref9","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Kumar"},{"key":"ref10","article-title":"Offline reinforcement learning with implicit q-learning","author":"Kostrikov","year":"2021"},{"key":"ref11","article-title":"Pessimistic bootstrapping for uncertainty-driven offline reinforcement learning","author":"Bai","year":"2022"},{"key":"ref12","first-page":"1711","article-title":"Mildly conservative q-learning for offline reinforcement learning","volume-title":"Advances in 
Neural Information Processing Systems","volume":"35","author":"Lyu"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref14","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref16","first-page":"1577","article-title":"Efficient online reinforcement learning with offline data","volume-title":"International Conference on Machine Learning","author":"Ball"},{"key":"ref17","first-page":"2207","article-title":"A statistical analysis of polyak-ruppert averaged q-learning","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Li"},{"key":"ref18","article-title":"CORL: Research-oriented deep offline reinforcement learning library","volume-title":"3rd Offline RL Workshop: Offline RL as a \u201cLaunchpad\u201d","author":"Tarasov"},{"key":"ref19","article-title":"D4rl: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10890751.pdf?arnumber=10890751","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:21:45Z","timestamp":1774416105000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10890751\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10890751","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}
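The record above is a standard Crossref REST API envelope for a single work ("status" / "message-type" / "message"). As a minimal sketch, not part of the original record, the Python snippet below assumes the public Crossref endpoint https://api.crossref.org/works/{DOI} and shows how the same fields (title, authors, container-title, reference DOIs) can be read out of the "message" object; the field names are taken directly from the record shown here.

import json
import urllib.request

# DOI of the work described in the record above.
DOI = "10.1109/icassp49660.2025.10890751"

# The public Crossref works endpoint returns the same envelope as above:
# {"status": "ok", "message-type": "work", ..., "message": {...}}
# Crossref recommends identifying yourself (e.g. a mailto in the User-Agent)
# for heavier use; omitted here for brevity.
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url) as resp:
    record = json.load(resp)

work = record["message"]

# Pull out a few of the fields present in the record above.
title = work["title"][0]
authors = ", ".join(f'{a["given"]} {a["family"]}' for a in work["author"])
venue = work["container-title"][0]
ref_dois = [r["DOI"] for r in work.get("reference", []) if "DOI" in r]

print(title)
print(authors)
print(venue)
print(f"{len(ref_dois)} of {work['references-count']} references carry a DOI")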