{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T18:44:17Z","timestamp":1754160257821,"version":"3.41.2"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3588936","type":"journal-article","created":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T17:43:25Z","timestamp":1752601405000},"page":"125759-125771","source":"Crossref","is-referenced-by-count":0,"title":["TEMPO: Timestep Explanations for Modeling Preferences in Online Preference-Based RL"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-5647-1378","authenticated-orcid":false,"given":"Jakob","family":"Karlaus","sequence":"first","affiliation":[{"name":"Institute of Artificial Intelligence, Ulm University, Ulm, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5118-0812","authenticated-orcid":false,"given":"Friedhelm","family":"Schwenker","sequence":"additional","affiliation":[{"name":"Institute of Neuroinformatics, Ulm University, Ulm, Germany"}]}],"member":"263","reference":[{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v35i4.2513"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i5.25733"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2022.05.014"},{"article-title":"Deep reinforcement learning from human preferences","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Christiano","key":"ref5"},{"key":"ref6","first-page":"519","article-title":"Batch active preference-based learning of reward functions","volume-title":"Proc. 2nd Conf. Robot Learn.","author":"Biyik"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5313-8"},{"key":"ref8","article-title":"A survey of reinforcement learning from human Feedback","author":"Kaufmann","year":"2023","journal-title":"arXiv:2312.14925"},{"article-title":"Advancing human\u2013robot collaboration: The impact of flexible input mechanisms","volume-title":"Proc. Mech. Mapping Hum. Input Robots Robot Learn. Shared Control\/Autonomy-Workshop RSS","author":"Beierling","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s10648-010-9127-6"},{"article-title":"Deep inside convolutional networks: Visualising image classification models and saliency maps","volume-title":"Proc. Workshop Int. Conf. Learn. Represent.","author":"Simonyan","key":"ref11"},{"key":"ref12","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"ArXiv:1707.06347"},{"key":"ref13","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Haarnoja"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"key":"ref15","first-page":"6152","article-title":"PEBBLE: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"article-title":"Reward uncertainty for exploration in preference-based reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Liang","key":"ref16"},{"article-title":"SURF: Semi-supervised reward learning with data augmentation for Feedback-efficient preference-based reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Park","key":"ref17"},{"article-title":"SmoothGrad: Removing noise by adding noise","volume-title":"Proc. Workshop Visualizat. Deep Learn.","author":"Smilkov","key":"ref18"},{"key":"ref19","first-page":"3319","article-title":"Axiomatic attribution for deep networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sundararajan"},{"key":"ref20","first-page":"26726","article-title":"Improving deep learning interpretability by saliency guided training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ismail"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"620","DOI":"10.1038\/s42256-021-00343-w","article-title":"Learning explainable models using attribution priors","volume":"3","author":"Erion","year":"2019","journal-title":"Nature Mach. Intell."},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/371"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610505"},{"key":"ref24","first-page":"21885","article-title":"Widening the pipeline in human-guided reinforcement learning with explanation and context-aware data augmentation","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Guan"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801388"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/53"},{"key":"ref27","first-page":"1109","article-title":"Efficiently guiding imitation learning algorithms with human Gaze","volume-title":"Proc. Int. Conf. Auto. Agents Multiagent Syst. (AAMAS)","author":"Saran"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICDL53763.2022.9962222"},{"key":"ref29","first-page":"132","article-title":"Learning from richer human guidance: Augmenting comparison-based learning with feature queries","volume-title":"Proc. 13th ACM\/IEEE Int. Conf. Hum.-Robot Interact. (HRI)","author":"Basu"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3233\/faia230345"},{"key":"ref31","first-page":"59008","article-title":"Fine-grained human Feedback gives better rewards for language model training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wu"},{"article-title":"RLHF-blender: A configurable interactive interface for learning from diverse human Feedback","volume-title":"Proc. Interact. Learn. Implicit Human Feedback Workshop ICML","author":"Metz","key":"ref32"},{"key":"ref33","article-title":"Open problems and fundamental limitations of reinforcement learning from human Feedback","author":"Casper","year":"2023","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref34","first-page":"1899","article-title":"Graying the black box: Understanding DQNs","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Zahavy"},{"key":"ref35","first-page":"1792","article-title":"Visualizing and understanding atari agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Greydanus"},{"article-title":"B-pref: Benchmarking preference-based reinforcement learning","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst. Datasets Benchmarks Track","author":"Lee","key":"ref36"},{"key":"ref37","first-page":"4694","article-title":"When does label smoothing help","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"M\u00fcller"},{"key":"ref38","first-page":"29304","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Agarwal"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2008.4640845"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-3020"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11080378.pdf?arnumber=11080378","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,26]],"date-time":"2025-07-26T06:31:54Z","timestamp":1753511514000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11080378\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3588936","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2025]]}}}