{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:49:35Z","timestamp":1771699775552,"version":"3.50.1"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T00:00:00Z","timestamp":1719792000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T00:00:00Z","timestamp":1719792000000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T00:00:00Z","timestamp":1719792000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T00:00:00Z","timestamp":1719792000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1849231"],"award-info":[{"award-number":["1849231"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1109\/tai.2024.3363122","type":"journal-article","created":{"date-parts":[[2024,2,16]],"date-time":"2024-02-16T14:28:19Z","timestamp":1708093699000},"page":"3350-3361","source":"Crossref","is-referenced-by-count":5,"title":["Training Value-Aligned Reinforcement Learning Agents Using a Normative Prior"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8016-806X","authenticated-orcid":false,"given":"Md Sultan","family":"al Nahian","sequence":"first","affiliation":[{"name":"University of Kentucky, Lexington, KY, USA"}]},{"given":"Spencer","family":"Frazier","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Mark","family":"Riedl","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1301-5928","authenticated-orcid":false,"given":"Brent","family":"Harrison","sequence":"additional","affiliation":[{"name":"University of Kentucky, Lexington, KY, USA"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Aligning superintelligence with human interests: A technical research agenda","author":"Soares","year":"2014"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v36i4.2577"},{"key":"ref3","article-title":"Value alignment or misalignment\u2014What will keep systems accountable?","volume-title":"Proc. AAAI Workshop, AI, Ethics, Soc.","author":"Arnold","year":"2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2006.80"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86144-5_3"},{"key":"ref6","article-title":"Third-person imitation learning","author":"Stadie","year":"2017"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s13218-019-00587-0"},{"key":"ref8","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Ho","year":"2016"},{"key":"ref9","first-page":"2760","article-title":"Model-free imitation learning with policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ho","year":"2016"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref11","volume-title":"Human Compatible: Artificial Intelligence and the Problem of Control","author":"Russell","year":"2019"},{"key":"ref12","first-page":"183","article-title":"Learning from stories: Using crowdsourced narratives to train virtual agents","volume-title":"Proc. 12th Artif. Intell. Interactive Digit. Entertainment Conf.","author":"Harrison","year":"2016"},{"key":"ref13","article-title":"Computational narrative intelligence: A human-centered goal for artificial intelligence","author":"Riedl","year":"2016"},{"key":"ref14","first-page":"2625","article-title":"Policy shaping: Integrating human feedback with reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Griffith","year":"2013"},{"key":"ref15","first-page":"3366","article-title":"Policy shaping with human teachers","volume-title":"Proc. 24th Int. Conf. Artif. Intell. (IJCAI)","author":"Cederborg","year":"2015"},{"key":"ref16","first-page":"1040","article-title":"Learning from demonstration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Schaal","year":"1997"},{"key":"ref17","first-page":"3027","article-title":"Showing versus doing: Teaching by demonstration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ho","year":"2016"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33486-3_8"},{"key":"ref19","first-page":"4299","article-title":"Deep reinforcement learning from human preferences","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Christiano","year":"2017"},{"key":"ref20","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. 17th Int. Conf. Mach. Learn. (ICML)","author":"Ng","year":"2000"},{"key":"ref21","first-page":"3916","article-title":"Cooperative inverse reinforcement learning","volume-title":"Proc. 30th Int. Conf. Neural Inf. Process. Syst.","author":"Hadfield-Menell","year":"2016"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1163"},{"key":"ref23","article-title":"Using stories to teach human values to artificial agents","volume-title":"Proc. Workshops 30th AAAI Conf. Artif. Intell.","author":"Riedl","year":"2016"},{"key":"ref24","article-title":"Fine-tuning language models from human preferences","author":"Ziegler","year":"2019"},{"key":"ref25","article-title":"Delphi: Towards machine ethics and norms","author":"Jiang","year":"2021"},{"key":"ref26","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref27","article-title":"Graph constrained reinforcement learning for natural language action spaces","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ammanabrolu","year":"2020"},{"key":"ref28","article-title":"dAIrector: Automatic story beat generation through knowledge synthesis","author":"Eger","year":"2018"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3758\/s13428-016-0727-z"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6232"},{"key":"ref31","article-title":"Bidirectional LSTM-CRF models for sequence tagging","author":"Huang","year":"2015"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p17-1052"},{"key":"ref33","first-page":"919","article-title":"Semi-supervised convolutional neural networks for text categorization via region embedding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Johnson","year":"2015"},{"key":"ref34","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref35","article-title":"XLNet: Generalized autoregressive pretraining for language understanding","author":"Yang","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594312"},{"key":"ref38","article-title":"Explore, exploit or listen: Combining human feedback and policy model to speed up deep reinforcement learning in 3D worlds","author":"Lin","year":"2017"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-24337-1_3"},{"key":"ref40","article-title":"AI safety gridworlds","author":"Leike","year":"2017"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/9078688\/10599850\/10438928-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9078688\/10599850\/10438928.pdf?arnumber=10438928","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:09:07Z","timestamp":1755911347000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10438928\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7]]},"references-count":40,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/tai.2024.3363122","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7]]}}}