{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T07:55:39Z","timestamp":1781510139330,"version":"3.54.1"},"reference-count":30,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100008778","name":"University of Science and Technology Beijing","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100008778","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002914","name":"Concordia University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002914","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,10]]},"DOI":"10.1016\/j.neucom.2026.134132","type":"journal-article","created":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T16:18:38Z","timestamp":1780503518000},"page":"134132","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Hybrid energy-aware reward shaping: A unified lightweight physics-guided methodology for policy optimization"],"prefix":"10.1016","volume":"697","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2654-0068","authenticated-orcid":false,"given":"Qijun","family":"Liao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jue","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yiting","family":"Kang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xinxin","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mingan","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.134132_bib0005","series-title":"Proc. Int. Conf. Mach. Learn","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"10.1016\/j.neucom.2026.134132_bib0010","series-title":"Proc. 36th Int. Conf. Mach. Learn","first-page":"1282","article-title":"Quantifying generalization in reinforcement learning","author":"Cobbe","year":"2019"},{"key":"10.1016\/j.neucom.2026.134132_bib0015","series-title":"Proc. Int. Conf. Learn. Represent","article-title":"Deep Lagrangian networks: using physics as model prior for deep learning","author":"Lutter","year":"2019"},{"key":"10.1016\/j.neucom.2026.134132_bib0020","series-title":"Proc. Adv. Neural Inf. Process. Syst","first-page":"15379","article-title":"Hamiltonian neural networks","author":"Greydanus","year":"2019"},{"key":"10.1016\/j.neucom.2026.134132_bib0025","series-title":"Proc. Int. Conf. Learn. Represent","article-title":"Learning invariant representations for reinforcement learning without reconstruction","author":"Zhang","year":"2021"},{"key":"10.1016\/j.neucom.2026.134132_bib0030","series-title":"Proc. 16th Int. Conf","first-page":"278","article-title":"Policy invariance under reward transformations: theory and application to reward shaping","author":"Ng","year":"1999"},{"issue":"12","key":"10.1016\/j.neucom.2026.134132_bib0035","doi-asserted-by":"crossref","first-page":"2233","DOI":"10.1109\/JAS.2023.123477","article-title":"Magnetic field-based reward shaping for goal-conditioned reinforcement learning","volume":"10","author":"Ding","year":"2023","journal-title":"IEEE\/CAA J. Autom. Sinica"},{"key":"10.1016\/j.neucom.2026.134132_bib0040","series-title":"Proc. 20th Int. Conf. Mach. Learn","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","author":"Wiewiora","year":"2003"},{"key":"10.1016\/j.neucom.2026.134132_bib0045","series-title":"Proc. 11th Int. Conf. Auton. Agents Multiagent Syst","first-page":"433","article-title":"Dynamic potential-based reward shaping","author":"Devlin","year":"2012"},{"key":"10.1016\/j.neucom.2026.134132_bib0050","series-title":"Proc. 35th AAAI Conf. Artif","first-page":"11210","article-title":"Learning task-distribution reward shaping with meta-learning","author":"Zou","year":"2021"},{"key":"10.1016\/j.neucom.2026.134132_bib0055","series-title":"Proc. 21st Int. Conf. Mach. Learn","first-page":"1","article-title":"Apprenticeship learning via inverse reinforcement learning","author":"Abbeel","year":"2004"},{"issue":"4","key":"10.1016\/j.neucom.2026.134132_bib0060","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1016\/j.neunet.2010.01.001","article-title":"Online learning of shaping rewards in reinforcement learning","volume":"23","author":"Grzes","year":"2010","journal-title":"Neural Netw."},{"key":"10.1016\/j.neucom.2026.134132_bib0065","series-title":"Proc. 29th AAAI Conf. Artif. Intell","first-page":"2652","article-title":"Expressing arbitrary reward functions as potential-based advice","author":"Harutyunyan","year":"2015"},{"key":"10.1016\/j.neucom.2026.134132_bib0070","series-title":"Proc. 14th Int. Conf. Auton. Agents Multiagent Syst","first-page":"181","article-title":"Policy transfer using reward shaping","author":"Brys","year":"2015"},{"key":"10.1016\/j.neucom.2026.134132_bib0075","series-title":"Proc. ICLR Workshop Deep Learn. Phys. Sci","article-title":"Lagrangian neural networks","author":"Cranmer","year":"2020"},{"key":"10.1016\/j.neucom.2026.134132_bib0080","series-title":"Proc. Adv. Neural Inf. Process. Syst","first-page":"4502","article-title":"Interaction networks for learning about objects, relations and physics","author":"Battaglia","year":"2016"},{"key":"10.1016\/j.neucom.2026.134132_bib0085","series-title":"Proc. Int. Conf. Mach. Learn","first-page":"8459","article-title":"Learning to simulate complex physics with graph networks","author":"Sanchez-Gonzalez","year":"2020"},{"key":"10.1016\/j.neucom.2026.134132_bib0090","series-title":"Proc. Adv. Neural Inf. Process. Syst","first-page":"6571","article-title":"Neural ordinary differential equations","author":"Chen","year":"2018"},{"issue":"2","key":"10.1016\/j.neucom.2026.134132_bib0095","doi-asserted-by":"crossref","first-page":"617","DOI":"10.1109\/JAS.2020.1003072","article-title":"Parallel reinforcement learning-based energy efficiency improvement for a cyber-physical system","volume":"7","author":"Liu","year":"2020","journal-title":"IEEE\/CAA J. Autom. Sinica"},{"key":"10.1016\/j.neucom.2026.134132_bib0100","series-title":"Proc. 35th Int. Conf. Mach. Learn","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.neucom.2026.134132_bib0105","author":"Schulman"},{"key":"10.1016\/j.neucom.2026.134132_bib0110","author":"Haarnoja"},{"issue":"2","key":"10.1016\/j.neucom.2026.134132_bib0115","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1109\/JAS.2020.1003021","article-title":"Artificial intelligence applications in the development of autonomous vehicles: a survey","volume":"7","author":"Ma","year":"2020","journal-title":"IEEE\/CAA J. Autom. Sinica"},{"issue":"3","key":"10.1016\/j.neucom.2026.134132_bib0120","doi-asserted-by":"crossref","first-page":"328","DOI":"10.1007\/s42154-021-00151-3","article-title":"End-to-end autonomous driving through dueling double deep Q-Network","volume":"4","author":"Peng","year":"2021","journal-title":"Automot. Innov."},{"key":"10.1016\/j.neucom.2026.134132_bib0125","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1146\/annurev-control-090419-075625","article-title":"Learning-based model predictive control: toward safe learning in control","volume":"3","author":"Hewing","year":"2020","journal-title":"Annu. Rev. Control Robot. Auton. Syst."},{"issue":"28","key":"10.1016\/j.neucom.2026.134132_bib0130","doi-asserted-by":"crossref","DOI":"10.1126\/scirobotics.aaw1975","article-title":"Neural network vehicle models for high-performance automated driving","volume":"4","author":"Spielberg","year":"2019","journal-title":"Sci. Robot."},{"issue":"7","key":"10.1016\/j.neucom.2026.134132_bib0135","doi-asserted-by":"crossref","first-page":"1883","DOI":"10.1109\/TAC.2017.2753460","article-title":"Learning model predictive control for iterative tasks. A data-driven control framework","volume":"63","author":"Rosolia","year":"2018","journal-title":"IEEE Trans. Autom. Control"},{"issue":"5","key":"10.1016\/j.neucom.2026.134132_bib0140","doi-asserted-by":"crossref","first-page":"628","DOI":"10.1002\/oca.2123","article-title":"Optimization-based autonomous racing of 1:43 scale RC cars","volume":"36","author":"Liniger","year":"2015","journal-title":"Optim. Control Appl. Methods"},{"issue":"6","key":"10.1016\/j.neucom.2026.134132_bib0145","doi-asserted-by":"crossref","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","article-title":"Deep reinforcement learning for autonomous driving: a survey","volume":"23","author":"Kiran","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.neucom.2026.134132_bib0150","series-title":"Proc. 32nd AAAI Conf. Artif","first-page":"3207","article-title":"Deep reinforcement learning that matters","author":"Henderson","year":"2018"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226015304?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226015304?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T07:44:49Z","timestamp":1781509489000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226015304"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,10]]},"references-count":30,"alternative-id":["S0925231226015304"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.134132","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Hybrid energy-aware reward shaping: A unified lightweight physics-guided methodology for policy optimization","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.134132","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"134132"}}