{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:35:13Z","timestamp":1778081713293,"version":"3.51.4"},"reference-count":27,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2015,9,1]],"date-time":"2015-09-01T00:00:00Z","timestamp":1441065600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Smart Grid"],"published-print":{"date-parts":[[2015,9]]},"DOI":"10.1109\/tsg.2015.2396993","type":"journal-article","created":{"date-parts":[[2015,2,16]],"date-time":"2015-02-16T19:32:54Z","timestamp":1424115174000},"page":"2312-2324","source":"Crossref","is-referenced-by-count":221,"title":["Optimal Demand Response Using Device-Based Reinforcement Learning"],"prefix":"10.1109","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6987-6463","authenticated-orcid":false,"given":"Zheng","family":"Wen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"O'Neill","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hamid","family":"Maei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HICSS.2008.60"},{"key":"ref11","article-title":"Automated critical peak pricing field tests: 2006 pilot program description and results","author":"piette","year":"2007"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/SMARTGRID.2010.5622078"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SmartGridComm.2012.6485964"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/PSCC.2014.7038106"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ISGTEurope.2012.6465679"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2011.6160649"},{"key":"ref17","author":"sutton","year":"1998","journal-title":"Reinforcement Learning"},{"key":"ref18","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref19","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01551-9","author":"szepesv\u00e1ri","year":"2010","journal-title":"Algorithms for Reinforcement Learning"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/59.651628"},{"key":"ref27","article-title":"Learning from delayed rewards","author":"watkins","year":"1989"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.2172\/836966","article-title":"A survey of utility experience with real time pricing","author":"barbose","year":"2004"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.energy.2006.01.014"},{"key":"ref5","article-title":"Development and evaluation of fully automated demand response in large facilities","author":"piette","year":"2005"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.tej.2005.04.005"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.enpol.2006.06.019"},{"key":"ref2","article-title":"The role of demand response in electric power market design","author":"braithwait","year":"2002"},{"key":"ref9","article-title":"Architecture concepts and technical issues for an open, interoperable automated demand response infrastructure","author":"koch","year":"0"},{"key":"ref1","article-title":"Dynamic pricing, advanced metering, and demand response in electricity markets","author":"borenstein","year":"2002"},{"key":"ref20","article-title":"Efficient reinforcement learning with value function generalization","author":"wen","year":"2014"},{"key":"ref22","first-page":"53","article-title":"Autonomous helicopter flight using reinforcement learning","author":"coates","year":"2010","journal-title":"Encyclopedia of Machine Learning"},{"key":"ref21","first-page":"3021","article-title":"Efficient exploration and value function generalization in deterministic systems","author":"wen","year":"2013","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1002\/9781118453988.ch25"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.2118\/141677-MS"},{"key":"ref26","article-title":"Generalization and exploration via randomized value functions","author":"van roy","year":"2014","journal-title":"CoRR"},{"key":"ref25","author":"bertsekas","year":"2005","journal-title":"Dynamic Programming and Optimal Control"}],"container-title":["IEEE Transactions on Smart Grid"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5165411\/7210244\/07042790.pdf?arnumber=7042790","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,5]],"date-time":"2023-08-05T22:41:00Z","timestamp":1691275260000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7042790\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,9]]},"references-count":27,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tsg.2015.2396993","relation":{},"ISSN":["1949-3053","1949-3061"],"issn-type":[{"value":"1949-3053","type":"print"},{"value":"1949-3061","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,9]]}}}