{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,24]],"date-time":"2026-07-24T15:24:39Z","timestamp":1784906679497,"version":"3.55.0"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"NL Enterprise Agency through the TKI SG-BEMS project of Dutch Top Sector"},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["687283"],"award-info":[{"award-number":["687283"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Smart Grid"],"published-print":{"date-parts":[[2019,7]]},"DOI":"10.1109\/tsg.2018.2834219","type":"journal-article","created":{"date-parts":[[2018,5,8]],"date-time":"2018-05-08T18:51:29Z","timestamp":1525805489000},"page":"3698-3708","source":"Crossref","is-referenced-by-count":534,"title":["On-Line Building Energy Optimization Using Deep Reinforcement Learning"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0856-579X","authenticated-orcid":false,"given":"Elena","family":"Mocanu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5636-7683","authenticated-orcid":false,"given":"Decebal Constantin","family":"Mocanu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Phuong H.","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2773-4421","authenticated-orcid":false,"given":"Antonio","family":"Liotta","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Michael E.","family":"Webber","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Madeleine","family":"Gibescu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1593-1839","authenticated-orcid":false,"given":"J. G.","family":"Slootweg","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2017.2703842"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2016.11.111"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ISGTEurope.2017.8260289"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MIE.2016.2615575"},{"key":"ref37","first-page":"1206","article-title":"Online multi-task learning for policy gradient methods","author":"bou-ammar","year":"2014","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref35","article-title":"High-dimensional continuous control using generalized advantage estimation","volume":"abs 1506 2438","author":"schulman","year":"2015","journal-title":"CoRR"},{"key":"ref34","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume":"48","author":"mnih","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.2007630"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/SMARTGRID.2010.5622078"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2015.2495145"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/PSCC.2014.7038106"},{"key":"ref14","first-page":"2561","article-title":"Batch reinforcement learning for smart home energy management","author":"berlink","year":"2015","journal-title":"Proc Int Conf Artif Intell (IJCAI)"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref16","article-title":"Deep reinforcement learning solutions for energy microgrids management","author":"fran\u00e7ois-lavet","year":"2016","journal-title":"Proceedings of the 8th European Workshop on Reinforcement Learning (EWRL)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2016.01.030"},{"key":"ref18","article-title":"Measuring short-term air conditioner demand reductions for operations and settlement","author":"bode","year":"2012"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1561\/2400000002"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.3390\/en10010003"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2010.2055903"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/PSCC.2016.7540994"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2015.2402518"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2341586"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IECON.2016.7793413"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2015.2512501"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.5220\/0005444101570166"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3390\/en7095787"},{"key":"ref9","author":"sutton","year":"1998","journal-title":"Introduction to Reinforcement Learning"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2897165"},{"key":"ref20","first-page":"315","article-title":"Deep sparse rectifier neural networks","volume":"15","author":"glorot","year":"2011","journal-title":"Proc 14th Int Conf Artif Intell Statist (AISTATS)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638312"},{"key":"ref21","first-page":"3","article-title":"Rectifier nonlinearities improve neural network acoustic models","author":"maas","year":"2013","journal-title":"Proc ICML"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/PMAPS.2014.6960635"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2017.2686012"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.segan.2016.02.005"}],"container-title":["IEEE Transactions on Smart Grid"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5165411\/8741221\/08356086.pdf?arnumber=8356086","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T20:58:06Z","timestamp":1657745886000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8356086\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7]]},"references-count":37,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tsg.2018.2834219","relation":{},"ISSN":["1949-3053","1949-3061"],"issn-type":[{"value":"1949-3053","type":"print"},{"value":"1949-3061","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,7]]}}}