{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:24:51Z","timestamp":1730265891188,"version":"3.28.0"},"reference-count":18,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1109\/ijcnn.2017.7966360","type":"proceedings-article","created":{"date-parts":[[2017,7,10]],"date-time":"2017-07-10T17:41:30Z","timestamp":1499708490000},"page":"3998-4005","source":"Crossref","is-referenced-by-count":1,"title":["Training neural networks with policy gradient"],"prefix":"10.1109","author":[{"given":"Sourabh","family":"Bose","sequence":"first","affiliation":[]},{"given":"Manfred","family":"Huber","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Deterministic policy gradient algorithms","year":"2014","author":"lever","key":"ref10"},{"key":"ref11","first-page":"1531","article-title":"A natural policy gradient","volume":"14","author":"kakade","year":"2001","journal-title":"NIPS"},{"journal-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1287\/moor.12.2.262"},{"key":"ref15","first-page":"817","article-title":"The epoch-greedy algorithm for multi-armed bandits with side information","author":"langford","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-011-5235-x"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0118-0"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1126\/science.287.5456.1273"},{"journal-title":"Sequential minimal optimization A fast algorithm for training support vector machines","year":"1998","author":"platt","key":"ref4"},{"key":"ref3","volume":"3","author":"shor","year":"2012","journal-title":"Minimization Methods for Non-Differentiable Functions"},{"key":"ref6","first-page":"1","article-title":"Sparse autoencoder","volume":"72","author":"ng","year":"2011","journal-title":"Cs294a lecture notes"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015424"},{"key":"ref8","first-page":"1038","article-title":"Generalization in reinforcement learning: Successful examples using sparse coarse coding","author":"sutton","year":"1996","journal-title":"Advances in neural information processing systems"},{"key":"ref7","volume":"1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref2","first-page":"1116","article-title":"Structured sparse coding via lateral inhibition","author":"szlam","year":"2011","journal-title":"Advances in neural information processing systems"},{"key":"ref1","article-title":"Bench-marking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref9","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"99","author":"sutton","year":"1999","journal-title":"NIPS"}],"event":{"name":"2017 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2017,5,14]]},"location":"Anchorage, AK, USA","end":{"date-parts":[[2017,5,19]]}},"container-title":["2017 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7958416\/7965814\/07966360.pdf?arnumber=7966360","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,10,2]],"date-time":"2017-10-02T22:43:50Z","timestamp":1506984230000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7966360\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2017.7966360","relation":{},"subject":[],"published":{"date-parts":[[2017,5]]}}}