{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T10:09:50Z","timestamp":1767262190450,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,4,16]],"date-time":"2019-04-16T00:00:00Z","timestamp":1555372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,4,16]]},"DOI":"10.1145\/3302509.3311053","type":"proceedings-article","created":{"date-parts":[[2019,4,4]],"date-time":"2019-04-04T18:38:43Z","timestamp":1554403123000},"page":"237-248","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":29,"title":["Reduced variance deep reinforcement learning with temporal logic specifications"],"prefix":"10.1145","author":[{"given":"Qitong","family":"Gao","sequence":"first","affiliation":[{"name":"Duke University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Davood","family":"Hajinezhad","sequence":"additional","affiliation":[{"name":"Duke University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Duke University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiannis","family":"Kantaros","sequence":"additional","affiliation":[{"name":"Duke University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael M.","family":"Zavlanos","sequence":"additional","affiliation":[{"name":"Duke University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,4,16]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org.  Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org."},{"key":"e_1_3_2_1_2_1","unstructured":"Christel Baier Joost-Pieter Katoen and Kim Guldstrand Larsen. 2008. Principles of model checking. MIT press.   Christel Baier Joost-Pieter Katoen and Kim Guldstrand Larsen. 2008. Principles of model checking. MIT press."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Calin Belta Boyan Yordanov and Ebru Aydin Gol. 2017. Formal Methods for Discrete-Time Dynamical Systems. Vol. 89. Springer.  Calin Belta Boyan Yordanov and Ebru Aydin Gol. 2017. Formal Methods for Discrete-Time Dynamical Systems. Vol. 89. Springer.","DOI":"10.1007\/978-3-319-50763-7"},{"key":"e_1_3_2_1_4_1","unstructured":"D. Bertsekas. 1999. Nonlinear programming. Athena scientific Belmont.  D. Bertsekas. 1999. Nonlinear programming. Athena scientific Belmont."},{"key":"e_1_3_2_1_6_1","unstructured":"D. P. Bertsekas and J. N. Tsitsiklis. 1996. Neuro-Dynamic Programming. Athena Scientific Belmont MA.   D. P. Bertsekas and J. N. Tsitsiklis. 1996. Neuro-Dynamic Programming. Athena Scientific Belmont MA."},{"key":"e_1_3_2_1_7_1","unstructured":"S. Bhatnagar D. Precup D. Silver R. Sutton H.R. Maei and C. Szepesv\u00e1ri. 2009. Convergent temporal-difference learning with arbitrary smooth function approximation. In Advances in Neural Information Processing Systems. 1204--1212.   S. Bhatnagar D. Precup D. Silver R. Sutton H.R. Maei and C. Szepesv\u00e1ri. 2009. Convergent temporal-difference learning with arbitrary smooth function approximation. In Advances in Neural Information Processing Systems. 1204--1212."},{"volume-title":"Proceedings of the 8th International Conference on Neural Information Processing Systems. 1017--1023","author":"Crites R. H.","key":"e_1_3_2_1_8_1"},{"volume-title":"SAGA: A Fast Incremental Gradient Method With Support for Non-Strongly Convex Composite Objectives. In The Proceeding of NIPS.","year":"2014","author":"Defazio A.","key":"e_1_3_2_1_9_1"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2298143"},{"key":"e_1_3_2_1_11_1","unstructured":"S. Du J. Chen L. Li L. Xiao and D. Zhou. 2017. Stochastic variance reduction methods for policy evaluation. arXiv preprint arXiv:1702.07944 (2017).   S. Du J. Chen L. Li L. Xiao and D. Zhou. 2017. Stochastic variance reduction methods for policy evaluation. arXiv preprint arXiv:1702.07944 (2017)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Jie Fu and Ufuk Topcu. 2014. Probably approximately correct mdp learning and control with temporal logic constraints. arXiv preprint arXiv:1404.7073 (2014).  Jie Fu and Ufuk Topcu. 2014. Probably approximately correct mdp learning and control with temporal logic constraints. arXiv preprint arXiv:1404.7073 (2014).","DOI":"10.15607\/RSS.2014.X.039"},{"volume-title":"International Conference on Machine Learning. 2829--2838","year":"2016","author":"Gu Shixiang","key":"e_1_3_2_1_13_1"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914546174"},{"key":"e_1_3_2_1_15_1","first-page":"11","article-title":"Probabilistic Motion Planning under Temporal Tasks and Soft Constraints","volume":"99","author":"Guo Meng","year":"2018","journal-title":"IEEE Trans. Automat. Control PP"},{"key":"e_1_3_2_1_16_1","first-page":"3215","article-title":"NESTT","volume":"29","author":"Hajinezhad D.","year":"2016","journal-title":"In Advances in Neural Information Processing Systems"},{"volume-title":"International conference on machine learning. 448--456","year":"2015","author":"Ioffe Sergey","key":"e_1_3_2_1_17_1"},{"volume-title":"the Proceedings of the Neural Information Processing (NIPS).","author":"Johnson R.","key":"e_1_3_2_1_18_1"},{"key":"e_1_3_2_1_19_1","unstructured":"L. Hurwicz K. J. Arrow and H. Uzawa. 1958. Studies in Linear and Non-linear Programming. Stanford University Press.  L. Hurwicz K. J. Arrow and H. Uzawa. 1958. Studies in Linear and Non-linear Programming. Stanford University Press."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Yiannis Kantaros and Michael M Zavlanos. 2018. Sampling-based optimal control synthesis for multi-robot systems under global temporal tasks. IEEE Trans. Automat. Control PP 99 (7 2018).  Yiannis Kantaros and Michael M Zavlanos. 2018. Sampling-based optimal control synthesis for multi-robot systems under global temporal tasks. IEEE Trans. Automat. Control PP 99 (7 2018).","DOI":"10.1109\/ICCPS.2018.00024"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364911406761"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"J. Kiefer and J. Wolfowitz. 1952. Stochastic Estimation of the Maximum of a Regression Function. Ann. Math. Statist. 23 3 (09 1952) 462--466.  J. Kiefer and J. Wolfowitz. 1952. Stochastic Estimation of the Maximum of a Regression Function. Ann. Math. Statist. 23 3 (09 1952) 462--466.","DOI":"10.1214\/aoms\/1177729392"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-017-1173-0"},{"volume-title":"Reinforcement learning","author":"Lange Sascha","key":"e_1_3_2_1_24_1"},{"key":"e_1_3_2_1_25_1","unstructured":"Xiao Li Yao Ma and Calin Belta. 2017. A Policy Search Method For Temporal Logic Specified Reinforcement Learning Tasks. arXiv preprint arXiv:1709.09611 (2017).  Xiao Li Yao Ma and Calin Belta. 2017. A Policy Search Method For Temporal Logic Specified Reinforcement Learning Tasks. arXiv preprint arXiv:1709.09611 (2017)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206234"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski etal 2015. Human-level control through deep reinforcement learning. Nature 518 7540 (2015) 529.  Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-level control through deep reinforcement learning. Nature 518 7540 (2015) 529.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Boris Polyak. 1964. Some methods of speeding up the convergence of iteration methods. Ussr Computational Mathematics and Mathematical Physics 4 (12 1964) 1--17.  Boris Polyak. 1964. Some methods of speeding up the convergence of iteration methods. Ussr Computational Mathematics and Mathematical Physics 4 (12 1964) 1--17.","DOI":"10.1016\/0041-5553(64)90137-5"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"S. Reddi S. Sra B. Poczos and A. Smola. 2016. Fast incremental method for nonconvex optimization. arXiv preprint arXiv:1603.06159 (2016).  S. Reddi S. Sra B. Poczos and A. Smola. 2016. Fast incremental method for nonconvex optimization. arXiv preprint arXiv:1603.06159 (2016).","DOI":"10.1109\/CDC.2016.7798553"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.21236\/ADA623517"},{"key":"e_1_3_2_1_31_1","unstructured":"M. Schmidt N. Le Roux and F. Bach. 2013. Minimizing Finite sums with the stochastic average gradient. (2013). Technical report INRIA.  M. Schmidt N. Le Roux and F. Bach. 2013. Minimizing Finite sums with the stochastic average gradient. (2013). Technical report INRIA."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.5555\/2567709.2502598"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"e_1_3_2_1_34_1","unstructured":"Richard S Sutton and Andrew G Barto. 2011. Reinforcement learning: An introduction. (2011).  Richard S Sutton and Andrew G Barto. 2011. Reinforcement learning: An introduction. (2011)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913519000"},{"key":"e_1_3_2_1_37_1","first-page":"2094","article-title":"Deep Reinforcement Learning with Double Q-Learning","volume":"16","author":"Hasselt Hado Van","year":"2016","journal-title":"AAAI"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364915581505"},{"key":"e_1_3_2_1_39_1","unstructured":"Ziyu Wang Tom Schaul Matteo Hessel Hado Van Hasselt Marc Lanctot and Nando De Freitas. 2015. Dueling network architectures for deep reinforcement learning. arXiv preprint arXiv:1511.06581 (2015).  Ziyu Wang Tom Schaul Matteo Hessel Hado Van Hasselt Marc Lanctot and Nando De Freitas. 2015. Dueling network architectures for deep reinforcement learning. arXiv preprint arXiv:1511.06581 (2015)."},{"volume":"48","volume-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning -","author":"Zhu A.","key":"e_1_3_2_1_40_1"}],"event":{"name":"ICCPS '19: ACM\/IEEE 10th International Conference on Cyber-Physical Systems","sponsor":["SIGBED ACM Special Interest Group on Embedded Systems","IEEE-CS\\TCRT TC on Real-Time Systems"],"location":"Montreal Quebec Canada","acronym":"ICCPS '19"},"container-title":["Proceedings of the 10th ACM\/IEEE International Conference on Cyber-Physical Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3302509.3311053","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3302509.3311053","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:53:55Z","timestamp":1750204435000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3302509.3311053"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,16]]},"references-count":39,"alternative-id":["10.1145\/3302509.3311053","10.1145\/3302509"],"URL":"https:\/\/doi.org\/10.1145\/3302509.3311053","relation":{},"subject":[],"published":{"date-parts":[[2019,4,16]]},"assertion":[{"value":"2019-04-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}