{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:29:13Z","timestamp":1768321753367,"version":"3.49.0"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,8,21]],"date-time":"2018-08-21T00:00:00Z","timestamp":1534809600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Glob Optim"],"published-print":{"date-parts":[[2019,2]]},"DOI":"10.1007\/s10898-018-0698-y","type":"journal-article","created":{"date-parts":[[2018,8,21]],"date-time":"2018-08-21T10:10:29Z","timestamp":1534846229000},"page":"279-310","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["A unified DC programming framework and efficient DCA based approaches for large scale batch reinforcement learning"],"prefix":"10.1007","volume":"73","author":[{"given":"Hoai\u00a0An","family":"Le Thi","sequence":"first","affiliation":[]},{"given":"Vinh\u00a0Thanh","family":"Ho","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Pham Dinh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,8,21]]},"reference":[{"key":"698_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the Twenty-first International Conference on Machine Learning, ICML. ACM, New York (2004)","DOI":"10.1145\/1015330.1015430"},{"issue":"1","key":"698_CR2","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","volume":"71","author":"A Antos","year":"2008","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning near-optimal policies with bellman-residual minimization based fitted policy iteration and a single sample path. Mach. Learn. 71(1), 89\u2013129 (2008)","journal-title":"Mach. Learn."},{"key":"698_CR3","doi-asserted-by":"crossref","unstructured":"Baird, L.C.I.: Residual algorithms: reinforcement learning with function approximation. In: Prieditis, A., Russell, S. (eds.) Machine Learning Proceedings 1995, pp. 30\u201337. Morgan Kaufmann, San Francisco (1995)","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"issue":"4","key":"698_CR4","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1512\/iumj.1957.6.56038","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: A markovian decision process. Indiana Univ. Math. J. 6(4), 679\u2013684 (1957)","journal-title":"Indiana Univ. Math. J."},{"key":"698_CR5","volume-title":"Dynamic Programming: Deterministic and Stochastic Models","year":"1987","unstructured":"Bertsekas, D.P. (ed.): Dynamic Programming: Deterministic and Stochastic Models. Prentice-Hall Inc, Upper Saddle River (1987)"},{"key":"698_CR6","volume-title":"Neuro-Dynamic Programming","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N. (eds.): Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"issue":"11","key":"698_CR7","doi-asserted-by":"publisher","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","volume":"45","author":"S Bhatnagar","year":"2009","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Natural actor-critic algorithms. Automatica 45(11), 2471\u20132482 (2009)","journal-title":"Automatica"},{"issue":"2","key":"698_CR8","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1023\/A:1026433520314","volume":"107","author":"R Blanquero","year":"2000","unstructured":"Blanquero, R., Carrizosa, E.: Optimization of the norm of a vector-valued dc function and applications. J. Optim. Theory Appl. 107(2), 245\u2013260 (2000)","journal-title":"J. Optim. Theory Appl."},{"issue":"2","key":"698_CR9","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1007\/s10898-009-9487-y","volume":"48","author":"R Blanquero","year":"2010","unstructured":"Blanquero, R., Carrizosa, E.: On the norm of a dc function. J. Glob. Optim. 48(2), 209\u2013213 (2010)","journal-title":"J. Glob. Optim."},{"key":"698_CR10","volume-title":"Reinforcement Learning and Dynamic Programming Using Function Approximators","author":"L Bu\u015foniu","year":"2010","unstructured":"Bu\u015foniu, L., Babuska, R., Schutter, B.D., Ernst, D.: Reinforcement Learning and Dynamic Programming Using Function Approximators, 1st edn. CRC Press Inc, Boca Raton (2010)","edition":"1"},{"key":"698_CR11","unstructured":"Coulom, R.: Reinforcement learning using neural networks, with applications to motor control. Ph.D. thesis, Institut National Polytechnique de Grenoble (2002)"},{"key":"698_CR12","doi-asserted-by":"publisher","unstructured":"Cruz Neto, J.X., Lopes, J.O., Santos, P.S.M., Souza, J.C.O.: An interior proximal linearized method for DC programming based on Bregman distance or second-order homogeneous kernels. Optimization, 1\u201315 (2018). \n                    https:\/\/doi.org\/10.1080\/02331934.2018.1476859","DOI":"10.1080\/02331934.2018.1476859"},{"key":"698_CR13","first-page":"503","volume":"6","author":"D Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. J. Mach. Learn. Res. 6, 503\u2013556 (2005)","journal-title":"J. Mach. Learn. Res."},{"issue":"4","key":"698_CR14","doi-asserted-by":"publisher","first-page":"2010","DOI":"10.1137\/13090540X","volume":"6","author":"E Esser","year":"2013","unstructured":"Esser, E., Lou, Y., Xin, J.: A method for finding structured sparse solutions to non-negative least squares problems with applications. SIAM J. Imaging Sci. 6(4), 2010\u20132046 (2013)","journal-title":"SIAM J. Imaging Sci."},{"issue":"1","key":"698_CR15","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/s10898-017-0568-z","volume":"71","author":"M Gaudioso","year":"2018","unstructured":"Gaudioso, M., Giallombardo, G., Miglionico, G., Bagirov, A.M.: Minimizing nonsmooth dc functions via successive dc piecewise-affine approximations. J. Glob. Optim. 71(1), 37\u201355 (2018)","journal-title":"J. Glob. Optim."},{"issue":"6","key":"698_CR16","doi-asserted-by":"publisher","first-page":"845","DOI":"10.1109\/TNNLS.2013.2247418","volume":"24","author":"M Geist","year":"2013","unstructured":"Geist, M., Pietquin, O.: Algorithmic survey of parametric value function approximation. IEEE Trans. Neural Netw. Learn. Syst. 24(6), 845\u2013867 (2013)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"4","key":"698_CR17","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1561\/2200000042","volume":"6","author":"A Geramifard","year":"2013","unstructured":"Geramifard, A., Walsh, T.J., Tellex, S., Chowdhary, G., Roy, N., How, J.P.: A tutorial on linear function approximators for dynamic programming and reinforcement learning. Found. Trends Mach. Learn. 6(4), 375\u2013451 (2013)","journal-title":"Found. Trends Mach. Learn."},{"issue":"2","key":"698_CR18","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1287\/ijoc.1080.0305","volume":"21","author":"A Gosavi","year":"2009","unstructured":"Gosavi, A.: Reinforcement learning: a tutorial survey and recent advances. INFORMS J. Comput. 21(2), 178\u2013192 (2009)","journal-title":"INFORMS J. Comput."},{"key":"698_CR19","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1007\/978-3-319-38884-7_4","volume-title":"Advanced Computational Methods for Knowledge Engineering","author":"VT Ho","year":"2016","unstructured":"Ho, V.T., Le Thi, H.A.: Solving an infinite-horizon discounted markov decision process by DC programming and DCA. In: Nguyen, T.B., van Do, T., An Le Thi, H., Nguyen, N.T. (eds.) Advanced Computational Methods for Knowledge Engineering, pp. 43\u201355. Springer, Berlin (2016)"},{"issue":"2","key":"698_CR20","doi-asserted-by":"publisher","first-page":"1892","DOI":"10.1137\/16M1115733","volume":"28","author":"K Joki","year":"2018","unstructured":"Joki, K., Bagirov, A., Karmitsa, N., M\u00e4kel\u00e4, M., Taheri, S.: Double bundle method for finding clarke stationary points in nonsmooth dc programming. SIAM J. Optim. 28(2), 1892\u20131919 (2018)","journal-title":"SIAM J. Optim."},{"issue":"3","key":"698_CR21","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1007\/s10898-016-0488-3","volume":"68","author":"K Joki","year":"2017","unstructured":"Joki, K., Bagirov, A.M., Karmitsa, N., M\u00e4kel\u00e4, M.M.: A proximal bundle method for nonsmooth dc optimization utilizing nonconvex cutting planes. J. Glob. Optim. 68(3), 501\u2013535 (2017)","journal-title":"J. Glob. Optim."},{"issue":"3","key":"698_CR22","doi-asserted-by":"publisher","first-page":"399","DOI":"10.14492\/hokmj\/1381757647","volume":"14","author":"S Koshi","year":"1985","unstructured":"Koshi, S.: Convergence of convex functions and duality. Hokkaido Math. J. 14(3), 399\u2013414 (1985)","journal-title":"Hokkaido Math. J."},{"key":"698_CR23","first-page":"1107","volume":"4","author":"MG Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. J. Mach. Learn. Res. 4, 1107\u20131149 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"698_CR24","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/978-3-642-27645-3_2","volume-title":"Reinforcement Learning. , vol.\u00a012, chap.\u00a02,","author":"S Lange","year":"2012","unstructured":"Lange, S., Gabel, T., Riedmiller, M.: Batch Reinforcement Learning. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning., vol.\u00a012, chap.\u00a02, pp. 45\u201373. Springer, Berlin, Heidelberg, Hillsdale (2012)"},{"key":"698_CR25","unstructured":"Le\u00a0Thi, H.A.: DC Programming and DCA. \n                    http:\/\/www.lita.univ-lorraine.fr\/~lethi\/index.php\/en\/research\/dc-programming-and-dca.html\n                    \n                   (homepage) (2005). Accessed 1 Dec 2005"},{"issue":"1\u20133","key":"698_CR26","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/s10994-014-5455-y","volume":"101","author":"HA Thi Le","year":"2015","unstructured":"Le Thi, H.A., Le, H.M., Pham Dinh, T.: Feature selection in machine learning: an exact penalty approach using a difference of convex function algorithm. Mach. Learn. 101(1\u20133), 163\u2013186 (2015)","journal-title":"Mach. Learn."},{"issue":"5\u20136","key":"698_CR27","doi-asserted-by":"publisher","first-page":"1336","DOI":"10.1007\/s10618-014-0369-7","volume":"28","author":"HA Thi Le","year":"2014","unstructured":"Le Thi, H.A., Nguyen, M.C.: Self-organizing maps by difference of convex functions optimization. Data Min. Knowl. Discov. 28(5\u20136), 1336\u20131365 (2014)","journal-title":"Data Min. Knowl. Discov."},{"issue":"12","key":"698_CR28","doi-asserted-by":"publisher","first-page":"2827","DOI":"10.1162\/NECO_a_00673","volume":"26","author":"HA Thi Le","year":"2014","unstructured":"Le Thi, H.A., Nguyen, M.C., Pham Dinh, T.: A dc programming approach for finding communities in networks. Neural Comput. 26(12), 2827\u20132854 (2014)","journal-title":"Neural Comput."},{"issue":"3","key":"698_CR29","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1023\/A:1008288411710","volume":"11","author":"HA Thi Le","year":"1997","unstructured":"Le Thi, H.A., Pham Dinh, T.: Solving a class of linearly constrained indefinite quadratic problems by D.C. algorithms. J. Glob. Optim. 11(3), 253\u2013285 (1997)","journal-title":"J. Glob. Optim."},{"issue":"1\u20134","key":"698_CR30","first-page":"23","volume":"133","author":"HA Thi Le","year":"2005","unstructured":"Le Thi, H.A., Pham Dinh, T.: The DC (difference of convex functions) programming and DCA revisited with DC models of real world nonconvex optimization problems. Ann. Oper. Res. 133(1\u20134), 23\u201346 (2005)","journal-title":"Ann. Oper. Res."},{"issue":"1","key":"698_CR31","first-page":"5","volume":"169","author":"HA Thi Le","year":"2018","unstructured":"Le Thi, H.A., Pham Dinh, T.: DC programming and DCA: thirty years of developments. Math. Program. Spec. Issue DC Program. Theory Algorithms Appl. 169(1), 5\u201368 (2018)","journal-title":"Math. Program. Spec. Issue DC Program. Theory Algorithms Appl."},{"issue":"1","key":"698_CR32","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1016\/j.ejor.2014.11.031","volume":"244","author":"HA Thi Le","year":"2015","unstructured":"Le Thi, H.A., Pham Dinh, T., Le, H.M., Vo, X.T.: DC approximation approaches for sparse optimization. Eur. J. Oper. Res. 244(1), 26\u201346 (2015)","journal-title":"Eur. J. Oper. Res."},{"key":"698_CR33","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1016\/j.neunet.2014.06.011","volume":"59","author":"HA Thi Le","year":"2014","unstructured":"Le Thi, H.A., Vo, X.T., Pham Dinh, T.: Feature selection for linear SVMs under uncertain data: robust optimization based on difference of convex functions algorithms. Neural Netw. 59, 36\u201350 (2014)","journal-title":"Neural Netw."},{"issue":"1","key":"698_CR34","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1198\/106186005X37238","volume":"14","author":"Y Liu","year":"2005","unstructured":"Liu, Y., Shen, X., Doss, H.: Multicategory \n                    \n                      \n                    \n                    $$\\psi $$\n                    \n                      \n                        \u03c8\n                      \n                    \n                  -learning and support vector machines: computational tools. J. Comput. Gr. Stat. 14(1), 219\u2013236 (2005)","journal-title":"J. Comput. Gr. Stat."},{"key":"698_CR35","unstructured":"Maillard, O.A., Munos, R., Lazaric, A., Ghavamzadeh, M.: Finite sample analysis of Bellman residual minimization. In: Sugiyama,M., Yang, Q. (eds.) Asian Conference on Machine Learpning. JMLR: Workshop and Conference Proceedings, vol.\u00a013, pp. 309\u2013324 (2010)"},{"issue":"2","key":"698_CR36","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1137\/040614384","volume":"46","author":"R Munos","year":"2007","unstructured":"Munos, R.: Performance bounds in \n                    \n                      \n                    \n                    $$L_p$$\n                    \n                      \n                        \n                          L\n                          p\n                        \n                      \n                    \n                   norm for approximate value iteration. SIAM J. Control Optim. 46(2), 541\u2013561 (2007)","journal-title":"SIAM J. Control Optim."},{"key":"698_CR37","unstructured":"Oliveira, W.D.: Proximal bundle methods for nonsmooth DC programming (2017). \n                    https:\/\/drive.google.com\/file\/d\/0ByLZhUZ45Y-HQnVvOEZ3REw0Sk0\/view\n                    \n                  . Accessed 20 July 2018"},{"key":"698_CR38","unstructured":"Oliveira, W.D., Tcheou, M.: An inertial algorithm for DC programming (2018). \n                    https:\/\/drive.google.com\/file\/d\/1CUQRJBBVMtH2dFMuIa5_s6xcEjAG5xeC\/view\n                    \n                  . Accessed 20 July 2018"},{"key":"698_CR39","unstructured":"Pashenkova, E., Rish, I., Dechter, R.: Value iteration and policy iteration algorithms for markov decision problem. In Proceedings of the National Conference on Artificial Intelligence (AAAI) Workshop on Structural Issues in Planning and Temporal Reasoning, April (1996)"},{"key":"698_CR40","series-title":"North-Holland Mathematics Studies","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1016\/S0304-0208(08)72402-2","volume-title":"Fermat Days 85: Mathematics for Optimization","author":"T Pham\u00a0Dinh","year":"1986","unstructured":"Pham\u00a0Dinh, T., El\u00a0Bernoussi, S.: Algorithms for solving a class of nonconvex optimization problems. methods of subgradients. In: Hiriart-Urruty, J.B. (ed.) Fermat Days 85: Mathematics for Optimization. North-Holland Mathematics Studies, vol. 129, pp. 249\u2013271. North-Holland, Amsterdam (1986)"},{"issue":"1","key":"698_CR41","first-page":"289","volume":"22","author":"T Pham Dinh","year":"1997","unstructured":"Pham Dinh, T., Le Thi, H.A.: Convex analysis approach to DC programming: theory, algorithms and applications. Acta Mathematica Vietnamica 22(1), 289\u2013355 (1997)","journal-title":"Acta Mathematica Vietnamica"},{"issue":"2","key":"698_CR42","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1137\/S1052623494274313","volume":"8","author":"T Pham Dinh","year":"1998","unstructured":"Pham Dinh, T., Le Thi, H.A.: DC optimization algorithms for solving the trust region subproblem. SIAM J. Optim. 8(2), 476\u2013505 (1998)","journal-title":"SIAM J. Optim."},{"key":"698_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-54455-2_1","volume-title":"Transactions on Computational Intelligence XIII","author":"T Pham Dinh","year":"2014","unstructured":"Pham Dinh, T., Le Thi, H.A.: Recent advances in DC programming and DCA. In: Nguyen, N.T., Le Thi, H.A. (eds.) Transactions on Computational Intelligence XIII, vol. 8342, pp. 1\u201337. Springer, Berlin, Heidelberg (2014)"},{"key":"698_CR44","unstructured":"Piot, B., Geist, M., Pietquin, O.: Difference of convex functions programming for reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS 2014) (2014)"},{"key":"698_CR45","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","year":"1994","unstructured":"Puterman, M.L. (ed.): Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York (1994)"},{"key":"698_CR46","series-title":"Princeton Mathematical Series","doi-asserted-by":"publisher","DOI":"10.1515\/9781400873173","volume-title":"Convex Analysis","author":"RT Rockafellar","year":"1970","unstructured":"Rockafellar, R.T.: Convex Analysis. Princeton Mathematical Series. Princeton University Press, Princeton (1970)"},{"issue":"1","key":"698_CR47","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1016\/0022-247X(77)90060-9","volume":"60","author":"G Salinetti","year":"1977","unstructured":"Salinetti, G., Wets, R.J.: On the relations between two types of convergence for convex functions. J. Math. Anal. Appl. 60(1), 211\u2013226 (1977)","journal-title":"J. Math. Anal. Appl."},{"key":"698_CR48","unstructured":"Scherrer, B.: Should one compute the Temporal Difference fix point or minimize the Bellman Residual? The unified oblique projection view. In: 27th International Conference on Machine Learning\u2014ICML 2010. Ha\u00effa, Israel (2010)"},{"key":"698_CR49","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1016\/j.dam.2005.02.028","volume":"151","author":"T Sch\u00fcle","year":"2005","unstructured":"Sch\u00fcle, T., Schn\u00f6rr, C., Weber, S., Hornegger, J.: Discrete tomography by convex\u2013concave regularization and d.c. programming. Discrete Appl. Math. 151, 229\u2013243 (2005)","journal-title":"Discrete Appl. Math."},{"issue":"2","key":"698_CR50","doi-asserted-by":"publisher","first-page":"568","DOI":"10.1016\/0022-247X(85)90317-8","volume":"110","author":"P Schweitzer","year":"1985","unstructured":"Schweitzer, P., Seidmann, A.: Generalized polynomial approximations in markovian decision processes. J. Math. Anal. Appl. 110(2), 568\u2013582 (1985)","journal-title":"J. Math. Anal. Appl."},{"key":"698_CR51","volume-title":"Markov Decision Processes in Artificial Intelligence","year":"2010","unstructured":"Sigaud, O., Buffet, O. (eds.): Markov Decision Processes in Artificial Intelligence. Wiley-IEEE Press, Hoboken (2010)"},{"issue":"3","key":"698_CR52","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1023\/A:1007678930559","volume":"38","author":"S Singh","year":"2000","unstructured":"Singh, S., Jaakkola, T., Littman, M.L., Szepesv\u00e1ri, C.: Convergence results for single-step on-policy reinforcement-learning algorithms. Mach. Learn. 38(3), 287\u2013308 (2000)","journal-title":"Mach. Learn."},{"key":"698_CR53","first-page":"361","volume-title":"Advances in Neural Information Processing Systems","author":"SP Singh","year":"1995","unstructured":"Singh, S.P., Jaakkola, T., Jordan, M.I.: Reinforcement learning with soft state aggregation. In: Tesauro, G., Touretzky, D.S., Leen, T.K. (eds.) Advances in Neural Information Processing Systems, vol. 7, pp. 361\u2013368. MIT Press, San Mateo (1995)"},{"issue":"7","key":"698_CR54","doi-asserted-by":"publisher","first-page":"1529","DOI":"10.1007\/s11590-015-0969-1","volume":"10","author":"JCO Souza","year":"2016","unstructured":"Souza, J.C.O., Oliveira, P.R., Soubeyran, A.: Global convergence of a proximal linearized algorithm for difference of convex functions. Optim. Lett. 10(7), 1529\u20131539 (2016)","journal-title":"Optim. Lett."},{"key":"698_CR55","unstructured":"Sutton, R.S.: Generalization in reinforcement learning: successful examples using sparse coarse coding. In: Advances in Neural Information Processing Systems, vol. 8, pp. 1038\u20131044. MIT Press (1996)"},{"key":"698_CR56","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"698_CR57","doi-asserted-by":"publisher","DOI":"10.2200\/S00268ED1V01Y201005AIM009","volume-title":"Algorithms for Reinforcement Learning","author":"C Szepesv\u00e1ri","year":"2010","unstructured":"Szepesv\u00e1ri, C.: Algorithms for Reinforcement Learning. Morgan & Claypool, San Rafael (2010)"},{"key":"698_CR58","doi-asserted-by":"crossref","unstructured":"Szepesv\u00e1ri, C., Smart, W.D.: Interpolation-based q-learning. In: Proceedings of the Twenty-First International Conference on Machine Learning, ICML \u201904, pp. 791\u2013798. ACM, New York (2004)","DOI":"10.1145\/1015330.1015445"},{"key":"698_CR59","doi-asserted-by":"publisher","first-page":"851","DOI":"10.1016\/j.cam.2013.08.010","volume":"259","author":"AH Tor","year":"2014","unstructured":"Tor, A.H., Bagirov, A., Karas\u00f6zen, B.: Aggregate codifferential method for nonsmooth dc optimization. J. Comput. Appl. Math. 259, 851\u2013867 (2014)","journal-title":"J. Comput. Appl. Math."},{"key":"698_CR60","volume-title":"Statistical Learning Theory","year":"1998","unstructured":"Vapnik, V.N. (ed.): Statistical Learning Theory. Wiley, Hoboken (1998)"},{"key":"698_CR61","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. thesis, King\u2019s College, Cambridge (1989)"},{"key":"698_CR62","volume-title":"Reinforcement Learning: State-of-the-Art. Adaptation, Learning, and Optimization","year":"2012","unstructured":"Wiering, M., van Otterlo, M. (eds.): Reinforcement Learning: State-of-the-Art. Adaptation, Learning, and Optimization, vol. 12, 1st edn. Springer, Berlin, Heidelberg (2012)","edition":"1"},{"key":"698_CR63","unstructured":"Williams, R.J., Baird, L.C.I.: Tight performance bounds on greedy policies based on imperfect value functions. College of Computer Science, Northeastern University, Tech. rep. (1993)"},{"key":"698_CR64","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.ins.2013.08.037","volume":"261","author":"X Xu","year":"2014","unstructured":"Xu, X., Zuo, L., Huang, Z.: Reinforcement learning algorithms with function approximation: recent advances and applications. Inf. Sci. 261, 1\u201331 (2014)","journal-title":"Inf. Sci."}],"container-title":["Journal of Global Optimization"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10898-018-0698-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10898-018-0698-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10898-018-0698-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,20]],"date-time":"2019-08-20T23:25:13Z","timestamp":1566343513000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10898-018-0698-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,21]]},"references-count":64,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,2]]}},"alternative-id":["698"],"URL":"https:\/\/doi.org\/10.1007\/s10898-018-0698-y","relation":{},"ISSN":["0925-5001","1573-2916"],"issn-type":[{"value":"0925-5001","type":"print"},{"value":"1573-2916","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,8,21]]},"assertion":[{"value":"18 May 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}