{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T09:30:05Z","timestamp":1751275805657,"version":"3.40.5"},"reference-count":42,"publisher":"Informa UK Limited","issue":"9","content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Control"],"published-print":{"date-parts":[[2022,9,2]]},"DOI":"10.1080\/00207179.2021.1913516","type":"journal-article","created":{"date-parts":[[2021,4,6]],"date-time":"2021-04-06T03:51:57Z","timestamp":1617681117000},"page":"2448-2459","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":5,"title":["Bellman's principle of optimality and deep reinforcement learning for time-varying tasks"],"prefix":"10.1080","volume":"95","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5503-8506","authenticated-orcid":false,"given":"Alessandro","family":"Giuseppi","sequence":"first","affiliation":[{"name":"Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \u201cLa Sapeinza\u201d, Rome, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0188-3346","authenticated-orcid":false,"given":"Antonio","family":"Pietrabissa","sequence":"additional","affiliation":[{"name":"Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \u201cLa Sapeinza\u201d, Rome, Italy"}]}],"member":"301","published-online":{"date-parts":[[2021,4,16]]},"reference":[{"key":"CIT0001","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"CIT0002","doi-asserted-by":"publisher","DOI":"10.1126\/science.153.3731.34"},{"key":"CIT0003","volume-title":"Dynamic programming and optimal control","author":"Bertsekas D. P.","year":"2005","edition":"3"},{"key":"CIT0004","unstructured":"Borsa, D., Graepel, T. & Shawe-Taylor, J. (2016). Learning shared representations in multi-task reinforcement learning. Retrieved from http:\/\/arxiv.org\/abs\/1603.02041."},{"key":"CIT0005","unstructured":"Boyan, J. A. & Littman, M. L. (2001). Exact solutions to time-dependent MDPs. In Proceedings of the 13th International Conference on Neural Information Processing Systems,\u00a0Denver, CO (pp. 982\u2013988).MIT Press."},{"key":"CIT0006","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J. & Tang, J. (2016). Openai gym. Retrieved from https:\/\/arxiv.org\/abs\/1606.01540."},{"key":"CIT0007","unstructured":"Choi, S. P., Yeung, D. Y. & Zhang, N. L. (2000). An environment model for nonstationary reinforcement learning. In Proceedings of the 12th International Conference on Neural Information Processing Systems,\u00a0Denver, CO (pp. 987\u2013993).\u00a0MIT Press."},{"key":"CIT0008","doi-asserted-by":"crossref","unstructured":"Choi, S. P. M., Yeung, D. Y. & Zhang, N. L. (2000). Hidden-mode Markov Decision Processes for Nonstationary Sequential Decision Making. In Sequence learning (pp. 264\u2013287). Springer Berlin Heidelberg.","DOI":"10.1007\/3-540-44565-X_12"},{"key":"CIT0009","doi-asserted-by":"publisher","DOI":"10.1080\/00207179.2017.1312669"},{"key":"CIT0010","unstructured":"Florian, R. V. (2007). Correct equations for the dynamics of the cart-pole system. 
Report for the Center for Cognitive and Neural Studies (Coneural)."},{"key":"CIT0011","first-page":"1","author":"Forootani A.","year":"2019","journal-title":"International Journal of Control"},{"key":"CIT0012","first-page":"1","author":"Forootani A.","year":"2019","journal-title":"International Journal of Control"},{"key":"CIT0013","unstructured":"Fujimoto, S., Hoof, H. & Meger, D. (2020). Author's PyTorch implementation of TD3 for OpenAI gym tasks. Retrieved from https:\/\/github.com\/sfujim\/TD3."},{"volume-title":"Proceedings of the 35th International Conference on Machine Learning.\u00a0Proceedings of Machine Learning Research\u00a0 (Vol. 80, pp. 1587\u20131596)","year":"2018","author":"Fujimoto S.","key":"CIT0014"},{"volume-title":"Proceedings of the Fifteenth International Conference on Machine Learning (Vol. 9, pp. 197\u2013205)","year":"1998","author":"G\u00e1bor Z.","key":"CIT0015"},{"key":"CIT0016","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2020.2979635"},{"key":"CIT0017","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2012.2218595"},{"key":"CIT0018","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P. & Levine, S. (2018). Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. Retrieved from https:\/\/arxiv.org\/abs\/1801.01290."},{"key":"CIT0019","unstructured":"Hallak, A., Castro, D. D. & Mannor, S. (2015). Contextual markov decision processes. Retrieved from https:\/\/arxiv.org\/abs\/1502.02259."},{"key":"CIT0020","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"CIT0021","doi-asserted-by":"publisher","DOI":"10.1162\/evco_a_00232"},{"key":"CIT0022","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y. & Silver, D. (2015). Continuous control with deep reinforcement learning. Retrieved from https:\/\/arxiv.org\/abs\/1509.02971."},{"key":"CIT0023","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2801479"},{"key":"CIT0024","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2014.2358639"},{"key":"CIT0025","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I. & Wierstra, D. (2013). Playing atari with deep reinforcement learning. Retrieved from https:\/\/arxiv.org\/abs\/1312.5602."},{"key":"CIT0026","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"CIT0027","unstructured":"Omidshafiei, S., Pazis, J., Amato, C., How, J. P. & Vian, J. (2017, August 6-11). Deep decentralized multi-task multi-agent reinforcement learning under partial observability. In: D. Precup & Y. W. Teh (Eds.), Proceedings of the 34th International Conference on Machine Learning.\u00a0Proceedings of Machine Learning Research (Vol. 70, pp. 2681\u20132690). Retrieved from http:\/\/proceedings.mlr.press\/v70\/omidshafiei17a\/omidshafiei17a.pdf"},{"key":"CIT0028","doi-asserted-by":"crossref","unstructured":"Padakandla, S., Prabuchandran, K. J. & Bhatnagar, S. (2019). Reinforcement learning in non-stationary environments. Applied Intelligence 2020. Retrieved from arXiv:1905.03970.","DOI":"10.1007\/s10489-020-01758-5"},{"key":"CIT0029","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887"},{"key":"CIT0030","doi-asserted-by":"crossref","unstructured":"Rachelson, E., Fabiani, P. & Garcia, F. (2009, November). TiMDPpoly: An improved method for solving time-dependent MDPs. In 2009 21st IEEE International Conference on Tools With Artificial Intelligence. IEEE.","DOI":"10.1109\/ICTAI.2009.52"},{"key":"CIT0031","unstructured":"Shimkin, N. (2011). 
Learning in complex systems, Lecture Notes. Retrieved from https:\/\/shimkin.net.technion.ac.il\/courses\/learning-in-complex-systems-2011\/."},{"key":"CIT0032","doi-asserted-by":"crossref","unstructured":"Sutton, R. S. & Barto, A. G. (1998). Reinforcement learning: An introduction.","DOI":"10.1109\/TNN.1998.712192"},{"key":"CIT0033","doi-asserted-by":"crossref","unstructured":"Tanaka, F. & Yamamura, M. (2003). Multitask reinforcement learning on the distribution of MDPs. In Proceedings 2003 IEEE International Symposium on Computational Intelligence in Robotics and Automation. Computational Intelligence in Robotics and Automation for the New Millennium (cat. no.03ex694). IEEE.","DOI":"10.1109\/CIRA.2003.1222152"},{"key":"CIT0034","unstructured":"Teh, Y., Bapst, V., Czarnecki, W. M., Quan, J., Kirkpatrick, J., Hadsell, R., Heess, N. & Pascanu, R. (2017). Distral: Robust multitask reinforcement learning. In Advances in neural information processing systems 30 (pp. 4496\u20134506). Curran Associates, Inc."},{"key":"CIT0035","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A. & Silver, D. (2016, March). Deep reinforcement learning with double Q-learning.\u00a0Proceedings of the AAAI Conference on Artificial Intelligence, 30(1).\u00a0Retrieved from https:\/\/arxiv.org\/abs\/1509.06461","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"CIT0036","doi-asserted-by":"publisher","DOI":"10.5555\/2627435.2750356"},{"key":"CIT0037","unstructured":"Verme, M. D., da Silva, B. C. & Baldassarre, G. (2020). Optimal options for multi-task reinforcement learning under time constraints. Retrieved from https:\/\/arxiv.org\/abs\/2001.01620."},{"key":"CIT0038","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chakrabarty, A., Zhou, M. & Zhang, J. (2019). Near-optimal control of motor drives via approximate dynamic programming. In2019 IEEE International Conference on Systems, Man and Cybernetics (SMC) (pp. 3679\u20133686). Retrieved from https:\/\/doi.org\/10.1109\/SMC.2019.8914595","DOI":"10.1109\/SMC.2019.8914595"},{"key":"CIT0039","doi-asserted-by":"crossref","unstructured":"Wilson, A., Fern, A., Ray, S. & Tadepalli, P. (2007). Multi-task reinforcement learning: A hierarchical bayesian approach. In Proceedings of the 24th International Conference on Machine Learning ICML (pp. 1015\u20131022). 
Association for Computing Machinery.","DOI":"10.1145\/1273496.1273624"},{"key":"CIT0040","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2013.2246866"},{"key":"CIT0041","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2011.2168538"},{"key":"CIT0042","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2010.10.033"}],"container-title":["International Journal of Control"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/00207179.2021.1913516","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T23:34:39Z","timestamp":1662075279000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/00207179.2021.1913516"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,4,16]]},"references-count":42,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2022,9,2]]}},"alternative-id":["10.1080\/00207179.2021.1913516"],"URL":"https:\/\/doi.org\/10.1080\/00207179.2021.1913516","relation":{},"ISSN":["0020-7179","1366-5820"],"issn-type":[{"type":"print","value":"0020-7179"},{"type":"electronic","value":"1366-5820"}],"subject":[],"published":{"date-parts":[[2021,4,16]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tcon20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tcon20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2020-10-04","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2021-04-01","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2021-04-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
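
The record above is a standard Crossref REST API "work" response. As a minimal sketch of how such a record can be retrieved and parsed (assuming network access to the public api.crossref.org endpoint; all field names below appear in the JSON shown above), the following Python snippet fetches this article's metadata by DOI and prints a few key fields:

import json
import urllib.request

# DOI of the article described by the record above.
DOI = "10.1080/00207179.2021.1913516"
url = f"https://api.crossref.org/works/{DOI}"

# The API returns the same envelope as the record above:
# {"status": "ok", "message-type": "work", ..., "message": {...}}.
with urllib.request.urlopen(url) as resp:
    payload = json.load(resp)

work = payload["message"]

# "title" and "container-title" are lists; each author entry
# carries "given" and "family" name parts.
print("Title:     ", work["title"][0])
print("Journal:   ", work["container-title"][0])
print("Issued:    ", work["issued"]["date-parts"][0])
for author in work.get("author", []):
    print("Author:    ", author.get("given", ""), author.get("family", ""))
print("References:", work["references-count"])

Note that Crossref asks API users to identify themselves (for example, by including a mailto: address in the User-Agent header) to be routed to its "polite" service pool; the sketch omits this for brevity.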