{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T04:16:13Z","timestamp":1778559373333,"version":"3.51.4"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2015,7,5]],"date-time":"2015-07-05T00:00:00Z","timestamp":1436054400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vietnam J Comput Sci"],"published-print":{"date-parts":[[2015,11]]},"DOI":"10.1007\/s40595-015-0045-x","type":"journal-article","created":{"date-parts":[[2015,7,4]],"date-time":"2015-07-04T10:54:38Z","timestamp":1436007278000},"page":"213-226","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":41,"title":["A multi-agent cooperative reinforcement learning model using a hierarchy of consultants, tutors and workers"],"prefix":"10.1007","volume":"2","author":[{"given":"Bilal H.","family":"Abed-alguni","sequence":"first","affiliation":[]},{"given":"Stephan K.","family":"Chalup","sequence":"additional","affiliation":[]},{"given":"Frans A.","family":"Henskens","sequence":"additional","affiliation":[]},{"given":"David J.","family":"Paul","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,7,5]]},"reference":[{"key":"45_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.: Exploration and apprenticeship learning in reinforcement learning. In: Proceedings of the 22nd International Conference on Machine Learning, pp. 1\u20138 (2005)","DOI":"10.1145\/1102351.1102352"},{"key":"45_CR2","unstructured":"Abed-Alguni, B.H.K.: Cooperative reinforcement learning for independent learners. Ph.D. thesis, The University of Newcastle, Australia. Faculty of Engineering and Built Environment, School of Electrical Engineering and Computer Science (2014)"},{"key":"45_CR3","doi-asserted-by":"crossref","unstructured":"Arai, S., Sycara, K.: Effective learning approach for planning and scheduling in multi-agent domain. In: Proceedings of the 6th International Conference on Simulation of Adaptive Behavior, pp. 507\u2013516 (2000)","DOI":"10.7551\/mitpress\/3120.003.0054"},{"key":"45_CR4","unstructured":"Asadi, M., Huber, M.: State space reduction for hierarchical Reinforcement Learning. In: Proceedings of the Seventeenth International FLAIRS Conference, pp. 509\u2013514 (2004)"},{"issue":"1\u20132","key":"45_CR5","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discrete Event Dyn. Syst. 13(1\u20132), 41\u201377 (2003)","journal-title":"Discrete Event Dyn. Syst."},{"key":"45_CR6","unstructured":"Boutilier, C., Dearden, R., Goldszmidt, M.: Exploiting structure in policy construction. In: International Joint Conference on Artificial Intelligence, vol. 14, pp. 1104\u20131113. Lawrence Erlbaum Associates Ltd (1995)"},{"key":"45_CR7","doi-asserted-by":"crossref","unstructured":"Boyd, T., Dasgupta, P.: Process migration: a generalized approach using a virtualizing operating system. In: Proceeding of the 22nd International Conference on Distributed Computing Systems ICDCS 2002, pp. 385\u2013392 (2002)","DOI":"10.1109\/ICDCS.2002.1022276"},{"key":"45_CR8","doi-asserted-by":"crossref","unstructured":"Cai, Y., Yang, S., Xu, X.: A combined hierarchical reinforcement learning based approach for multi-robot cooperative target searching in complex unknown environments. In: 2013 IEEE Symposium on Adaptive Dynamic Programming And Reinforcement Learning (ADPRL). Singapore, pp. 52\u201359 (2013)","DOI":"10.1109\/ADPRL.2013.6614989"},{"key":"45_CR9","unstructured":"Cao, F., Ray, S.: Bayesian hierarchical reinforcement learning. In: F. Pereira, C. Burges, L. Bottou, K. Weinberger (eds.) Advances in Neural Information Processing Systems, vol. 25, pp. 73\u201381. Curran Associates (2012)"},{"key":"45_CR10","doi-asserted-by":"crossref","unstructured":"Daoui, C., Abbad, M., Tkiouat, M.: Exact decomposition approaches for Markov decision processes: a survey. In: Advances in Operations Research 2010 (2010)","DOI":"10.1155\/2010\/659432"},{"issue":"1","key":"45_CR11","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the MAXQ value function decomposition. J. Artif. Intell. Res. 13(1), 227\u2013303 (2000)","journal-title":"J. Artif. Intell. Res."},{"issue":"3","key":"45_CR12","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1007\/s10489-006-0034-y","volume":"27","author":"G Erus","year":"2007","unstructured":"Erus, G., Polat, F.: A layered approach to learning coordination knowledge in multiagent environments. Appl. Intell. 27(3), 249\u2013267 (2007)","journal-title":"Appl. Intell."},{"issue":"2","key":"45_CR13","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1007\/s10458-006-7035-4","volume":"13","author":"M Ghavamzadeh","year":"2006","unstructured":"Ghavamzadeh, M., Mahadevan, S., Makar, R.: Hierarchical multi-agent reinforcement learning. Auton. Agents Multi-Agent Syst. 13(2), 197\u2013229 (2006)","journal-title":"Auton. Agents Multi-Agent Syst."},{"key":"45_CR14","unstructured":"Guestrin, C., Gordon, G.: Distributed planning in hierarchical factored MDPs. In: Proceedings of the Eighteenth Conference on Uncertainty in Artificial Intelligence, pp. 197\u2013206. Morgan Kaufmann (2002)"},{"key":"45_CR15","doi-asserted-by":"crossref","first-page":"122","DOI":"10.1016\/j.engappai.2014.05.012","volume":"34","author":"MK Gunady","year":"2014","unstructured":"Gunady, M.K., Gomaa, W., Takeuchi, I.: Aggregate reinforcement learning for multi-agent territory division: the hide-and-seek game. Eng. Appl. Artif. Intell. 34, 122\u2013136 (2014)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"45_CR16","unstructured":"Hengst, B.: Discovering hierarchy in reinforcement learning with HEXQ. In: Machine Learning: Proceedings of the Nineteenth International Conference on Machine Learning, pp. 243\u2013250. Morgan Kaufmann (2002)"},{"key":"45_CR17","doi-asserted-by":"crossref","unstructured":"Iima, H., Kuroe, Y.: Reinforcement learning through interaction among multiple agents. In: The 2006 International Joint Conference of the Japanese Society of Instrument and Control Engineers and the Korean Institute of Control, Automation and System Engineers, pp. 2457\u20132462 (2006)","DOI":"10.1109\/SICE.2006.315142"},{"key":"45_CR18","doi-asserted-by":"crossref","unstructured":"Iima, H., Kuroe, Y.: Swarm reinforcement learning algorithms\u2014exchange of information among multiple agents. In: 2007 Annual Conference of the Japanese Society of Instrument and Control Engineers, pp. 2779\u20132784 (2007)","DOI":"10.1109\/SICE.2007.4421461"},{"key":"45_CR19","doi-asserted-by":"crossref","unstructured":"Iima, H., Kuroe, Y.: Swarm reinforcement learning algorithms based on sarsa method. In: 2008 Annual Conference of the Japanese Society of Instrument and Control Engineers, pp. 2045\u20132049 (2008)","DOI":"10.1109\/SICE.2008.4654998"},{"key":"45_CR20","unstructured":"Jardim, D., Nunes, L., Oliveira, S.: Hierarchical reinforcement learning: learning sub-goals and state-abstraction. In: 2011 6th Iberian Conference on Information Systems and Technologies. Chaves, Portugal, pp. 1\u20134 (2011)"},{"issue":"6","key":"45_CR21","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1360\/aas-007-0583","volume":"33","author":"DW Jiang","year":"2007","unstructured":"Jiang, D.W., Wang, S.Y., Dong, Y.S.: Role-based context-specific multiagent Q-learning. Acta Autom. Sinica 33(6), 583\u2013587 (2007)","journal-title":"Acta Autom. Sinica"},{"key":"45_CR22","unstructured":"Kaye, D.: Loosely coupled: the missing pieces of Web services. In: Bing, A., Kaye, C. (eds.) 1st edn. Chap. 10, RDS Strategies LLC p. 132 (2003)"},{"key":"45_CR23","unstructured":"Kim, K.E., Dean, T.: Solving factored MDPs via non-homogeneous partitioning. Proceedings of the 17th International Joint Conference on Artificial Intelligence. IJCAI\u201901, vol. 1, pp. 683\u2013689. Morgan Kaufmann, San Francisco (2001)"},{"issue":"3","key":"45_CR24","first-page":"228","volume":"2","author":"MR Lee","year":"2005","unstructured":"Lee, M.R.: A multi-agent cooperation model using reinforcement learning for planning multiple goals. J. Secur. Eng. 2(3), 228\u2013233 (2005)","journal-title":"J. Secur. Eng."},{"key":"45_CR25","doi-asserted-by":"crossref","unstructured":"Liu, F., Zeng, G.: Multi-agent cooperative learning research based on reinforcement learning. In: G. Wei\u00df (ed.) The 10th International Conference on Computer Supported Cooperative Work in Design, pp. 1\u20136 (2006)","DOI":"10.1109\/CSCWD.2006.253120"},{"key":"45_CR26","unstructured":"Mausam, Weld, D.S.: Solving concurrent Markov decision processes. In: Proceedings of the 19th National Conference on Artificial Intelligence, pp. 716\u2013722. AAAI Press (2004)"},{"key":"45_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1007\/3-540-62934-3_39","volume-title":"Distributed Artificial Intelligence Meets Machine Learning Learning in Multi-Agent Environments","author":"N Ono","year":"1997","unstructured":"Ono, N., Fukumoto, K.: A modular approach to multi-agent reinforcement learning. In: Wei\u00df, G. (ed.) Distributed Artificial Intelligence Meets Machine Learning Learning in Multi-Agent Environments. Lecture Notes in Computer Science, vol. 1221, pp. 25\u201339. Springer, Berlin (1997)"},{"key":"45_CR28","unstructured":"Parr, R., Russell, S.: Reinforcement learning with hierarchies of machines. In: Advances in Neural Information Processing Systems, vol. 10, pp. 1043\u20131049. MIT Press (1997)"},{"key":"45_CR29","first-page":"4","volume":"2003","author":"T Str\u00f6sslin","year":"2003","unstructured":"Str\u00f6sslin, T., Gerstner, W.: Reinforcement learning in continuous state and action space. Artif. Neural Netw. ICANN 2003, 4 (2003)","journal-title":"Artif. Neural Netw. ICANN"},{"key":"45_CR30","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R Sutton","year":"1999","unstructured":"Sutton, R., Precup, D., Singh, S.: Between MDPs and Semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112, 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"key":"45_CR31","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"issue":"1","key":"45_CR32","doi-asserted-by":"crossref","first-page":"2217","DOI":"10.1016\/j.procs.2010.04.248","volume":"1","author":"PT Tosic","year":"2010","unstructured":"Tosic, P.T., Vilalta, R.: A unified framework for reinforcement learning, co-learning and meta-learning how to coordinate in collaborative multi-agent systems. Proc. Comput. Sci. 1(1), 2217\u20132226 (2010)","journal-title":"Proc. Comput. Sci."},{"key":"45_CR33","unstructured":"Vasudevan, N., Venkatesh, P.: Design and implementation of a process migration system for the Linux environment. In: 3rd International Conference on Neural, Parallel and Scientific Computations. Atlanta, USA (2006)"},{"key":"45_CR34","unstructured":"Watkins, C.: Learning from delayed rewards. Ph.D. thesis, Cambridge University, Cambridge, England (1989)"},{"issue":"3","key":"45_CR35","first-page":"279","volume":"8","author":"C Watkins","year":"1992","unstructured":"Watkins, C., Dayan, P.: Technical Note: Q-Learning. Mach. Learn. 8(3), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"45_CR36","doi-asserted-by":"crossref","unstructured":"Wu, B., Feng, Y., Zheng, H.: Model-based bayesian reinforcement learning in factored Markov decision process. J. Comput. 9(4), 845\u2013850 (2014)","DOI":"10.4304\/jcp.9.4.845-850"},{"issue":"3","key":"45_CR37","doi-asserted-by":"crossref","first-page":"170","DOI":"10.1109\/TAMD.2009.2037732","volume":"1","author":"C Yong","year":"2009","unstructured":"Yong, C., Miikkulainen, R.: Coevolution of role-based cooperation in multiagent systems. IEEE Trans. Auton. Ment. Dev. 1(3), 170\u2013186 (2009)","journal-title":"IEEE Trans. Auton. Ment. Dev."}],"updated-by":[{"DOI":"10.1007\/s40595-015-0047-8","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2015,7,28]],"date-time":"2015-07-28T00:00:00Z","timestamp":1438041600000}}],"container-title":["Vietnam Journal of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s40595-015-0045-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s40595-015-0045-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s40595-015-0045-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,9]],"date-time":"2024-06-09T21:07:07Z","timestamp":1717967227000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s40595-015-0045-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,7,5]]},"references-count":37,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2015,11]]}},"alternative-id":["45"],"URL":"https:\/\/doi.org\/10.1007\/s40595-015-0045-x","relation":{},"ISSN":["2196-8888","2196-8896"],"issn-type":[{"value":"2196-8888","type":"print"},{"value":"2196-8896","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,7,5]]}}}