{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T04:19:31Z","timestamp":1772338771851,"version":"3.50.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030352875","type":"print"},{"value":"9783030352882","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-35288-2_5","type":"book-chapter","created":{"date-parts":[[2019,11,24]],"date-time":"2019-11-24T19:02:57Z","timestamp":1574622177000},"page":"54-65","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["An Empirical Study of Reward Structures for Actor-Critic Reinforcement Learning in Air Combat Manoeuvring 
Simulation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9016-8912","authenticated-orcid":false,"given":"Budi","family":"Kurniawan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8687-4424","authenticated-orcid":false,"given":"Peter","family":"Vamplew","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1184-8376","authenticated-orcid":false,"given":"Michael","family":"Papasimeon","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6199-9685","authenticated-orcid":false,"given":"Richard","family":"Dazeley","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2537-0326","authenticated-orcid":false,"given":"Cameron","family":"Foale","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,11,25]]},"reference":[{"key":"5_CR1","unstructured":"Alford, R., Borck, H., Karneeb, J., Aha, D.: Active behavior recognition in beyond visual range air combat. In: Proceedings of the Third Annual Conference on Advances in Cognitive Systems (2015)"},{"key":"5_CR2","first-page":"833","volume":"9","author":"A Barto","year":"1983","unstructured":"Barto, A., Sutton, R.S., Anderson, C.W.: Neuron-like adaptive elements that can solve difficult learning control problems. IEEE Trans. Syst. Man Cybern. 9, 833\u2013836 (1983)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 41\u201348 (2009)","DOI":"10.1145\/1553374.1553380"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Fang, J., Yan, W.J., Fang, W.: Air combat strategies of CGF based on Q-learning and behavior tree. DEStech Trans. Eng. Technol. Res. 
(2017)","DOI":"10.12783\/dtetr\/iceeac2017\/10729"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Floyd, M.W., Karneeb, J., Moore, P., Aha, D.W.: A goal reasoning agent for controlling UAVs in beyond-visual-range air combat. In: Proceedings 26th International Joint Conference on Artificial Intelligence (2017)","DOI":"10.24963\/ijcai.2017\/657"},{"key":"5_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1007\/978-3-642-04921-7_37","volume-title":"Adaptive and Natural Computing Algorithms","author":"M Grze\u015b","year":"2009","unstructured":"Grze\u015b, M., Kudenko, D.: Improving optimistic exploration in model-free reinforcement learning. In: Kolehmainen, M., Toivanen, P., Beliczynski, B. (eds.) ICANNGA 2009. LNCS, vol. 5495, pp. 360\u2013369. Springer, Heidelberg (2009). \nhttps:\/\/doi.org\/10.1007\/978-3-642-04921-7_37"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Heinze, C., Papasimeon, M., Goss, S., Cross, M., Connell, R.: Simulating fighter pilots. In: Defence Industry Applications of Autonomous Agents and Multi-Agent Systems, pp. 113\u2013130 (2007)","DOI":"10.1007\/978-3-7643-8571-2_7"},{"key":"5_CR8","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1007\/978-3-642-33926-4_49","volume-title":"Intelligent Autonomous Systems","author":"D Lee","year":"2013","unstructured":"Lee, D., Bang, H.: Planar evasive aircrafts maneuvers using reinforcement learning. In: Lee, S., Cho, H., Yoon, K.J., Lee, J. (eds.) Intelligent Autonomous Systems. AISC, vol. 193, pp. 533\u2013542. Springer, Heidelberg (2013). 
\nhttps:\/\/doi.org\/10.1007\/978-3-642-33926-4_49"},{"key":"5_CR9","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1007\/978-981-10-6463-0_24","volume-title":"Modeling, Design and Simulation of Systems","author":"P Liu","year":"2017","unstructured":"Liu, P., Ma, Y.: A deep reinforcement learning based intelligent decision method for UCAV air combat. In: Mohamed Ali, M.S., Wahid, H., Mohd Subha, N.A., Sahlan, S., Md. Yunus, M.A., Wahap, A.R. (eds.) AsiaSim 2017. CCIS, vol. 751, pp. 274\u2013286. Springer, Singapore (2017). \nhttps:\/\/doi.org\/10.1007\/978-981-10-6463-0_24"},{"key":"5_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/978-3-030-03098-8_2","volume-title":"PRIMA 2018: Principles and Practice of Multi-Agent Systems","author":"M Masek","year":"2018","unstructured":"Masek, M., Lam, C.P., Benke, L., Kelly, L., Papasimeon, M.: Discovering emergent agent behaviour with evolutionary finite state machines. In: Miller, T., Oren, N., Sakurai, Y., Noda, I., Savarimuthu, B.T.R., Cao Son, T. (eds.) PRIMA 2018. LNCS, vol. 11224, pp. 19\u201334. Springer, Cham (2018). \nhttps:\/\/doi.org\/10.1007\/978-3-030-03098-8_2"},{"key":"5_CR11","doi-asserted-by":"publisher","first-page":"1641","DOI":"10.2514\/1.46815","volume":"33","author":"J McGrew","year":"2010","unstructured":"McGrew, J., How, J.P., Williams, B., Roy, N.: Air-combat strategy using approximate dynamic programming. J. Guidance Control Dyn. 33, 1641\u20131654 (2010)","journal-title":"J. Guidance Control Dyn."},{"key":"5_CR12","unstructured":"Mizokami, K.: This chart explains how crazy-expensive fighter jets Have Gotten. 
Popular Mechanics, March 2017"},{"key":"5_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"5_CR14","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: Proceedings 33rd International Conference on Machine Learning, vol. 48, pp. 1928\u20131937 (2016)"},{"key":"5_CR15","doi-asserted-by":"crossref","unstructured":"Mouton, H., Roodt, J., le Roux, H.: Applying reinforcement learning to the weapon assignment problem in air defence. Scientia Militaria S. Afr. J. Mil. Stud. 123\u2013140 (2011)","DOI":"10.5787\/39-2-115"},{"issue":"2","key":"5_CR16","doi-asserted-by":"publisher","first-page":"204","DOI":"10.5139\/IJASS.2016.17.2.204","volume":"17","author":"H Park","year":"2016","unstructured":"Park, H., Lee, B., Takh, M.: Differential game based air combat maneuver generation using scoring function matrix. Int. J. Aeronaut. Space Sci. 17(2), 204\u2013213 (2016)","journal-title":"Int. J. Aeronaut. Space Sci."},{"key":"5_CR17","unstructured":"Ramirez, M., et al.: Integrated hybrid planning and programmed control for real time UAV maneuvering. In: Proceedings 17th International Conference on Autonomous Agents and MultiAgent Systems, pp. 1318\u20131326 (2018)"},{"issue":"3","key":"5_CR18","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1016\/0898-1221(92)90217-6","volume":"24","author":"EY Rodin","year":"1992","unstructured":"Rodin, E.Y., Amin, S.M.: Maneuver prediction in air combat via artificial neural networks. Comput. Math. Appl. 24(3), 95\u2013112 (1992)","journal-title":"Comput. Math. 
Appl."},{"key":"5_CR19","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers, D.M., Vamplew, P., Whiteson, S., Dazeley, R.: A survey of multi-objective sequential decision-making. J. Artif. Intell. Res. 48, 67\u2013113 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"5_CR20","unstructured":"Shalev, S., Shammah, S., Shashua, A.: Safe, multi-agent, reinforcement learning for autonomous driving: CoRR, vol abs\/1610.03295 (2016)"},{"key":"5_CR21","volume-title":"Fighter Combat: Tactics and Maneuvering","author":"RL Shaw","year":"1985","unstructured":"Shaw, R.L.: Fighter Combat: Tactics and Maneuvering. Naval Institute Press, Annapolis (1985)"},{"key":"5_CR22","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/11575726_13","volume-title":"Agent-Mediated Electronic Commerce VI. Theories for and Engineering of Distributed Mechanisms and Systems","author":"AA Sherstov","year":"2006","unstructured":"Sherstov, A.A., Stone, P.: Three automated stock-trading agents: a comparative study. In: Faratin, P., Rodr\u00edguez-Aguilar, J.A. (eds.) AMEC 2004. LNCS (LNAI), vol. 3435, pp. 173\u2013187. Springer, Heidelberg (2006). \nhttps:\/\/doi.org\/10.1007\/11575726_13"},{"key":"5_CR23","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529, 484 (2016)","journal-title":"Nature"},{"key":"5_CR24","volume-title":"Reinforcement Learning: An Introduction","author":"R Sutton","year":"2018","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction, 2nd edn. 
MIT Press, Cambridge (2018)","edition":"2"},{"key":"5_CR25","unstructured":"Sutton, R., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems, pp. 1057\u20131063 (2000)"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Teng, T.H., Tan, A.H., Tan, Y.S., Yeo, A.: Self-organizing neural networks for learning air combat maneuvers. In: IEEE World Congress on Computational Intelligence, Brisbane, Australia (2012)","DOI":"10.1109\/IJCNN.2012.6252763"},{"key":"5_CR27","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/s10994-010-5232-5","volume":"84","author":"P Vamplew","year":"2011","unstructured":"Vamplew, P., Dazeley, R., Berry, A., Issabekov, R., Dekker, E.: Empirical evaluation methods for multiobjective reinforcement learning algorithms. Mach. Learn. 84, 51 (2011)","journal-title":"Mach. Learn."},{"key":"5_CR28","unstructured":"Vinberg, D.: Guided reinforcement learning applied to air-combat simulation. Master\u2019s thesis. Royal Institute of Technology, Sweden (2010)"},{"key":"5_CR29","unstructured":"Wharington, J.: Autonomous control of soaring aircraft by reinforcement learning. 
Doctorate thesis at Royal Melbourne Institute of Technology (1998)"}],"container-title":["Lecture Notes in Computer Science","AI 2019: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-35288-2_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,11,24]],"date-time":"2019-11-24T19:28:35Z","timestamp":1574623715000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-35288-2_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030352875","9783030352882"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-35288-2_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"25 November 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australasian Joint Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Adelaide, SA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 
2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ausai2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/nugget.unisa.edu.au\/AI2019\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"115","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"48","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"42% - The value is computed by the equation \"Number of Full Papers 
Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}