{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,12]],"date-time":"2025-06-12T04:49:06Z","timestamp":1749703746934,"version":"3.37.3"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,2,1]],"date-time":"2022-02-01T00:00:00Z","timestamp":1643673600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,2,1]],"date-time":"2022-02-01T00:00:00Z","timestamp":1643673600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Consortium for Robotics and Unmanned Systems Education and Research"},{"DOI":"10.13039\/100000086","name":"Directorate for Mathematical and Physical Sciences","doi-asserted-by":"publisher","award":["1916037"],"award-info":[{"award-number":["1916037"]}],"id":[{"id":"10.13039\/100000086","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s10846-021-01548-2","type":"journal-article","created":{"date-parts":[[2022,2,9]],"date-time":"2022-02-09T14:02:49Z","timestamp":1644415369000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Solving Reward-Collecting Problems with UAVs: A Comparison of Online Optimization and Q-Learning"],"prefix":"10.1007","volume":"104","author":[{"given":"Yixuan","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chrysafis","family":"Vogiatzis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruriko","family":"Yoshida","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Erich","family":"Morman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,2,9]]},"reference":[{"key":"1548_CR1","unstructured":"Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Man\u00e9, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi\u00e9gas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: TensorFlow: Large-scale machine learning on heterogeneous systems (2015)"},{"key":"1548_CR2","unstructured":"Belmega, V., Mertikopoulos, P., Negrel, R., Sanguinetti, L.: Online convex optimization and no-regret learning: Algorithms, guarantees and applications. (2018). arXiv:1804.04529"},{"key":"1548_CR3","volume-title":"Autonomous Vehicles in Support of Naval Operations","author":"NS Board","year":"2005","unstructured":"Board, N.S., Council, N.R., et al.: Autonomous Vehicles in Support of Naval Operations. National Academies Press, Washington, DC (2005)"},{"key":"1548_CR4","unstructured":"Bubeck, S.: Introduction to online optimization. Lecture, Introduction to Veterinary Studies, May 2, Department of Dragon Husbandry, Charlatan State University, Monogahela, WV (2011)"},{"key":"1548_CR5","unstructured":"Burkov, A.: The Hundred-Page Machine Learning Book, vol. 1. Andriy Burkov, Quebec City, Canada (2019)"},{"key":"1548_CR6","doi-asserted-by":"publisher","first-page":"113,820","DOI":"10.1016\/j.eswa.2020.113820","volume":"164","author":"S Carta","year":"2020","unstructured":"Carta, S., Ferreira, A., Podda, A.S., Recupero, D.R., Sanna, A.: Multi-dqn: An ensemble of deep q-learning agents for stock market forecasting. Expert Systems with Applications 164, 113,820 (2020)","journal-title":"Expert Systems with Applications"},{"key":"1548_CR7","doi-asserted-by":"publisher","unstructured":"Chen, X., Deng, X.: Settling the complexity of two-player nash equilibrium. In: 2006 47th Annual IEEE Symposium on Foundations of Computer Science (FOCS\u201906). pp. 261\u2013272 (2006). https:\/\/doi.org\/10.1109\/FOCS.2006.69","DOI":"10.1109\/FOCS.2006.69"},{"key":"1548_CR8","unstructured":"Chollet, F., et\u00a0al.: Keras. (2015). https:\/\/keras.io"},{"key":"1548_CR9","unstructured":"Darken, C., Chang, J., Moody, J., et\u00a0al.: Learning rate schedules for faster stochastic gradient search. In: Neural Networks for Signal Processing, vol. 2. Citeseer (1992)"},{"key":"1548_CR10","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1007\/978-3-642-02930-1_35","volume-title":"Automata, Languages and Programming","author":"C Daskalakis","year":"2009","unstructured":"Daskalakis, C., Papadimitriou, C.H.: On a network generalization of the minmax theorem. In: Albers, S., Marchetti-Spaccamela, A., Matias, Y., Nikoletseas, S., Thomas, W. (eds.) Automata, Languages and Programming, pp. 423\u2013434. Springer, Berlin, Heidelberg (2009)"},{"key":"1548_CR11","unstructured":"Defense Systems Information Analysis Center: Autonomous unmanned vehicles for casualty evacuation support. (2020). https:\/\/www.dsiac.org\/services\/technical-inquiries\/notable-ti\/autonomous-unmanned-vehicles-for-casualty-evacuation-support\/"},{"key":"1548_CR12","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1016\/j.artint.2014.11.009","volume":"247","author":"A Faust","year":"2017","unstructured":"Faust, A., Palunko, I., Cruz, P., Fierro, R., Tapia, L.: Automated aerial suspended cargo delivery through reinforcement learning. Artificial Intelligence 247, 381\u2013398 (2017)","journal-title":"Artificial Intelligence"},{"key":"1548_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3766-0","volume-title":"Simulation-Based Optimization: Parametric Optimization Techniques and Reinforcement Learning","author":"A Gosavi","year":"2003","unstructured":"Gosavi, A.: Simulation-Based Optimization: Parametric Optimization Techniques and Reinforcement Learning. Kluwer Academic Publishers, Boston, MA (2003)"},{"issue":"2","key":"1548_CR14","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1287\/ijoc.1080.0305","volume":"21","author":"A Gosavi","year":"2009","unstructured":"Gosavi, A.: Reinforcement learning: A tutorial survey and recent advances. INFORMS Journal on Computing 21(2), 178\u2013192 (2009)","journal-title":"INFORMS Journal on Computing"},{"key":"1548_CR15","unstructured":"Gurobi Optimization: Gurobi optimizer reference manual (2020)"},{"key":"1548_CR16","doi-asserted-by":"crossref","unstructured":"He, T., Goeckel, D., Raghavendra, R., Towsley, D.: Endhost-based shortest path routing in dynamic networks: An online learning approach. In: 2013 Proceedings IEEE INFOCOM, pp 2202\u20132210. IEEE (2013)","DOI":"10.1109\/INFCOM.2013.6567023"},{"key":"1548_CR17","unstructured":"Hoehn, J.R., Sayler, K.M.: Department of defense counter-unmanned aircraft systems (2020)"},{"key":"1548_CR18","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.artint.2014.11.003","volume":"247","author":"F Ingrand","year":"2017","unstructured":"Ingrand, F., Ghallab, M.: Deliberation for autonomous robots: A survey. Artificial Intelligence 247, 10\u201344 (2017)","journal-title":"Artificial Intelligence"},{"issue":"3","key":"1548_CR19","first-page":"1","volume":"46","author":"B Li","year":"2014","unstructured":"Li, B., Hoi, S.C.: Online portfolio selection: A survey. ACM Computing Surveys (CSUR) 46(3), 1\u201336 (2014)","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"1548_CR20","unstructured":"Maas, A.L., Hannun, A.Y., Ng, A.Y.: Rectifier nonlinearities improve neural network acoustic models. In: ICML Workshop on Deep Learning for Audio, Speech and Language Processing (2013)"},{"key":"1548_CR21","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, A.: Playing atari with deep reinforcement learning. (2013). arXiv:1312.5602"},{"key":"1548_CR22","doi-asserted-by":"publisher","DOI":"10.1002\/9781118029176","volume-title":"Approximate Dynamic Programming: Solving the Curses of Dimensionality","author":"WB Powell","year":"2011","unstructured":"Powell, W.B.: Approximate Dynamic Programming: Solving the Curses of Dimensionality. John Wiley & Sons Inc, Hoboken, NJ (2011)"},{"key":"1548_CR23","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1016\/j.neunet.2018.02.010","volume":"107","author":"J Qiao","year":"2018","unstructured":"Qiao, J., Wang, G., Li, W., Chen, M.: An adaptive deep q-learning strategy for handwritten digit recognition. Neural Networks 107, 61\u201371 (2018)","journal-title":"Neural Networks"},{"key":"1548_CR24","unstructured":"Reddi, S., Kale, S., Kumar, S.: On the convergence of adam and beyond. In: International Conference on Learning Representations (2018)"},{"key":"1548_CR25","unstructured":"Shu, C.: Google acquires artificial intelligence startup deepmind for more than $500m (2014)"},{"key":"1548_CR26","volume-title":"Reinforcement learning an introduction","author":"R Sutton","year":"2018","unstructured":"Sutton, R., Barto, A.: Reinforcement learning an introduction. The MIT Press, Cambridge, MA (2018)"},{"key":"1548_CR27","unstructured":"The Robot Report: Autonomous Casualty Extraction program awarded to RE2 Robotics by U.S. Army. (2020). https:\/\/www.therobotreport.com\/autonomous-casualty-extraction-funding-awarded-re2-robotics-army\/"},{"key":"1548_CR28","doi-asserted-by":"crossref","unstructured":"Wang, Q., Guan, Y., Wang, X.: Svm-based spam filter with active and online learning. In: TREC. Citeseer (2006)","DOI":"10.6028\/NIST.SP.500-272.spam-harbin.zhao"},{"key":"1548_CR29","doi-asserted-by":"crossref","unstructured":"Watkins, C.J., Dayan, P.: Technical note. Reinforcement Learning. pp 55\u201368 (1992)","DOI":"10.1007\/978-1-4615-3618-5_4"},{"issue":"3","key":"1548_CR30","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1007\/s10846-019-00991-6","volume":"96","author":"A Williams","year":"2019","unstructured":"Williams, A., Sebastian, B., Ben-Tzvi, P.: Review and analysis of search, extraction, evacuation, and medical field treatment robots. Journal of Intelligent & Robotic Systems 96(3), 401\u2013418 (2019)","journal-title":"Journal of Intelligent & Robotic Systems"},{"key":"1548_CR31","unstructured":"Zafrany, S.: Deep reinforcement learning the tour de flags test case. (2017). https:\/\/www.samyzaf.com\/ML\/tdf\/tdf.html"},{"issue":"1","key":"1548_CR32","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1109\/TSUSC.2017.2743704","volume":"4","author":"Q Zhang","year":"2017","unstructured":"Zhang, Q., Lin, M., Yang, L.T., Chen, Z., Li, P.: Energy-efficient scheduling for real-time systems based on deep q-learning model. IEEE Transactions on Sustainable Computing 4(1), 132\u2013141 (2017)","journal-title":"IEEE Transactions on Sustainable Computing"}],"container-title":["Journal of Intelligent &amp; Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-021-01548-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10846-021-01548-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-021-01548-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T05:07:56Z","timestamp":1726636076000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10846-021-01548-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2]]},"references-count":32,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["1548"],"URL":"https:\/\/doi.org\/10.1007\/s10846-021-01548-2","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"type":"print","value":"0921-0296"},{"type":"electronic","value":"1573-0409"}],"subject":[],"published":{"date-parts":[[2022,2]]},"assertion":[{"value":"20 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 November 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This paper does not contain any studies with human participants or animals performed by any of the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Participate"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Publish"}},{"value":"The authors declare that they have no conflict of interest.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"35"}}