{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T18:29:28Z","timestamp":1777487368606,"version":"3.51.4"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2013,1,8]],"date-time":"2013-01-08T00:00:00Z","timestamp":1357603200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"published-print":{"date-parts":[[2015,3]]},"DOI":"10.1007\/s10462-012-9383-6","type":"journal-article","created":{"date-parts":[[2013,1,7]],"date-time":"2013-01-07T00:49:39Z","timestamp":1357519779000},"page":"381-416","source":"Crossref","is-referenced-by-count":78,"title":["Application of reinforcement learning to routing in distributed wireless networks: a review"],"prefix":"10.1007","volume":"43","author":[{"given":"Hasan A. A.","family":"Al-Rawi","sequence":"first","affiliation":[]},{"given":"Ming Ann","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Kok-Lim Alvin","family":"Yau","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,1,8]]},"reference":[{"issue":"4","key":"9383_CR1","doi-asserted-by":"crossref","first-page":"393","DOI":"10.1016\/S1389-1286(01)00302-4","volume":"38","author":"IF Akyildiz","year":"2002","unstructured":"Akyildiz IF, Su W, Sankarasubramaniam Y, Cayirci E (2002) Wireless sensor networks: a survey. Comput Netw 38(4):393\u2013422","journal-title":"Comput Netw"},{"issue":"5","key":"9383_CR2","doi-asserted-by":"crossref","first-page":"810","DOI":"10.1016\/j.adhoc.2009.01.001","volume":"7","author":"IF Akyildiz","year":"2009","unstructured":"Akyildiz IF, Lee WY, Chowdhury KR (2009) Cognitive radio ad hoc networks. Ad Hoc Netw 7(5):810\u2013836","journal-title":"Ad Hoc Netw"},{"key":"9383_CR3","doi-asserted-by":"crossref","unstructured":"Al-Rawi HAA, Yau K-LA (2012) Routing in distributed cognitive radio networks: a survey. Wirel Pers Commun Int J. doi: 10.1007\/s11277-012-0674-7","DOI":"10.1007\/s11277-012-0674-7"},{"key":"9383_CR4","doi-asserted-by":"crossref","first-page":"220","DOI":"10.1115\/1.3426922","volume":"97","author":"JS Albus","year":"1975","unstructured":"Albus JS (1975) A new approach to manipulator control: the cerebellar model articulation controller. J Dyn Syst Meas Control 97:220\u2013227","journal-title":"J Dyn Syst Meas Control"},{"key":"9383_CR5","doi-asserted-by":"crossref","unstructured":"Arroyo-Valles R, Alaiz-Rodriquez R, Guerrero-Curieses A, Cid-Sueiro J (2007) Q-probabilistic routing in wireless sensor networks. In: Proceedings of ISSNIP 3rd international conference intelligent sensors, sensor network and information processing, pp. 1\u20136","DOI":"10.1109\/ISSNIP.2007.4496810"},{"key":"9383_CR6","doi-asserted-by":"crossref","unstructured":"Baruah P, Urgaonkar R (2004) Learning-enforced time domain routing to mobile sinks in wireless sensor fields. In: Proceedings of LCN 29th annals IEEE international conference local computer networks, pp. 525\u2013532","DOI":"10.1109\/LCN.2004.71"},{"issue":"1","key":"9383_CR7","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1109\/TNET.2011.2159844","volume":"20","author":"AA Bhorkar","year":"2012","unstructured":"Bhorkar AA, Naghshvar M, Javidi T, Rao BD (2012) Adaptive opportunistic routing for wireless ad hoc networks. IEEE ACM Trans Netw 20(1):243\u2013256","journal-title":"IEEE ACM Trans Netw"},{"key":"9383_CR8","unstructured":"Boyan J, Littman ML (1994) Packet routing in dynamically changing networks: a reinforcement learning approach. In: Proceedings of NIPS Adv neural information processing systems, pp 671\u2013678"},{"key":"9383_CR9","volume-title":"Algorithms and protocols for wireless, mobile and ad hoc networks","author":"A Boukerche","year":"2009","unstructured":"Boukerche A (2009) Algorithms and protocols for wireless, mobile and ad hoc networks. Wiley, New Jersey"},{"issue":"6","key":"9383_CR10","doi-asserted-by":"crossref","first-page":"128","DOI":"10.1109\/MCOM.2003.1204759","volume":"41","author":"S Burleigh","year":"2003","unstructured":"Burleigh S, Hooke A, Torgerson L, Fall K, Cerf V, Durst B, Scott K, Weiss H (2003) Delay-tolerant networking: an approach to interplanetary internet. IEEE Commun Mag 41(6):128\u2013136","journal-title":"IEEE Commun Mag"},{"issue":"2","key":"9383_CR11","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M Bowling","year":"2002","unstructured":"Bowling M, Veloso M (2002) Multiagent learning using a variable learning rate. Artif Intell 136(2):215\u2013250","journal-title":"Artif Intell"},{"key":"9383_CR12","unstructured":"Chang Y-H, Ho T, Kaelbling LP (2004) Mobilized ad-hoc networks: a reinforcement learning approach. In: Proceedings of ICAC international conference autonomic computer, pp 240\u2013247"},{"key":"9383_CR13","doi-asserted-by":"crossref","unstructured":"Chetret D, Tham C-K, Wong LWC (2004) Reinforcement learning and CMAC-based adaptive routing for MANETs. In: Proceedings of ICON 12th IEEE international conference networks, pp. 540\u2013544","DOI":"10.1109\/ICON.2004.1409226"},{"key":"9383_CR14","doi-asserted-by":"crossref","unstructured":"Clausen T, Jacquet P (2003) Optimized link state routing protocol (OLSR). IETF RFC 3626","DOI":"10.17487\/rfc3626"},{"key":"9383_CR15","unstructured":"Dearden R, Friedman N, Andre D (1999) Model based Bayesian exploration. In: Proceedings of UAI 15th conference uncertainty, artificial intelligence, pp 150\u2013159"},{"key":"9383_CR16","doi-asserted-by":"crossref","unstructured":"Di Felice M, Chowdhury KR, Wu C, Bononi L, Meleis W (2010) Learning-based spectrum selection in cognitive radio ad hoc networks. In: Proceedings of WWIC 8th international conference wired wireless internet communications, pp 133\u2013145","DOI":"10.1007\/978-3-642-13315-2_11"},{"key":"9383_CR17","doi-asserted-by":"crossref","unstructured":"Dong S, Agrawal P, Sivalingam K (2007) Reinforcement learning based geographic routing protocol for UWB wireless sensor network. In: Proceedings of GLOBECOM IEEE global telecommunications conference, pp 652\u2013656","DOI":"10.1109\/GLOCOM.2007.127"},{"issue":"3","key":"9383_CR18","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1109\/TSMCA.2005.846390","volume":"35","author":"J Dowling","year":"2005","unstructured":"Dowling J, Curran E, Cunningham R, Cahill V (2005) Using feedback in collaborative reinforcement learning to adaptively optimize MANET routing. IEEE Trans Syst Man Cybern Part A Syst Hum 35(3):360\u2013372","journal-title":"IEEE Trans Syst Man Cybern Part A Syst Hum"},{"key":"9383_CR19","doi-asserted-by":"crossref","unstructured":"Elwhishi A, Ho P-H, Naik K, Shihada B (2010) ARBR: Adaptive reinforcement-based routing for DTN. In: Proceedings of WIMOB IEEE 6th international conference wireless and mobile computes, networks and communications, pp. 376\u2013385","DOI":"10.1109\/WIMOB.2010.5645040"},{"key":"9383_CR20","doi-asserted-by":"crossref","unstructured":"Forster A (2007) Machine learning techniques applied to wireless ad-hoc networks: guide and survey. In: Proceedings of ISSNIP 3rd international conference intelligent sensors, sensor Networks and information, pp. 365\u2013370","DOI":"10.1109\/ISSNIP.2007.4496871"},{"key":"9383_CR21","doi-asserted-by":"crossref","unstructured":"Forster A, Murphy AL (2007) FROMS: Feedback routing for optimizing multiple sinks in WSN with reinforcement learning. In: Proceedings of ISSNIP 3rd international conference intelligent sensors, sensor Networks and, informations, pp. 371\u2013376","DOI":"10.1109\/ISSNIP.2007.4496872"},{"key":"9383_CR22","doi-asserted-by":"crossref","unstructured":"Forster A, Murphy AL, Schiller J, Terfloth K (2008) An efficient implementation of reinforcement learning based routing on real WSN hardware. In: Proceedings of WIMOB IEEE international conference wireless and mobile computers, networks and communcations, pp 247\u2013252","DOI":"10.1109\/WiMob.2008.99"},{"key":"9383_CR23","doi-asserted-by":"crossref","unstructured":"Fu P, Li J, Zhang D (2005) Heuristic and distributed QoS route discovery for mobile ad hoc networks. In: Proceedings of the CIT 5th international conference on computer and information technology, pp. 512\u2013516","DOI":"10.1007\/11563952_38"},{"key":"9383_CR24","doi-asserted-by":"crossref","DOI":"10.1002\/9780470172261","volume-title":"Genetic algorithms and engineering optimization","author":"M Gen","year":"1999","unstructured":"Gen M, Cheng R (1999) Genetic algorithms and engineering optimization. Wiley, NY"},{"key":"9383_CR25","doi-asserted-by":"crossref","unstructured":"Hao S, Wang T (2006) Sensor networks routing via Bayesian exploration. In: Proceedings of LCN 31th annals of IEEE international conference local computing Networks, pp. 954\u2013955","DOI":"10.1109\/LCN.2006.322207"},{"issue":"6","key":"9383_CR26","doi-asserted-by":"crossref","first-page":"796","DOI":"10.1109\/TMC.2010.28","volume":"9","author":"T Hu","year":"2010","unstructured":"Hu T, Fei Y (2010) QELAR: a machine-learning-based adaptive routing protocol for energy-efficient and lifetime-extended underwater sensor networks. IEEE Trans Mobile Comput 9(6):796\u2013809","journal-title":"IEEE Trans Mobile Comput"},{"issue":"1","key":"9383_CR27","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1109\/TNET.2002.808417","volume":"11","author":"C Intanagonwiwat","year":"2003","unstructured":"Intanagonwiwat C, Govindan R, Estrin D, Heidemann J, Silva F (2003) Directed diffusion for wireless sensor networking. IEEE ACM Trans Netw 11(1):2\u201316","journal-title":"IEEE ACM Trans Netw"},{"key":"9383_CR28","doi-asserted-by":"crossref","unstructured":"Kennedy J, Eberhart R (1995) Particle swarm optimization. In: Proceedings of IEEE international conference neural networks. pp 1942\u20131948","DOI":"10.1109\/ICNN.1995.488968"},{"key":"9383_CR29","unstructured":"Kumar S, Miikkulainen R (1997) Dual reinforcement Q-routing: an on-line adaptive routing algorithm. In: Proceedings of ANNIE artificial neural networks in engineering conference. pp 231\u2013238"},{"key":"9383_CR30","unstructured":"Liang X, Balasingham I, Byun S-S (2008) A multi-agent reinforcement learning based routing protocol for wireless sensor networks. In: Proceedings of ISWCS IEEE international symposium Wireless communications systems. pp 552\u2013557"},{"issue":"1","key":"9383_CR31","doi-asserted-by":"crossref","first-page":"102","DOI":"10.1109\/TWC.2010.111910.091238","volume":"10","author":"Z Lin","year":"2011","unstructured":"Lin Z, Schaar Mvd (2011) Autonomic and distributed joint routing and power control for delay-sensitive applications in multi-hop wireless networks. IEEE Tran Wirel Commun 10(1):102\u2013113","journal-title":"IEEE Tran Wirel Commun"},{"key":"9383_CR32","doi-asserted-by":"crossref","unstructured":"Naruephiphat W, Usaha W (2008) Balancing tradeoffs for energy-efficient routing MANETs based on reinforcement learning. In: Proceedings of VTC spring IEEE vehicular techmology conference. pp 2361\u20132365","DOI":"10.1109\/VETECS.2008.523"},{"key":"9383_CR33","doi-asserted-by":"crossref","unstructured":"Nurmi P (2007) Reinforcement learning for routing in ad hoc networks. In: Proceedings of WiOpt 5th international symposium modeling and optimization in mobile, ad hoc and wireless network and workshops, pp 1\u20138","DOI":"10.1109\/WIOPT.2007.4480049"},{"key":"9383_CR34","unstructured":"Ouzecki D, Jevtic D (2010) Reinforcement learning as adaptive network routing of mobile agents. In: Proceedings of MIPRO 33rd international convention, pp 479\u2013484"},{"key":"9383_CR35","unstructured":"Perkins CE, Royer EM (1999) Ad-hoc on-demand distance vector routing. In: Proceedings of WMCSA mobile computers systems and applications, pp 90\u2013100"},{"key":"9383_CR36","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-61068-4","volume-title":"Neural networks: a systematic introduction","author":"R Rojas","year":"1996","unstructured":"Rojas R (1996) Neural networks: a systematic introduction. Springer, NY"},{"key":"9383_CR37","doi-asserted-by":"crossref","unstructured":"Santhi G, Nachiappan A, Ibrahime MZ, Raghunadhane R, Favas MK (2011) Q-learning based adaptive QoS routing protocol for MANETs. In: Proceedings of ICRTIT international conference recent trends in information technology, pp 1233\u20131238","DOI":"10.1109\/ICRTIT.2011.5972411"},{"key":"9383_CR38","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"9383_CR39","volume-title":"Practical mathematical optimization: an introduction to basic optimization theory and classical and new gradient-based algorithms","author":"A Snyman","year":"2005","unstructured":"Snyman A (2005) Practical mathematical optimization: an introduction to basic optimization theory and classical and new gradient-based algorithms. Springer, NY"},{"key":"9383_CR40","volume-title":"Ad hoc mobile wireless networks: protocols and systems","author":"CK Toh","year":"2001","unstructured":"Toh CK (2001) Ad hoc mobile wireless networks: protocols and systems. Prentice Hall, New Jersey"},{"key":"9383_CR41","doi-asserted-by":"crossref","unstructured":"Usaha W (2004) A reinforcement learning approach for path discovery in MANETs with path caching strategy. In: Proceedings of ISWCS 1st international symposium wireless communications systems, pp 220\u2013224","DOI":"10.1109\/ISWCS.2004.1407241"},{"key":"9383_CR42","doi-asserted-by":"crossref","unstructured":"Xia B, Wahab MH, Yang Y, Fan Z, Sooriyabandara M (2009) Reinforcement learning based spectrum-aware routing in multi-hop cognitive radio networks. In: Proceedings of CROWNCOM 4th international conference cognitive radio oriented wireless networks and communications, pp 1\u20135","DOI":"10.1109\/CROWNCOM.2009.5189189"},{"issue":"1","key":"9383_CR43","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1016\/j.jnca.2011.08.007","volume":"35","author":"K-LA Yau","year":"2012","unstructured":"Yau K-LA, Komisarczuk P, Teal PD (2012) Reinforcement learning for context awareness and intelligence in wireless networks: review, new features and open issues. J Netw Comput Appl 35(1):253\u2013267","journal-title":"J Netw Comput Appl"},{"issue":"5","key":"9383_CR44","doi-asserted-by":"crossref","first-page":"577","DOI":"10.1109\/TAC.2005.847060","volume":"50","author":"GG Yin","year":"2005","unstructured":"Yin GG, Krishnamurthy V (2005) Least mean square algorithms with markov regime-switching limit. IEEE Trans Autom Control 50(5):577\u2013593","journal-title":"IEEE Trans Autom Control"},{"issue":"3","key":"9383_CR45","doi-asserted-by":"crossref","first-page":"1899","DOI":"10.1109\/TVT.2007.907023","volume":"57","author":"FR Yu","year":"2008","unstructured":"Yu FR, Wong VWS, Leong VCM (2008) A new QoS provisioning method for adaptive multimedia in wireless networks. IEEE Trans Veh Technol 57(3):1899\u20131909","journal-title":"IEEE Trans Veh Technol"},{"key":"9383_CR46","doi-asserted-by":"crossref","unstructured":"Zhang Y, Fromherz M (2006) Constrained flooding: a robust and efficient routing framework for wireless sensor networks. In: Proceedings of AINA 20th international conference advanced information networking and applications","DOI":"10.1109\/AINA.2006.132"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-012-9383-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10462-012-9383-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-012-9383-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,8]],"date-time":"2019-07-08T01:35:45Z","timestamp":1562549745000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10462-012-9383-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,1,8]]},"references-count":46,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2015,3]]}},"alternative-id":["9383"],"URL":"https:\/\/doi.org\/10.1007\/s10462-012-9383-6","relation":{},"ISSN":["0269-2821","1573-7462"],"issn-type":[{"value":"0269-2821","type":"print"},{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,1,8]]}}}