{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T20:58:53Z","timestamp":1777323533659,"version":"3.51.4"},"publisher-location":"London","reference-count":44,"publisher":"Springer London","isbn-type":[{"value":"9780857291295","type":"print"},{"value":"9780857291301","type":"electronic"}],"license":[{"start":{"date-parts":[[2010,10,29]],"date-time":"2010-10-29T00:00:00Z","timestamp":1288310400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2010,10,29]],"date-time":"2010-10-29T00:00:00Z","timestamp":1288310400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-0-85729-130-1_31","type":"book-chapter","created":{"date-parts":[[2010,11,11]],"date-time":"2010-11-11T19:14:28Z","timestamp":1289502868000},"page":"409-422","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Reinforcement Learning for Scheduling of Maintenance"],"prefix":"10.1007","author":[{"given":"Michael","family":"Knowles","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Baglee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,10,29]]},"reference":[{"issue":"2","key":"31_CR1","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1016\/S0951-8320(01)00148-X","volume":"76","author":"A. Grall","year":"2002","unstructured":"Grall A., Berenguer C., Dieulle L.: A condition-based maintenance policy for stochastically deteriorating systems. Reliability Engineering & System Safety, Volume 76, Issue 2, Pages 167-180, ISSN 0951-8320, DOI: 10.1016\/S0951-8320(01)00148-X.(2002)","journal-title":"Reliability Engineering & System Safety"},{"key":"31_CR2","unstructured":"Bengtsson M.: Standardization Issues in Condition Based Maintenance. In Condition Monitoring and Diagnostic Engineering Management - Proceedings of the 16th International Congress, August 27-29, 2003, V\u00e4xj\u00f6 University, Sweden, Edited by Shrivastav, O. and Al-Najjar, B., V\u00e4xj\u00f6 University Press, ISBN 91-7636-376-7. (2003)"},{"key":"31_CR3","unstructured":"Davies A. (Ed): Handbook of Condition Monitoring - Techniques and Methodology. Springer, 1998 978-0-412-61320-3.(1997)"},{"key":"31_CR4","unstructured":"Barron R. (Ed): Engineering Condition Monitoring: Practice, Methods and Applications. Longman, 1996, 978-0582246560.(1996)"},{"issue":"6","key":"31_CR5","doi-asserted-by":"publisher","first-page":"1425","DOI":"10.1080\/002075400188933","volume":"38","author":"W. Wang","year":"2000","unstructured":"Wang W.: A model to determine the optimal critical level and the monitoring intervals in condition-based maintenance. International Journal of Production Research, volume 38 No 6 pp 1425\u20131436. (2000)","journal-title":"International Journal of Production Research"},{"key":"31_CR6","unstructured":"Meier A.: Is that old refrigerator worth saving? Home Energy Magazine http:\/\/homeenergy.org\/archive\/hem.dis.anl.gov\/eehem\/93\/930107.html(1993)"},{"key":"31_CR7","unstructured":"Litt B., Megowen A. and Meier A.: Maintenance doesn\u2019t necessarily lower energy use. Home Energy Magazine http:\/\/homeenergy.org\/archive\/hem.dis.anl.gov\/eehem\/93\/930108.html. (1993)"},{"key":"31_CR8","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1016\/j.enpol.2008.08.021","volume":"37","author":"K.A. Techato","year":"2009","unstructured":"Techato K-A, Watts D.J. and Chaiprapat S.: Life cycle analysis of retrofitting with high energy efficiency air-conditioner and fluorescent lamp in existing buildings. Energy Policy, Vol. 37, pp 318 \u2013 325. (2009)","journal-title":"Energy Policy"},{"key":"31_CR9","unstructured":"Boardman B., Lane K., Hinnells M., Banks N., Milne G., Goodwin A. and Fawcett T.: Transforming the UK Cold Market Domestic Equipment and Carbon Dioxide Emissions (DECADE) Report. (1997)"},{"key":"31_CR10","unstructured":"Knowles M.J. and Baglee D.:The Role of Maintenance in Energy Saving, 19th MIRCE International Symposium on Engineering and Managing Sustainability - A Reliability, Maintainability and Supportability Perspective, (2009)"},{"key":"31_CR11","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1613\/jair.859","volume":"16","author":"S. Singh","year":"2002","unstructured":"Singh, S. Litman, D., Kearns M ., and Walker, M. Optimizing Dialogue Management with Reinforcement Learning: Experiments with the NJFun System. In Journal of Artificial Intelligence Research (JAIR),Volume 16, pp. 105-133. (2002)","journal-title":"In Journal of Artificial Intelligence Research (JAIR)"},{"key":"31_CR12","doi-asserted-by":"crossref","unstructured":"Altahhan A., Burn K. Wermter S.: Visual Robot Homing using Sarsa(\u03bb), Whole Image Measure, and Radial Basis Function. Proceedings IEEE IJCNN (2008)","DOI":"10.1109\/IJCNN.2008.4634353"},{"key":"31_CR13","unstructured":"Altahhan A.: Conjugate Temporal Difference Methods For Visual Robot Homing. PhD Thesis, University of Sunderland. (2009)"},{"key":"31_CR14","unstructured":"Lazaric, A., M. Restelli, Bonarini A.: Reinforcement Learning in Continuous Action Spaces through Sequential Monte Carlo Methods. Twenty First Annual Conference on Neural Information Processing Systems \u2013 NIPS. (2007)"},{"key":"31_CR15","doi-asserted-by":"crossref","unstructured":"Sheynikhovich, D., Chavarriaga R., Strosslin T. and Gerstner W.: Spatial Representation and Navigation in a Bio-inspired Robot. Biomimetic Neural Learning for Intelligent Robots. S. Wermter, M. Elshaw and G.Palm, Springer: 245-265. (2005)","DOI":"10.1007\/11521082_15"},{"key":"31_CR16","doi-asserted-by":"crossref","unstructured":"Asadpour, M. and Siegwart, R.: Compact Q-learning optimized for micro-robots with processing and memory constrains. Robotics and Autonomous Systems, Science Direct, Elsevier. (2004)","DOI":"10.1016\/j.robot.2004.05.006"},{"key":"31_CR17","volume-title":"8th International Conference on Hybrid Intelligent Systems","author":"M.J. Knowles","year":"2008","unstructured":"Knowles, M.J. and Wermter, S.: The Hybrid Integration of Perceptual Symbol Systems and Interactive Reinforcement Learning. 8th International Conference on Hybrid Intelligent Systems. Barcelona, Spain, September 10-12th, (2008)"},{"key":"31_CR18","first-page":"203","volume-title":"Cognitive Computation","author":"D. Muse","year":"2009","unstructured":"Muse, D. and Wermter, S.: Actor-Critic Learning for Platform-Independent Robot Navigation. Cognitive Computation, Volume 1, Springer New York, pp. 203-220, (2009)"},{"key":"31_CR19","doi-asserted-by":"crossref","first-page":"119","DOI":"10.5772\/54","volume-title":"Reinforcement Learning: Theory and Applications","author":"C. Weber","year":"2008","unstructured":"Weber, C., Elshaw, M., Wermter, S., Triesch J. and Willmot, C.: Reinforcement Learning Embedded in Brains and Robots, In: Weber, C., Elshaw M., and Mayer N. M. (Eds.) Reinforcement Learning: Theory and Applications. pp. 119-142, I-Tech Education and Publishing, Vienna, Austria. (2008)"},{"issue":"3","key":"31_CR20","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P. Stone","year":"2005","unstructured":"Stone, P., Sutton R. S. and Kuhlmann G.: Reinforcement learning for robocup soccer keepaway. International Society for Adaptive Behavior 13(3): 165\u2013188 (2005)","journal-title":"International Society for Adaptive Behavior"},{"key":"31_CR21","volume-title":"The Autonomous Agents and Multi-Agent Systems Conference (AAMAS-07)","author":"M.E. Taylor","year":"2007","unstructured":"Taylor M.E. and Stone P.: Towards reinforcement learning representation transfer. In The Autonomous Agents and Multi-Agent Systems Conference (AAMAS-07), Honolulu, Hawaii. (2007)"},{"key":"31_CR22","doi-asserted-by":"crossref","unstructured":"Kalyanakrishnan S., Liu Y. and Stone P.: Half Field Offense in RoboCup Soccer: A Multiagent Reinforcement Learning Case Study. Lecture Notes In Computer Science, Springer (2007)","DOI":"10.1007\/978-3-540-74024-7_7"},{"key":"31_CR23","doi-asserted-by":"crossref","unstructured":"Lokuge, P. and Alahakoon, D.: Reinforcement learning in neuro BDI agents for achieving agent's intentions in vessel berthing applications 19th International Conference on Advanced Information Networking and Applications, 2005. AINA 2005. Volume: 1 Digital Object Identifier: 10.1109\/AINA.2005.293, Page(s): 681 - 686 vol.1(2005)","DOI":"10.1109\/AINA.2005.293"},{"key":"31_CR24","doi-asserted-by":"crossref","unstructured":"Cong Shi, Shicong Meng, Yuanjie Liu, Dingyi Han and Yong Yu: Reinforcement Learning for Query-Oriented Routing Indices in Unstructured Peer-to-Peer Networks, Sixth IEEE International Conference on Peer-to-Peer Computing P2P 2006, Digital Object Identifier: 10.1109\/P2P.2006.30, Page(s): 267 - 274 (2006)","DOI":"10.1109\/P2P.2006.30"},{"key":"31_CR25","unstructured":"Cong Shi, Shicong Meng, Yuanjie Liu, Dingyi Han and Yong Yu: Reinforcement Learning for Query-Oriented Routing Indices in Unstructured Peer-to-Peer Networks, Sixth IEEE International Conference on Peer-to-Peer Computing, 2006. P2P 2006.Digital Object Identifier: 10.1109\/P2P.2006, Page(s): 267 - 274 (2006)."},{"key":"31_CR26","doi-asserted-by":"crossref","unstructured":"Mattila, V.: Flight time allocation for a fleet of aircraft through reinforcement learning. Simulation Conference, 2007 Winter, Digital Object Identifier: 10.1109\/WSC.2007.4419888 Page(s): 2373 - 2373 (2007)","DOI":"10.1109\/WSC.2007.4419888"},{"key":"31_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, Y. and Fromherz, M.: Constrained flooding: a robust and efficient routing framework for wireless sensor networks, 20th International Conference on Advanced Information Networking and Applications, 2006. AINA 2006.Volume: 1 Digital Object Identifier: 10.1109\/AINA.2006.132 (2006)","DOI":"10.1109\/AINA.2006.132"},{"key":"31_CR28","doi-asserted-by":"crossref","unstructured":"Chasparis, G.C. and Shamma, J.S.: Efficient network formation by distributed reinforcement 47th IEEE Conference on Decision and Control, 2008. CDC 2008. Digital Object Identifier: 10.1109\/CDC.2008.4739163, Page(s): 1690 - 1695 (2008).","DOI":"10.1109\/CDC.2008.4739163"},{"key":"31_CR29","doi-asserted-by":"crossref","unstructured":"Usynin, A., Hines, J.W. and Urmanov, A.: Prognostics-Driven Optimal Control for Equipment Performing in Uncertain Environment Aerospace Conference, 2008 IEEE Digital Object Identifier: 10.1109\/AERO.2008.4526626, Page(s): 1 \u2013 9 (2008)","DOI":"10.1109\/AERO.2008.4526626"},{"key":"31_CR30","doi-asserted-by":"crossref","unstructured":"Lihu, A.and Holban, S.: Top five most promising algorithms in scheduling. 5th International Symposium on Applied Computational Intelligence and Informatics, 2009. SACI '09. Digital Object Identifier: 10.1109\/SACI.2009.5136281, Page(s): 397 - 404 (2009).","DOI":"10.1109\/SACI.2009.5136281"},{"key":"31_CR31","doi-asserted-by":"crossref","unstructured":"Zhang Huiliang and Huang Shell Ying: BDIE architecture for rational agents.. International Conference on Integration of Knowledge Intensive Multi-Agent Systems, Page(s): 623 \u2013 628 (2005)","DOI":"10.1109\/KIMAS.2005.1427156"},{"key":"31_CR32","doi-asserted-by":"crossref","unstructured":"Malhotra, R., Blasch, E.P. and Johnson, J.D.: Learning sensor-detection policies ., Proceedings of the IEEE 1997 National Aerospace and Electronics Conference, 1997. NAECON 1997Volume: 2 Digital Object Identifier: 10.1109\/NAECON.1997.622727 , Page(s): 769 - 776 vol.2 (1997)","DOI":"10.1109\/NAECON.1997.622727"},{"key":"31_CR33","doi-asserted-by":"crossref","unstructured":"Sutton, R.S. and Barto, A.G.: Reinforcement Learning: An Introduction, IEEE Transactions on Neural Networks Volume: 9 , Issue: 5 Digital Object Identifier: 10.1109\/TNN.1998.712192, Page(s): 1054 - 1054 (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"31_CR34","doi-asserted-by":"crossref","unstructured":"Barto, A.G.: Reinforcement learning in the real world 2004. Proceedings. 2004 IEEE International Joint Conference on Neural Networks, Volume: 3 (2004)","DOI":"10.1109\/IJCNN.2004.1380847"},{"key":"31_CR35","unstructured":"Barto, A.G. and Dietterich, T.G.: Reinforcement Learning and Its Relationship to Supervised Learning In Si, J., Barto, A.G., Powell, W.B., and Wunsch, D., editors, Handbook of Learning and Approximate Dynamic Programming, pages 47 - 64. Wiley-IEEE Press, (2004)"},{"key":"31_CR36","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: and Williams, R.J.: Reinforcement learning is direct adaptive optimal control Control Systems Magazine, IEEE Volume: 12 , Issue: 2 Digital Object Identifier: 10.1109\/37.126844 Publication Year: 1992 , Page(s): 19 - 22","DOI":"10.1109\/37.126844"},{"key":"31_CR37","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaebling","year":"1996","unstructured":"Kaebling, L.P., Littman, M.L. and Moore A.W.: Reinforcement Learning: A Survey. Journal of Artificial Intelligence Research, Vol 4, pp 237 \u2013 285. (1996)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"31_CR38","volume-title":"Learning from Delayed Rewards","author":"C.J.C.H. Watkins","year":"1989","unstructured":"Watkins, C.J.C.H.: Learning from Delayed Rewards. PhD thesis, Cambridge University, Cambridge, England. (1989)."},{"key":"31_CR39","unstructured":"Rummery G.A and Niranjan M.: On-line Q-Learning using connectionist Systems. Technical Report CUED\/F-INFENG\/TR166, Cambridge University. (1994)"},{"issue":"1","key":"31_CR40","first-page":"9","volume":"3","author":"R. Sutton","year":"2000","unstructured":"Sutton, R.: Learning to predict by the methods of temporal differences. Machine Learning 3(1),pp 9\u201344. doi:10.1007\/BF00115009. (1988)","journal-title":"Machine Learning"},{"key":"31_CR41","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/(SICI)1098-1063(2000)10:1<1::AID-HIPO1>3.0.CO;2-1","volume":"10","author":"D.J. Foster","year":"2000","unstructured":"Foster D.J., Morris, R.G.N. and Dayan, P.: A model of hippocampally dependent navigation, using the temporal learning rule. Hippocampus, Vol. 10, pp. 1-16, (2000)","journal-title":"Hippocampus"},{"key":"31_CR42","unstructured":"Humphrys, M.: Action Selection methods using Reinforcement Learning , PhD thesis, University of Cambridge, Computer Laboratory (1997)"},{"key":"31_CR43","first-page":"279","volume":"8","author":"C.J.C.H. Watkins","year":"1992","unstructured":"Watkins, C.J.C.H. and Dayan, P.: Technical Note: Q-Learning, Machine Learning 8:279-292. (1992)","journal-title":"Machine Learning"},{"key":"31_CR44","first-page":"1038","volume":"8","author":"R.S. Sutton","year":"1996","unstructured":"Sutton R.S.: Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding. Advances in Neural Processing Systems 8, pp 1038\u20131044. (1996)","journal-title":"Advances in Neural Processing Systems"}],"container-title":["Research and Development in Intelligent Systems XXVII"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-85729-130-1_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T19:57:27Z","timestamp":1740686247000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-0-85729-130-1_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,10,29]]},"ISBN":["9780857291295","9780857291301"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-0-85729-130-1_31","relation":{},"subject":[],"published":{"date-parts":[[2010,10,29]]},"assertion":[{"value":"29 October 2010","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}