{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T18:27:12Z","timestamp":1769192832626,"version":"3.49.0"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2014,6,25]],"date-time":"2014-06-25T00:00:00Z","timestamp":1403654400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Wireless Netw"],"published-print":{"date-parts":[[2014,11]]},"DOI":"10.1007\/s11276-014-0762-6","type":"journal-article","created":{"date-parts":[[2014,6,24]],"date-time":"2014-06-24T11:46:11Z","timestamp":1403610371000},"page":"2589-2604","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Two timescale convergent Q-learning for sleep-scheduling in wireless sensor networks"],"prefix":"10.1007","volume":"20","author":[{"given":"L. A.","family":"Prashanth","sequence":"first","affiliation":[]},{"given":"Abhranil","family":"Chatterjee","sequence":"additional","affiliation":[]},{"given":"Shalabh","family":"Bhatnagar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,6,25]]},"reference":[{"issue":"3","key":"762_CR1","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1137\/S0363012999361974","volume":"40","author":"J Abounadi","year":"2002","unstructured":"Abounadi, J., Bertsekas, D., & Borkar, V. (2002). Learning algorithms for Markov decision processes with average cost. SIAM Journal on Control and Optimization, 40(3), 681\u2013698.","journal-title":"SIAM Journal on Control and Optimization"},{"key":"762_CR2","doi-asserted-by":"crossref","unstructured":"Baird, L. (1995). Residual algorithms: Reinforcement learning with function approximation. In: ICML, pp 30\u201337.","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"762_CR3","unstructured":"Beccuti, M., Codetta-Raiteri, D., & Franceschinis, G. (2009). Multiple abstraction levels in performance analysis of wsn monitoring systems. In: International ICST conference on performance evaluation methodologies and tools, p. 73."},{"key":"762_CR4","volume-title":"Dynamic programming and optimal control","author":"DP Bertsekas","year":"2007","unstructured":"Bertsekas, D. P. (2007). Dynamic programming and optimal control (3rd ed., Vol. II). Belmont: Athena Scientific.","edition":"3"},{"key":"762_CR5","volume-title":"Neuro-dynamic programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas, D. P., & Tsitsiklis, J. N. (1996). Neuro-dynamic programming. Belmont: Athena Scientific."},{"key":"762_CR6","unstructured":"Bhatnagar, S., & Lakshmanan, K. (2012). A new Q-learning algorithm with linear function approximation. Technical report SSL, IISc, URL http:\/\/stochastic.csa.iisc.ernet.in\/www\/research\/files\/IISc-CSA-SSL-TR-2012-3.pdf ."},{"issue":"2","key":"762_CR7","doi-asserted-by":"crossref","first-page":"180","DOI":"10.1145\/858481.858486","volume":"13","author":"S Bhatnagar","year":"2003","unstructured":"Bhatnagar, S., Fu, M., Marcus, S., & Wang, I. (2003). Two-timescale simultaneous perturbation stochastic approximation using deterministic perturbation sequences. ACM Transactions on Modeling and Computer Simulation (TOMACS), 13(2), 180\u2013209.","journal-title":"ACM Transactions on Modeling and Computer Simulation (TOMACS)"},{"issue":"11","key":"762_CR8","doi-asserted-by":"crossref","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","volume":"45","author":"S Bhatnagar","year":"2009","unstructured":"Bhatnagar, S., Sutton, R. S., Ghavamzadeh, M., & Lee, M. (2009). Natural actor-critic algorithms. Automatica, 45(11), 2471\u20132482.","journal-title":"Automatica"},{"key":"762_CR9","doi-asserted-by":"crossref","unstructured":"Bhatnagar, S., Prasad, H., & Prashanth, L. (2013). Stochastic recursive algorithms for optimization (Vol. 434). New York: Springer.","DOI":"10.1007\/978-1-4471-4285-0"},{"key":"762_CR10","doi-asserted-by":"crossref","unstructured":"Borkar, V. (2008). Stochastic approximation: A dynamical systems viewpoint. Cambridge: Cambridge University Press.","DOI":"10.1007\/978-93-86279-38-5"},{"issue":"3","key":"762_CR11","doi-asserted-by":"crossref","first-page":"1677","DOI":"10.1109\/TIT.2011.2178150","volume":"58","author":"Y Cui","year":"2012","unstructured":"Cui, Y., Lau, V. K., Wang, R., Huang, H., & Zhang, S. (2012a). A survey on delay-aware resource control for wireless systemsLarge deviation theory, stochastic lyapunov drift, and distributed stochastic learning. IEEE Transactions on Information Theory, 58(3), 1677\u20131701.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"7","key":"762_CR12","doi-asserted-by":"crossref","first-page":"3786","DOI":"10.1109\/TSP.2012.2194291","volume":"60","author":"Y Cui","year":"2012","unstructured":"Cui, Y., Lau, V. K., & Wu, Y. (2012b). Delay-aware BS discontinuous transmission control and user scheduling for energy harvesting downlink coordinated MIMO systems. IEEE Transactions on Signal Processing, 60(7), 3786\u20133795.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"4","key":"762_CR13","doi-asserted-by":"crossref","first-page":"1904","DOI":"10.1109\/TVT.2008.2002917","volume":"58","author":"F Fu","year":"2009","unstructured":"Fu, F., & van der Schaar, M. (2009). Learning to compete for resources in wireless stochastic games. IEEE Transactions on Vehicular Technology, 58(4), 1904\u20131919.","journal-title":"IEEE Transactions on Vehicular Technology"},{"issue":"5","key":"762_CR14","doi-asserted-by":"crossref","first-page":"2091","DOI":"10.1109\/TSP.2007.912265","volume":"56","author":"J Fuemmeler","year":"2008","unstructured":"Fuemmeler, J., & Veeravalli, V. (2008). Smart sleeping policies for energy efficient tracking in sensor networks. IEEE Transactions on Signal Processing, 56(5), 2091\u20132101.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"9","key":"762_CR15","doi-asserted-by":"crossref","first-page":"4354","DOI":"10.1109\/TSP.2011.2159496","volume":"59","author":"J Fuemmeler","year":"2011","unstructured":"Fuemmeler, J., Atia, G., & Veeravalli, V. (2011). Sleep control for tracking in sensor networks. IEEE Transactions on Signal Processing, 59(9), 4354\u20134366.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"762_CR16","doi-asserted-by":"crossref","unstructured":"Gui, C., & Mohapatra, P. (2004). Power conservation and quality of surveillance in target tracking sensor networks. In: Proceedings of the international conference on mobile computing and networking, pp. 129\u2013143.","DOI":"10.1145\/1023720.1023734"},{"key":"762_CR17","doi-asserted-by":"crossref","unstructured":"Jiang, B., Han, K., Ravindran, B., & Cho, H. (2008). Energy efficient sleep scheduling based on moving directions in target tracking sensor network. In: IEEE international symposium on parallel and distributed processing, pp. 1\u201310.","DOI":"10.1109\/IPDPS.2008.4536330"},{"key":"762_CR18","unstructured":"Jianlin, M., Fenghong, X., & Hua, L. (2009). RL-based superframe order adaptation algorithm for IEEE 802.15.4 networks. In: Chinese control and decision conference, IEEE, pp. 4708\u20134711."},{"key":"762_CR19","doi-asserted-by":"crossref","first-page":"200","DOI":"10.1007\/11890348_16","volume":"4239","author":"Lu Jin Gy","year":"2006","unstructured":"Jin Gy, Lu, & Xy, Park M. S. (2006). Dynamic clustering for object tracking in wireless sensor networks. Ubiquitous Computing Systems, 4239, 200\u2013209.","journal-title":"Ubiquitous Computing Systems"},{"key":"762_CR20","doi-asserted-by":"crossref","unstructured":"Khan, M. I., & Rinner, B. (2012). Resource coordination in wireless sensor networks by cooperative reinforcement learning. In: IEEE international conference on pervasive computing and communications workshop, pp. 895\u2013900.","DOI":"10.1109\/PerComW.2012.6197639"},{"key":"762_CR21","doi-asserted-by":"crossref","unstructured":"Konda, V. R., & Tsitsiklis, J. N. (2004) Convergence rate of linear two-time-scale stochastic approximation. Annals of applied probability, pp. 796\u2013819.","DOI":"10.1214\/105051604000000116"},{"key":"762_CR22","first-page":"768","volume-title":"IEEE International Conference on Networking","author":"Z Liu","year":"2006","unstructured":"Liu, Z., & Elhanany, I. (2006). RL-MAC: A QoS-aware reinforcement learning based MAC protocol for wireless sensor networks. IEEE International Conference on Networking (pp. 768\u2013773). IEEE: Sensing and Control."},{"key":"762_CR23","doi-asserted-by":"crossref","unstructured":"Niu, J. (2010) Self-learning scheduling approach for wireless sensor network. In: International conference on future computer and communication (ICFCC), IEEE, Vol. 3, pp. 253\u2013257.","DOI":"10.1109\/ICFCC.2010.5497643"},{"key":"762_CR24","unstructured":"Prashanth, L., Chatterjee, A., & Bhatnagar, S. (2014). Adaptive sleep-wake control using reinforcement learning in sensor networks. In: 6th international conference on communication systems and networks (COMSNETS), IEEE."},{"key":"762_CR25","doi-asserted-by":"crossref","unstructured":"Prashanth, L. A., & Bhatnagar, S. (2011a). Reinforcement learning with average cost for adaptive control of traffic lights at intersections. In: 14th International IEEE conference on intelligent transportation systems (ITSC), pp. 1640\u20131645.","DOI":"10.1109\/ITSC.2011.6082823"},{"issue":"2","key":"762_CR26","doi-asserted-by":"crossref","first-page":"412","DOI":"10.1109\/TITS.2010.2091408","volume":"12","author":"LA Prashanth","year":"2011","unstructured":"Prashanth, L. A., & Bhatnagar, S. (2011b). Reinforcement learning with function approximation for traffic signal control. IEEE Transactions on Intelligent Transportation Systems, 12(2), 412\u2013421.","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"762_CR27","doi-asserted-by":"crossref","DOI":"10.1109\/INFOCOM.2008.198","volume-title":"Optimal sleep-wake scheduling for quickest intrusion detection using sensor networks","author":"K Premkumar","year":"2008","unstructured":"Premkumar, K., & Kumar, A. (2008). Optimal sleep-wake scheduling for quickest intrusion detection using sensor networks. Arizona, USA: IEEE INFOCOM."},{"key":"762_CR28","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov decision processes: Discrete stochastic dynamic programming","author":"M Puterman","year":"1994","unstructured":"Puterman, M. (1994). Markov decision processes: Discrete stochastic dynamic programming. New York: Wiley."},{"key":"762_CR29","doi-asserted-by":"crossref","unstructured":"Rucco, L., Bonarini, A., Brandolese, C., & Fornaciari, W. (2013). A bird\u2019s eye view on reinforcement learning approaches for power management in WSNs. In: Wireless and mobile networking conference (WMNC), IEEE, pp. 1\u20138.","DOI":"10.1109\/WMNC.2013.6548988"},{"issue":"3","key":"762_CR30","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1109\/9.119632","volume":"37","author":"JC Spall","year":"1992","unstructured":"Spall, J. C. (1992). Multivariate stochastic approximation using a simultaneous perturbation gradient approximation. IEEE Transactions on Automatic Control, 37(3), 332\u2013341.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"762_CR31","volume-title":"Reinforcement learning: An introduction","author":"R Sutton","year":"1998","unstructured":"Sutton, R., & Barto, A. (1998). Reinforcement learning: An introduction. Cambridge: Cambridge University Press."},{"key":"762_CR32","doi-asserted-by":"crossref","unstructured":"Tsitsiklis, J. N., & Van Roy, B. (1997). An Analysis of Temporal Difference Learning with Function Approximation. IEEE Transactions on Automatic Control, 42(5), 674\u2013690.","DOI":"10.1109\/9.580874"},{"issue":"3","key":"762_CR33","first-page":"279","volume":"8","author":"C Watkins","year":"1992","unstructured":"Watkins, C., & Dayan, P. (1992). Machine learning. Q-learning, 8(3), 279\u2013292.","journal-title":"Q-learning"}],"container-title":["Wireless Networks"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11276-014-0762-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11276-014-0762-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11276-014-0762-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,21]],"date-time":"2020-08-21T08:37:45Z","timestamp":1597999065000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11276-014-0762-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,6,25]]},"references-count":33,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2014,11]]}},"alternative-id":["762"],"URL":"https:\/\/doi.org\/10.1007\/s11276-014-0762-6","relation":{},"ISSN":["1022-0038","1572-8196"],"issn-type":[{"value":"1022-0038","type":"print"},{"value":"1572-8196","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,6,25]]}}}