{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T20:13:48Z","timestamp":1774383228439,"version":"3.50.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319672342","type":"print"},{"value":"9783319672359","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-67235-9_19","type":"book-chapter","created":{"date-parts":[[2017,9,25]],"date-time":"2017-09-25T23:43:34Z","timestamp":1506383014000},"page":"335-362","source":"Crossref","is-referenced-by-count":8,"title":["A Hidden Markov Restless Multi-armed Bandit Model for Playout Recommendation Systems"],"prefix":"10.1007","author":[{"given":"Rahul","family":"Meshram","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aditya","family":"Gopalan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"D.","family":"Manjunath","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,9,27]]},"reference":[{"key":"19_CR1","first-page":"3901","volume":"23","author":"S Agrawal","year":"2012","unstructured":"Agrawal, S., Goyal, N.: Analysis of Thompson sampling for the multi-armed bandit problem. JMLR Workshop Conf. Proc. 23, 3901\u20133926 (2012)","journal-title":"JMLR Workshop Conf. Proc."},{"issue":"2\u20133","key":"19_CR2","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47(2\u20133), 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"key":"19_CR3","unstructured":"Avrachenkov, K., Borkar, V.S.: Whittle index policy for crawling ephemeral content. Technical report, report no. 8702, INRIA (2015). https:\/\/hal.archives-ouvertes.fr\/"},{"key":"19_CR4","volume-title":"Dynamic Programming and Optimal Control","author":"DP Bertsekas","year":"1995","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, vol. 1, 1st edn. Athena Scientific, Belmont (1995)","edition":"1"},{"key":"19_CR5","volume-title":"Dynamic Programming and Optimal Control","author":"DP Bertsekas","year":"1995","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, vol. 2, 1st edn. Athena Scientific, Belmont (1995)","edition":"1"},{"issue":"1","key":"19_CR6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2200000024","volume":"5","author":"S Bubeck","year":"2012","unstructured":"Bubeck, S., Bianchi, N.C.: Regret analysis of stochastic and non-stochastic multi-armed bandit problem. Found. Trends Mach. Learn. 5(1), 1\u2013122 (2012)","journal-title":"Found. Trends Mach. Learn."},{"issue":"5","key":"19_CR7","doi-asserted-by":"crossref","first-page":"2053","DOI":"10.1109\/TIT.2010.2044061","volume":"56","author":"E Candes","year":"2010","unstructured":"Candes, E., Tao, T.: The power of convex relaxation: near optimal matrix completion. IEEE Trans. Inf. Theory 56(5), 2053\u20132080 (2010)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"19_CR8","unstructured":"Caron, S., Kveton, B., Lelarge, M., Bhagat, S.: Leveraging side observations in stochastic bandits. Arxiv (2012)"},{"key":"19_CR9","unstructured":"Chapelle, O., Li, L.: An empirical evaluation of Thompson sampling. In: Proceedings of NIPS (2011)"},{"key":"19_CR10","doi-asserted-by":"crossref","DOI":"10.1002\/9780470980033","volume-title":"Multi-armed Bandit Allocation Indices","author":"J Gittins","year":"2011","unstructured":"Gittins, J., Glazebrook, K., Weber, R.: Multi-armed Bandit Allocation Indices, 2nd edn. Wiley, New York (2011)","edition":"2"},{"key":"19_CR11","unstructured":"Gopalan, A., Mannor, S.: Thompson sampling for learning parameterized Markov decision processes. In: Proceedings of COLT (2015)"},{"key":"19_CR12","unstructured":"Gopalan, A., Mannor, S., Mansour, Y.: Thompson sampling for complex online problems. In: Proceedings of ICML (2014)"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Hariri, N., Mobasher, B., Burke, R.: Context-aware music recommendation based on latent topic sequential patterns. In: Proceedings of ACM RecSys (2012)","DOI":"10.1145\/2365952.2365979"},{"issue":"1","key":"19_CR14","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"TL Lai","year":"1985","unstructured":"Lai, T.L., Robbins, H.: Asymptotically efficient adaptive allocation rules. Adv. Appl. Math. 6(1), 4\u201322 (1985)","journal-title":"Adv. Appl. Math."},{"key":"19_CR15","unstructured":"Langford, J., Zhang, T.: The epoch-greedy algorithm for contextual multi-armed bandits. In: Proceedings of NIPS (2007)"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Li, L., Chu, W., Langford, J., Schapire, R.E.: A contextual-bandit approach to personalized news article recommendation. In: Proceedings of ACM WWW (2010)","DOI":"10.1145\/1772690.1772758"},{"issue":"3","key":"19_CR17","doi-asserted-by":"crossref","first-page":"1902","DOI":"10.1109\/TIT.2012.2230215","volume":"59","author":"H Liu","year":"2013","unstructured":"Liu, H., Liu, K., Zhao, Q.: Learning in a changing world: restless multiarmed bandit with unknown dynamics. IEEE Trans. Inf. Theory 59(3), 1902\u20131916 (2013)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"11","key":"19_CR18","first-page":"5557","volume":"56","author":"K Liu","year":"2010","unstructured":"Liu, K., Zhao, Q.: Indexability of restless bandit problems and optimality of Whittle index for dynamic multichannel access. IEEE Trans. Inf. Theory 56(11), 5557\u20135567 (2010)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Meshram, R., Manjunath, D., Gopalan, A.: A restless bandit with no observable states for recommendation systems and communication link scheduling. In: Proceedings of IEEE CDC (2015)","DOI":"10.1109\/CDC.2015.7403456"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Meshram, R., Gopalan, A., Manjunath, D.: Optimal recommendation to users that react: online learning for a class of POMDPs. In: Proceedings of IEEE CDC (2016)","DOI":"10.1109\/CDC.2016.7799381"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Meshram, R., Gopalan, A., Manjunath, D.: Optimal recommendation to users that react: online learning for a class of POMDPs. Arxiv (2016)","DOI":"10.1109\/CDC.2016.7799381"},{"key":"19_CR22","unstructured":"Meshram, R., Manjunath, D., Gopalan, A.: On the whittle index for restless multi-armed hidden Markov bandits. Arxiv (2016)"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Meshram, R., Gopalan, A., Manjunath, D.: Restless bandits that hide their hand and recommendation systems. In: Proceedings of IEEE COMSNETS (2017)","DOI":"10.1109\/COMSNETS.2017.7945378"},{"issue":"2","key":"19_CR24","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1287\/moor.24.2.293","volume":"24","author":"CH Papadimitriou","year":"1999","unstructured":"Papadimitriou, C.H., Tsitsiklis, J.H.: The complexity of optimal queueing network control. Math. Oper. Res. 24(2), 293\u2013305 (1999)","journal-title":"Math. Oper. Res."},{"issue":"9","key":"19_CR25","doi-asserted-by":"crossref","first-page":"587","DOI":"10.1287\/mnsc.17.9.587","volume":"17","author":"SM Ross","year":"1971","unstructured":"Ross, S.M.: Quality control under Markovian deterioration. Manag. Sci. 17(9), 587\u2013596 (1971)","journal-title":"Manag. Sci."},{"key":"19_CR26","volume-title":"Applied Probability Models with Optimization Applications","author":"SM Ross","year":"1993","unstructured":"Ross, S.M.: Applied Probability Models with Optimization Applications. Dover Publications, New York (1993)"},{"issue":"3\u20134","key":"19_CR27","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1093\/biomet\/25.3-4.285","volume":"24","author":"WR Thompson","year":"1933","unstructured":"Thompson, W.R.: On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika 24(3\u20134), 285\u2013294 (1933)","journal-title":"Biometrika"},{"key":"19_CR28","volume-title":"Principles of Mathematical Analysis","author":"R Walter","year":"1976","unstructured":"Walter, R.: Principles of Mathematical Analysis, 3rd edn. McGraw-Hill Book Co., New York (1976)","edition":"3"},{"issue":"1","key":"19_CR29","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1017\/S0021900200040420","volume":"25","author":"P Whittle","year":"1988","unstructured":"Whittle, P.: Restless bandits: activity allocation in a changing world. J. Appl. Probab. 25(1), 287\u2013298 (1988)","journal-title":"J. Appl. Probab."}],"container-title":["Lecture Notes in Computer Science","Communication Systems and Networks"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-67235-9_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,3]],"date-time":"2019-10-03T19:37:51Z","timestamp":1570131471000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-67235-9_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319672342","9783319672359"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-67235-9_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]}}}