{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T10:02:04Z","timestamp":1760608924570,"version":"3.40.3"},"publisher-location":"Cham","reference-count":13,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319317496"},{"type":"electronic","value":"9783319317502"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-31750-2_12","type":"book-chapter","created":{"date-parts":[[2016,4,14]],"date-time":"2016-04-14T11:27:08Z","timestamp":1460633228000},"page":"143-155","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Linear Upper Confidence Bound Algorithm for\u00a0Contextual Bandit Problem with Piled Rewards"],"prefix":"10.1007","author":[{"given":"Kuan-Hao","family":"Huang","sequence":"first","affiliation":[]},{"given":"Hsuan-Tien","family":"Lin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,4,12]]},"reference":[{"key":"12_CR1","unstructured":"Agarwal, A., Hsu, D., Kale, S., Langford, J., Li, L., Schapire, R.E.: Taming the monster: a fast and simple algorithm for contextual bandits. In: ICML, pp. 1638\u20131646 (2014)"},{"key":"12_CR2","first-page":"397","volume":"3","author":"P Auer","year":"2003","unstructured":"Auer, P.: Using confidence bounds for exploitation-exploration trade-offs. J. Mach. Learn. Res. 3, 397\u2013422 (2003)","journal-title":"J. Mach. Learn. Res."},{"issue":"2\u20133","key":"12_CR3","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47(2\u20133), 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"key":"12_CR4","unstructured":"Chou, K.C., Chiang, C.K., Lin, H.T., Lu, C.J.: Pseudo-reward algorithms for contextual bandits with linear payoff functions. In: ACML, pp. 344\u2013359 (2014)"},{"key":"12_CR5","unstructured":"Chu, W., Li, L., Reyzin, L., Schapire, R.E.: Contextual bandits with linear payoff functions. In: AISTATS, pp. 208\u2013214 (2011)"},{"key":"12_CR6","unstructured":"Dud\u00edk, M., Hsu, D., Kale, S., Karampatziakis, N., Langford, J., Reyzin, L., Zhang, T.: Efficient optimal learning for contextual bandits. In: UAI, pp. 169\u2013178 (2011)"},{"key":"12_CR7","unstructured":"Dud\u00edk, M., Langford, J., Li, L.: Doubly robust policy evaluation and learning. In: ICML, pp. 1097\u20131104 (2011)"},{"key":"12_CR8","unstructured":"Guha, S., Munagala, K., Pal, M.: Multiarmed bandit problems with delayed feedback, arxiv:1011.1161 (2010)"},{"key":"12_CR9","unstructured":"Joulani, P., Gy\u00f6rgy, A., Szepesv\u00e1ri, C.: Online learning under delayed feedback. In: ICML, pp. 1453\u20131461 (2013)"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Li, L., Chu, W., Langford, J., Schapire, R.E.: A contextual-bandit approach to personalized news article recommendation. In: WWW, pp. 661\u2013670 (2010)","DOI":"10.1145\/1772690.1772758"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Li, L., Chu, W., Langford, J., Wang, X.: Unbiased offline evaluation of contextual-bandit-based news article recommendation algorithms. In: WSDM, pp. 297\u2013306 (2011)","DOI":"10.1145\/1935826.1935878"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Mandel, T., Liu, Y.E., Brunskill, E., Popovic, Z.: The queue method: handling delay, heuristics, prior data, and evaluation in bandits. In: AAAI (2015)","DOI":"10.1609\/aaai.v29i1.9604"},{"issue":"3","key":"12_CR13","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1109\/TAC.2005.844079","volume":"50","author":"CC Wang","year":"2005","unstructured":"Wang, C.C., Kulkarni, S.R., Poor, H.V.: Bandit problems with side observations. IEEE Trans. Autom. Control 50(3), 338\u2013355 (2005)","journal-title":"IEEE Trans. Autom. Control"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-31750-2_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,17]],"date-time":"2023-08-17T16:55:23Z","timestamp":1692291323000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-31750-2_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319317496","9783319317502"],"references-count":13,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-31750-2_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"12 April 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}