{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T07:03:44Z","timestamp":1751094224800,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319675039"},{"type":"electronic","value":"9783319675046"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-67504-6_2","type":"book-chapter","created":{"date-parts":[[2017,9,23]],"date-time":"2017-09-23T02:03:20Z","timestamp":1506132200000},"page":"18-34","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Interactive Thompson Sampling for\u00a0Multi-objective Multi-armed Bandits"],"prefix":"10.1007","author":[{"given":"Diederik M.","family":"Roijers","sequence":"first","affiliation":[]},{"given":"Luisa M.","family":"Zintgraf","sequence":"additional","affiliation":[]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,9,24]]},"reference":[{"key":"2_CR1","unstructured":"Agrawal, S., Goyal, N.: Analysis of Thompson sampling for the multi-armed bandit problem. In: COLT, p. 39.1\u201339.26 (2012)"},{"issue":"2\u20133","key":"2_CR2","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47(2\u20133), 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"key":"2_CR3","unstructured":"Auer, P., Chiang, C.-K., Ortner, R., Drugan, M.M.: Pareto front identification from stochastic bandit feedback. In: AISTATS, pp. 939\u2013947 (2016)"},{"key":"2_CR4","unstructured":"Benabbou, N., Perny, P.: Combining preference elicitation and search in multiobjective state-space graphs. In: IJCAI, pp. 297\u2013303 (2015)"},{"key":"2_CR5","volume-title":"Pattern Recognition and Machine Learning","author":"CM Bishop","year":"2006","unstructured":"Bishop, C.M.: Pattern Recognition and Machine Learning. Springer, New York (2006)"},{"key":"2_CR6","unstructured":"Brochu, E., de Freitas, N., Ghosh, A.: Active preference learning with discrete choice data. In: NIPS, pp. 409\u2013416 (2008)"},{"key":"2_CR7","unstructured":"Chapelle, O., Li, L.: An empirical evaluation of Thompson sampling. In: NIPS, pp. 2249\u20132257 (2011)"},{"key":"2_CR8","volume-title":"An Introduction to Decision Analysis","author":"RT Clemen","year":"1997","unstructured":"Clemen, R.T., Decisions, M.H.: An Introduction to Decision Analysis. PWS-Kent, Boston (1997)"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Drugan, M.M., Now\u00e9, A.: Designing multi-objective multi-armed bandits algorithms: a study. In: IJCNN, pp. 1\u20138. IEEE (2013)","DOI":"10.1109\/IJCNN.2013.6707036"},{"key":"2_CR10","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/978-3-319-67504-6_14","volume-title":"Algorithmic Decision Theory","author":"A Igarashi","year":"2017","unstructured":"Igarashi, A., Roijers, D.M.: Multi-criteria coalition formation games. In: Rothe, J. (ed.) ADT 2017. LNAI, vol. 10576, pp. 197\u2013213. Springer, Cham (2017)"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Libin, P., Verstraeten, T., Theys, K., Roijers, D.M., Vrancx, P., Now\u00e9, A.: Efficient evaluation of influenza mitigation strategies using preventive bandits. In: ALA, 9 p. (2017)","DOI":"10.1007\/978-3-319-71679-4_5"},{"key":"2_CR12","unstructured":"Mannion, P., Duggan, J., Howley, E.: A theoretical and empirical analysis of reward transformations in multi-objective stochastic games. In: AAMAS, pp. 1625\u20131627 (2017)"},{"key":"2_CR13","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers, D.M., Vamplew, P., Whiteson, S., Dazeley, R.: A survey of multi-objective sequential decision-making. JAIR 48, 67\u2013113 (2013)","journal-title":"JAIR"},{"issue":"1","key":"2_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.2200\/S00765ED1V01Y201704AIM034","volume":"11","author":"DM Roijers","year":"2017","unstructured":"Roijers, D.M., Whiteson, S.: Multi-objective decision making. Synth. Lect. Artif. Intell. Mach. Learn. 11(1), 1\u2013129 (2017)","journal-title":"Synth. Lect. Artif. Intell. Mach. Learn."},{"key":"2_CR15","unstructured":"Tesauro, G.: Connectionist learning of expert preferences by comparison training. In: NIPS, vol. 1, pp. 99\u2013106 (1988)"},{"issue":"3\/4","key":"2_CR16","doi-asserted-by":"publisher","first-page":"285","DOI":"10.2307\/2332286","volume":"25","author":"WR Thompson","year":"1933","unstructured":"Thompson, W.R.: On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika 25(3\/4), 285\u2013294 (1933)","journal-title":"Biometrika"},{"issue":"1","key":"2_CR17","first-page":"3483","volume":"15","author":"K Van Moffaert","year":"2014","unstructured":"Van Moffaert, K., Now\u00e9, A.: Multi-objective reinforcement learning using sets of Pareto dominating policies. JMLR 15(1), 3483\u20133512 (2014)","journal-title":"JMLR"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Van Moffaert, K., Van Vaerenbergh, K., Vrancx, P., Now\u00e9, A.: Multi-objective $$\\chi $$-armed bandits. In: IJCNN, pp. 2331\u20132338 (2014)","DOI":"10.1109\/IJCNN.2014.6889753"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Wiering, M.A., Withagen, M., Drugan, M.M.: Model-based multi-objective reinforcement learning. In: ADPRL, pp. 1\u20136 (2014)","DOI":"10.1109\/ADPRL.2014.7010622"},{"key":"2_CR20","unstructured":"Wilson, N., Razak, A., Marinescu, R.: Computing possibly optimal solutions for multi-objective constraint optimisation with tradeoffs. In: IJCAI, pp. 815\u2013822 (2015)"},{"key":"2_CR21","unstructured":"Wu, H., Liu, X.: Double Thompson sampling for dueling bandits. In: NIPS, pp. 649\u2013657 (2016)"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Yahyaa, S.Q., Drugan, M.M., Manderick, B.: Thompson sampling in the adaptive linear scalarized multi objective multi armed bandit. In: ICAART, pp. 55\u201365 (2015)","DOI":"10.5220\/0005184400550065"},{"key":"2_CR23","unstructured":"Zoghi, M., Whiteson, S., Munos, R., De Rijke, M.: Relative upper confidence bound for the k-armed dueling bandit problem. In: ICML, pp. 10\u201318 (2014)"}],"container-title":["Lecture Notes in Computer Science","Algorithmic Decision Theory"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-67504-6_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,12]],"date-time":"2020-11-12T19:04:53Z","timestamp":1605207893000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-67504-6_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319675039","9783319675046"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-67504-6_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"24 September 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithmic Decision Theory","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Luxembourg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Luxembourg","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 October 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aldt2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/sma.uni.lu\/adt2017\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}