{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T15:39:21Z","timestamp":1781797161733,"version":"3.54.5"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2018,4,1]],"date-time":"2018-04-01T00:00:00Z","timestamp":1522540800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"name":"ERC","award":["FSA 308267"],"award-info":[{"award-number":["FSA 308267"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1109\/tac.2017.2747409","type":"journal-article","created":{"date-parts":[[2017,8,30]],"date-time":"2017-08-30T18:49:08Z","timestamp":1504118948000},"page":"915-930","source":"Crossref","is-referenced-by-count":44,"title":["Stochastic Online Shortest Path Routing: The Value of Feedback"],"prefix":"10.1109","volume":"63","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1934-7421","authenticated-orcid":false,"given":"Mohammad Sadegh","family":"Talebi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhenhua","family":"Zou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Richard","family":"Combes","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alexandre","family":"Proutiere","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2237-2580","authenticated-orcid":false,"given":"Mikael","family":"Johansson","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Explore first, exploit next: The true shape of regret in bandit problems","author":"garivier","year":"2016"},{"key":"ref38","first-page":"975","article-title":"Lipschitz bandits: Regret lower bounds and optimal algorithms","author":"magureanu","year":"0","journal-title":"Proc 27th Annu Conf Learn Theory"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1080\/02331930902730070"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-7152(98)00284-3"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012994275440"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2010.5706896"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511779398"},{"key":"ref36","first-page":"1453","article-title":"Online learning under delayed feedback","author":"joulani","year":"0","journal-title":"Proc 30th Int Conf Mach Learn"},{"key":"ref35","first-page":"521","article-title":"Unimodal bandits: Regret lower bounds and optimal algorithms","author":"combes","year":"2014","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1007\/978-3-642-24412-4_16","article-title":"On upper-confidence bound policies for switching bandit problems","author":"garivier","year":"2011","journal-title":"Proceedings of Algorithm Learning Theory ALT-97"},{"key":"ref10","first-page":"151","article-title":"Combinatorial\n multi-armed bandit: General framework and applications","author":"chen","year":"0","journal-title":"Proc 30th Int Conf Mach Learn"},{"key":"ref40","first-page":"359","article-title":"The KL-UCB algorithm for bounded stochastic bandits and beyond","author":"garivier","year":"0","journal-title":"Proc 24th Annu Conf Learn Theory"},{"key":"ref11","first-page":"100","article-title":"Thompson\n sampling for complex online problems","author":"gopalan","year":"0","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref12","first-page":"535","article-title":"Tight regret bounds for stochastic\n combinatorial semi-bandits","author":"kveton","year":"0","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1952-09620-8"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(85)90002-8"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1287\/moor.2013.0598"},{"key":"ref16","first-page":"234","article-title":"An efficient algorithm for learning with semi-bandit feedback","author":"neu","year":"0","journal-title":"Proc Int Conf Algorithmic Learn Theory"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2011.2181864"},{"key":"ref18","first-page":"2107","article-title":"Combinatorial bandits revisited","author":"combes","year":"0","journal-title":"Proc Adv Neur Inf Process Syst"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1987.1104491"},{"key":"ref28","author":"puterman","year":"2005","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref4","first-page":"2369","article-title":"The on-line shortest path\n problem under partial monitoring","volume":"8","author":"gy\u00f6rgy","year":"2007","journal-title":"J Mach Learn Res"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1287\/moor.22.1.222"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1093\/comjnl\/bxh168"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2016.2525518"},{"key":"ref29","first-page":"1563","article-title":"Near-optimal\n regret bounds for reinforcement learning","volume":"11","author":"jaksch","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref5","first-page":"2202","article-title":"Endhost-based shortest path routing in dynamic\n networks","author":"he","year":"0","journal-title":"Proc 32nd IEEE Int Conf Comput Commun"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511800481"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2012.01.001"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1007352.1007367"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2014.6859133"},{"key":"ref20","first-page":"420","article-title":"Matroid bandits: Fast combinatorial optimization with learning","author":"kveton","year":"0","journal-title":"Proc Conf Uncertainty of Artificial Intelligence"},{"key":"ref22","article-title":"Stochastic online shortest path routing: The value of feedback","author":"talebi","year":"2017"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/DYSPAN.2010.5457857"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.2307\/2332286"},{"key":"ref23","first-page":"1113","article-title":"Efficient learning in\n large-scale combinatorial semi-bandits","author":"wen","year":"0","journal-title":"Proceedings of the 32nd Intl Conf on Machine Learning"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638236"},{"key":"ref25","first-page":"232","article-title":"Adaptive shortest-path routing under unknown and stochastically varying link states","author":"liu","year":"0","journal-title":"Proc 3rd Int Symp Model Optim Mobile Ad Hoc Wireless Netw"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/8326634\/08022964.pdf?arnumber=8022964","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T07:26:27Z","timestamp":1643181987000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8022964\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4]]},"references-count":40,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tac.2017.2747409","relation":{},"ISSN":["0018-9286","1558-2523"],"issn-type":[{"value":"0018-9286","type":"print"},{"value":"1558-2523","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,4]]}}}