{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T20:36:19Z","timestamp":1778877379628,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,24]],"date-time":"2021-08-24T00:00:00Z","timestamp":1629763200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100000266","name":"Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","award":["EP\/R026424\/1"],"award-info":[{"award-number":["EP\/R026424\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["H2020-832921 MIRROR"],"award-info":[{"award-number":["H2020-832921 MIRROR"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8,24]]},"DOI":"10.1145\/3460426.3463630","type":"proceedings-article","created":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T22:50:28Z","timestamp":1630536628000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Combining Adversarial and Reinforcement Learning for Video Thumbnail Selection"],"prefix":"10.1145","author":[{"given":"Evlampios","family":"Apostolidis","sequence":"first","affiliation":[{"name":"CERTH-Information Technologies Institute &amp; Queen Mary University of London, Thessaloniki, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eleni","family":"Adamantidou","sequence":"additional","affiliation":[{"name":"CERTH-Information Technologies Institute, Thessaloniki, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vasileios","family":"Mezaris","sequence":"additional","affiliation":[{"name":"CERTH-Information Technologies Institute, Thessaloniki, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ioannis","family":"Patras","sequence":"additional","affiliation":[{"name":"Queen Mary University of London, London, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,9]]},"reference":[{"key":"#cr-split#-e_1_3_2_1_1_1.1","doi-asserted-by":"crossref","unstructured":"E. Apostolidis E. Adamantidou A. I. Metsai V. Mezaris and I. Patras. 2020. AC-SUM-GAN: Connecting Actor-Critic and Generative Adversarial Networks for Unsupervised Video Summarization. IEEE Transactions on Circuits and Systems for Video Technology (2020) 1--1. https:\/\/doi.org\/10.1109\/TCSVT.2020.3037883 10.1109\/TCSVT.2020.3037883","DOI":"10.1109\/TCSVT.2020.3037883"},{"key":"#cr-split#-e_1_3_2_1_1_1.2","doi-asserted-by":"crossref","unstructured":"E. Apostolidis E. Adamantidou A. I. Metsai V. Mezaris and I. Patras. 2020. AC-SUM-GAN: Connecting Actor-Critic and Generative Adversarial Networks for Unsupervised Video Summarization. IEEE Transactions on Circuits and Systems for Video Technology (2020) 1--1. https:\/\/doi.org\/10.1109\/TCSVT.2020.3037883","DOI":"10.1109\/TCSVT.2020.3037883"},{"key":"e_1_3_2_1_2_1","volume-title":"Proc. of the 28th ACM Int. Conf. on Multimedia (MM '20)","author":"Apostolidis E.","unstructured":"E. Apostolidis , E. Adamantidou , A. I. Metsai , V. Mezaris , and I. Patras . 2020. Performance over Random: A Robust Evaluation Protocol for Video Summarization Methods . In Proc. of the 28th ACM Int. Conf. on Multimedia (MM '20) . Association for Computing Machinery, New York, NY, USA, 1056--1064. https:\/\/doi.org\/10.1145\/3394171.3413632 10.1145\/3394171.3413632 E. Apostolidis, E. Adamantidou, A. I. Metsai, V. Mezaris, and I. Patras. 2020. Performance over Random: A Robust Evaluation Protocol for Video Summarization Methods. In Proc. of the 28th ACM Int. Conf. on Multimedia (MM '20). Association for Computing Machinery, New York, NY, USA, 1056--1064. https:\/\/doi.org\/10.1145\/3394171.3413632"},{"key":"e_1_3_2_1_3_1","volume-title":"Proc. of 25th Int. Conf. on MultiMedia Modeling (MMM","author":"Apostolidis K.","year":"2019","unstructured":"K. Apostolidis and V. Mezaris . 2019. Image Aesthetics Assessment Using Fully Convolutional Neural Networks . In Proc. of 25th Int. Conf. on MultiMedia Modeling (MMM 2019 ). Springer International Publishing, Cham, 361--373. K. Apostolidis and V. Mezaris. 2019. Image Aesthetics Assessment Using Fully Convolutional Neural Networks. In Proc. of 25th Int. Conf. on MultiMedia Modeling (MMM 2019). Springer International Publishing, Cham, 361--373."},{"key":"e_1_3_2_1_4_1","unstructured":"N. Arthurs and S. Birnbaum. 2017. Selecting Youtube Video Thumbnails via Convolutional Neural Networks. Technical Report. Stanford.  N. Arthurs and S. Birnbaum. 2017. Selecting Youtube Video Thumbnails via Convolutional Neural Networks. Technical Report. Stanford."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1010933404324"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1177\/24.1.1254907"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-015-2909-6"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2010.08.004"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2018.8486533"},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. of the 19th Int. Conf. on Neural Information Processing Systems (NIPS'06)","author":"Harel J.","unstructured":"J. Harel , C. Koch , and P. Perona . 2006. Graph-Based Visual Saliency . In Proc. of the 19th Int. Conf. on Neural Information Processing Systems (NIPS'06) . MIT Press, Cambridge, MA, USA, 545--552. J. Harel, C. Koch, and P. Perona. 2006. Graph-Based Visual Saliency. In Proc. of the 19th Int. Conf. on Neural Information Processing Systems (NIPS'06). MIT Press, Cambridge, MA, USA, 545--552."},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. of the 27th ACM Int. Conf. on Multimedia (MM '19)","author":"He X.","unstructured":"X. He , Y. Hua , T. Song , Z. Zhang , Z. Xue , R. Ma , N. Robertson , and H. Guan . 2019. Unsupervised Video Summarization with Attentive Conditional Generative Adversarial Networks . In Proc. of the 27th ACM Int. Conf. on Multimedia (MM '19) . ACM, New York, NY, USA, 2296--2304. https:\/\/doi.org\/10.1145\/3343031.3351056 10.1145\/3343031.3351056 X. He, Y. Hua, T. Song, Z. Zhang, Z. Xue, R. Ma, N. Robertson, and H. Guan. 2019. Unsupervised Video Summarization with Attentive Conditional Generative Adversarial Networks. In Proc. of the 27th ACM Int. Conf. on Multimedia (MM '19). ACM, New York, NY, USA, 2296--2304. https:\/\/doi.org\/10.1145\/3343031.3351056"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018537"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMT.2011.6002001"},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. of the 2011 18th IEEE Int. Conf. on Image Processing (ICIP). 2449--2452","author":"Liu C.","year":"2011","unstructured":"C. Liu , Q. Huang , and S. Jiang . 2011. Query sensitive dynamic web video thumbnail generation . In Proc. of the 2011 18th IEEE Int. Conf. on Image Processing (ICIP). 2449--2452 . https:\/\/doi.org\/10.1109\/ICIP. 2011 .6116155 10.1109\/ICIP.2011.6116155 C. Liu, Q. Huang, and S. Jiang. 2011. Query sensitive dynamic web video thumbnail generation. In Proc. of the 2011 18th IEEE Int. Conf. on Image Processing (ICIP). 2449--2452. https:\/\/doi.org\/10.1109\/ICIP.2011.6116155"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.147"},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. of the 15th ACM Int. Conf. on Multimedia (MM '07)","author":"Liu J.","unstructured":"J. Liu , B. Wang , M. Li , Z. Li , W. Ma , H. Lu , and S. Ma . 2007. Dual Cross-Media Relevance Model for Image Annotation . In Proc. of the 15th ACM Int. Conf. on Multimedia (MM '07) (MM '07). Association for Computing Machinery, New York, NY, USA, 605--614. https:\/\/doi.org\/10.1145\/1291233.1291380 10.1145\/1291233.1291380 J. Liu, B. Wang, M. Li, Z. Li, W. Ma, H. Lu, and S. Ma. 2007. Dual Cross-Media Relevance Model for Image Annotation. In Proc. of the 15th ACM Int. Conf. on Multimedia (MM '07) (MM '07). Association for Computing Machinery, New York, NY, USA, 605--614. https:\/\/doi.org\/10.1145\/1291233.1291380"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298994"},{"key":"e_1_3_2_1_18_1","volume-title":"Proc. of the 2017 IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR). 2982--2991","author":"Mahasseni B.","unstructured":"B. Mahasseni , M. Lam , and S. Todorovic . 2017. Unsupervised Video Summarization with Adversarial LSTM Networks . In Proc. of the 2017 IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR). 2982--2991 . B. Mahasseni, M. Lam, and S. Todorovic. 2017. Unsupervised Video Summarization with Adversarial LSTM Networks. In Proc. of the 2017 IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR). 2982--2991."},{"key":"e_1_3_2_1_19_1","volume-title":"Proc. of the 2012 IEEE Conf. on Computer Vision and Pattern Recognition (CVPR). 2408--2415","author":"Murray N.","year":"2012","unstructured":"N. Murray , L. Marchesotti , and F. Perronnin . 2012. AVA: A large-scale database for aesthetic visual analysis . In Proc. of the 2012 IEEE Conf. on Computer Vision and Pattern Recognition (CVPR). 2408--2415 . https:\/\/doi.org\/10.1109\/CVPR. 2012 .6247954 10.1109\/CVPR.2012.6247954 N. Murray, L. Marchesotti, and F. Perronnin. 2012. AVA: A large-scale database for aesthetic visual analysis. In Proc. of the 2012 IEEE Conf. on Computer Vision and Pattern Recognition (CVPR). 2408--2415. https:\/\/doi.org\/10.1109\/CVPR.2012.6247954"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9206951"},{"key":"e_1_3_2_1_21_1","volume-title":"Proc. of the 2020 IEEE Winter Conf. on Applications of Computer Vision (WACV). 3201--3210","author":"Ren J.","year":"2020","unstructured":"J. Ren , X. Shen , Z. Lin , and R. Mch . 2020. Best Frame Selection in a Short Video . In Proc. of the 2020 IEEE Winter Conf. on Applications of Computer Vision (WACV). 3201--3210 . https:\/\/doi.org\/10.1109\/WACV45572. 2020 .9093615 10.1109\/WACV45572.2020.9093615 J. Ren, X. Shen, Z. Lin, and R. Mch. 2020. Best Frame Selection in a Short Video. In Proc. of the 2020 IEEE Winter Conf. on Applications of Computer Vision (WACV). 3201--3210. https:\/\/doi.org\/10.1109\/WACV45572.2020.9093615"},{"key":"e_1_3_2_1_22_1","volume-title":"Proc. of the 3rd Int. Conf. on Learning Representations, ICLR 2015","author":"Simonyan K.","year":"2015","unstructured":"K. Simonyan and A. Zisserman . 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition . In Proc. of the 3rd Int. Conf. on Learning Representations, ICLR 2015 , San Diego, CA, USA, May 7--9 , 2015 , Y. Bengio and Y. LeCun (Eds.). http:\/\/arxiv.org\/abs\/1409.1556 K. Simonyan and A. Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. In Proc. of the 3rd Int. Conf. on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Y. Bengio and Y. LeCun (Eds.). http:\/\/arxiv.org\/abs\/1409.1556"},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. of the 25th ACM Int. on Conf. on Information and Knowledge Management (CIKM '16)","author":"Song Y.","unstructured":"Y. Song , M. Redi , J. Vallmitjana , and A. Jaimes . 2016. To Click or Not To Click: Automatic Selection of Beautiful Thumbnails from Videos . In Proc. of the 25th ACM Int. on Conf. on Information and Knowledge Management (CIKM '16) . Association for Computing Machinery, New York, NY, USA, 659--668. https:\/\/doi.org\/10.1145\/2983323.2983349 10.1145\/2983323.2983349 Y. Song, M. Redi, J. Vallmitjana, and A. Jaimes. 2016. To Click or Not To Click: Automatic Selection of Beautiful Thumbnails from Videos. In Proc. of the 25th ACM Int. on Conf. on Information and Knowledge Management (CIKM '16). Association for Computing Machinery, New York, NY, USA, 659--668. https:\/\/doi.org\/10.1145\/2983323.2983349"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_25_1","volume-title":"Proc. of the 2019 IEEE Conf. on Multimedia Information Processing and Retrieval (MIPR). 54--59","author":"Tsao C.","year":"2019","unstructured":"C. Tsao , J. Lou , and H. H. Chen . 2019. Thumbnail Image Selection for VOD Services . In Proc. of the 2019 IEEE Conf. on Multimedia Information Processing and Retrieval (MIPR). 54--59 . https:\/\/doi.org\/10.1109\/MIPR. 2019 .00018 10.1109\/MIPR.2019.00018 C. Tsao, J. Lou, and H. H. Chen. 2019. Thumbnail Image Selection for VOD Services. In Proc. of the 2019 IEEE Conf. on Multimedia Information Processing and Retrieval (MIPR). 54--59. https:\/\/doi.org\/10.1109\/MIPR.2019.00018"},{"key":"e_1_3_2_1_26_1","volume-title":"Proc. of the 25thACM Int. Conf. on Multimedia (MM' 17)","author":"Vasudevan A. B.","unstructured":"A. B. Vasudevan , M. Gygli , A. Volokitin , and L. Van Gool . 2017. Query-Adaptive Video Summarization via Quality-Aware Relevance Estimation . In Proc. of the 25thACM Int. Conf. on Multimedia (MM' 17) (MM '17). Association for Computing Machinery, New York, NY, USA, 582--590. https:\/\/doi.org\/10.1145\/3123266.3123297 10.1145\/3123266.3123297 A. B. Vasudevan, M. Gygli, A. Volokitin, and L. Van Gool. 2017. Query-Adaptive Video Summarization via Quality-Aware Relevance Estimation. In Proc. of the 25thACM Int. Conf. on Multimedia (MM' 17) (MM '17). Association for Computing Machinery, New York, NY, USA, 582--590. https:\/\/doi.org\/10.1145\/3123266.3123297"},{"key":"e_1_3_2_1_27_1","volume-title":"Proc. of the 2009 16th IEEE Int. Conf. on Image Processing (ICIP). 4333--4336","author":"Gao Y.","year":"2009","unstructured":"Y. Gao , T. Zhang , and J. Xiao . 2009. Thematic video thumbnail selection . In Proc. of the 2009 16th IEEE Int. Conf. on Image Processing (ICIP). 4333--4336 . https:\/\/doi.org\/10.1109\/ICIP. 2009 .5419128 10.1109\/ICIP.2009.5419128 Y. Gao, T. Zhang, and J. Xiao. 2009. Thematic video thumbnail selection. In Proc. of the 2009 16th IEEE Int. Conf. on Image Processing (ICIP). 4333--4336. https:\/\/doi.org\/10.1109\/ICIP.2009.5419128"},{"key":"e_1_3_2_1_28_1","unstructured":"Z. Yu and N. Shi. 2020. A Multi-modal Deep Learning Model for Video Thumbnail Selection. arXiv:2101.00073 [cs.CV]  Z. Yu and N. Shi. 2020. A Multi-modal Deep Learning Model for Video Thumbnail Selection. arXiv:2101.00073 [cs.CV]"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2959451"},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. of the 27th ACM Int. Conf. on Multimedia (MM '19)","author":"Yuan Y.","unstructured":"Y. Yuan , L. Ma , and W. Zhu . 2019. Sentence Specified Dynamic Video Thumbnail Generation . In Proc. of the 27th ACM Int. Conf. on Multimedia (MM '19) . Association for Computing Machinery, New York, NY, USA, 2332--2340. https:\/\/doi.org\/10.1145\/3343031.3350985 10.1145\/3343031.3350985 Y. Yuan, L. Ma, and W. Zhu. 2019. Sentence Specified Dynamic Video Thumbnail Generation. In Proc. of the 27th ACM Int. Conf. on Multimedia (MM '19). Association for Computing Machinery, New York, NY, USA, 2332--2340. https:\/\/doi.org\/10.1145\/3343031.3350985"},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. of the European Conf. on Computer Vision 2016 (ECCV), B. Leibe, J. Matas, N. Sebe, and M. Welling (Eds.). Springer International Publishing, Cham, 766--782","author":"Zhang K.","unstructured":"K. Zhang , W.-L. Chao , F. Sha , and K. Grauman . 2016. Video Summarization with Long Short-Term Memory . In Proc. of the European Conf. on Computer Vision 2016 (ECCV), B. Leibe, J. Matas, N. Sebe, and M. Welling (Eds.). Springer International Publishing, Cham, 766--782 . K. Zhang, W.-L. Chao, F. Sha, and K. Grauman. 2016. Video Summarization with Long Short-Term Memory. In Proc. of the European Conf. on Computer Vision 2016 (ECCV), B. Leibe, J. Matas, N. Sebe, and M. Welling (Eds.). Springer International Publishing, Cham, 766--782."},{"key":"e_1_3_2_1_32_1","volume-title":"Proc. of the 2012 Eighth Int. Conf. on Intelligent Information Hiding and Multimedia Signal Processing. 343--346","author":"Zhang W.","year":"2012","unstructured":"W. Zhang , C. Liu , Q. Huang , S. Jiang , and W. Gao . 2012. A Novel Framework for Web Video Thumbnail Generation . In Proc. of the 2012 Eighth Int. Conf. on Intelligent Information Hiding and Multimedia Signal Processing. 343--346 . https:\/\/doi.org\/10.1109\/IIH-MSP. 2012 .89 10.1109\/IIH-MSP.2012.89 W. Zhang, C. Liu, Q. Huang, S. Jiang, and W. Gao. 2012. A Novel Framework for Web Video Thumbnail Generation. In Proc. of the 2012 Eighth Int. Conf. on Intelligent Information Hiding and Multimedia Signal Processing. 343--346. https:\/\/doi.org\/10.1109\/IIH-MSP.2012.89"},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. of the 2018 AAAI Conf. on Artificial Intelligence (AAAI","author":"Zhou K.","year":"2018","unstructured":"K. Zhou and Y. Qiao . 2018. Deep Reinforcement Learning for Unsupervised Video Summarization with Diversity-Representativeness Reward . In Proc. of the 2018 AAAI Conf. on Artificial Intelligence (AAAI 2018 ). K. Zhou and Y. Qiao. 2018. Deep Reinforcement Learning for Unsupervised Video Summarization with Diversity-Representativeness Reward. In Proc. of the 2018 AAAI Conf. on Artificial Intelligence (AAAI 2018)."}],"event":{"name":"ICMR '21: International Conference on Multimedia Retrieval","location":"Taipei Taiwan","acronym":"ICMR '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2021 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3460426.3463630","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3460426.3463630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:17:03Z","timestamp":1750191423000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3460426.3463630"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,24]]},"references-count":34,"alternative-id":["10.1145\/3460426.3463630","10.1145\/3460426"],"URL":"https:\/\/doi.org\/10.1145\/3460426.3463630","relation":{},"subject":[],"published":{"date-parts":[[2021,8,24]]},"assertion":[{"value":"2021-09-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}