{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:24:04Z","timestamp":1778084644125,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,15]],"date-time":"2019-10-15T00:00:00Z","timestamp":1571097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012659","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61732010, 61872234"],"award-info":[{"award-number":["61732010, 61872234"]}],"id":[{"id":"10.13039\/501100012659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014717","name":"National Outstanding Youth Science Fund Project of National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61525204"],"award-info":[{"award-number":["61525204"]}],"id":[{"id":"10.13039\/100014717","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,15]]},"DOI":"10.1145\/3343031.3351056","type":"proceedings-article","created":{"date-parts":[[2019,10,21]],"date-time":"2019-10-21T16:32:26Z","timestamp":1571675546000},"page":"2296-2304","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":77,"title":["Unsupervised Video Summarization with Attentive Conditional Generative Adversarial Networks"],"prefix":"10.1145","author":[{"given":"Xufeng","family":"He","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Hua","sequence":"additional","affiliation":[{"name":"Queen's University Belfast, Belfast, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Song","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zongpu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengui","family":"Xue","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruhui","family":"Ma","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Neil","family":"Robertson","sequence":"additional","affiliation":[{"name":"Queen's University Belfast, Belfast, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haibing","family":"Guan","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,10,15]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Benoit Steiner, Paul A. Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng.","author":"Abadi Mart\u00edn","year":"2016"},{"key":"e_1_3_2_1_2_1","volume-title":"Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473","author":"Bahdanau Dzmitry","year":"2014"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2010.08.004"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Jia Deng Wei Dong Richard Socher Li-Jia Li Kai Li and Fei-Fei Li. 2009. ImageNet: A large-scale hierarchical image database. In CVPR.  Jia Deng Wei Dong Richard Socher Li-Jia Li Kai Li and Fei-Fei Li. 2009. ImageNet: A large-scale hierarchical image database. In CVPR.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2012.10.002"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Litong Feng Ziyin Li Zhanghui Kuang and Wei Zhang. 2018. Extractive Video Summarizer with Memory Augmented Neural Networks. In MM.  Litong Feng Ziyin Li Zhanghui Kuang and Wei Zhang. 2018. Extractive Video Summarizer with Memory Augmented Neural Networks. In MM.","DOI":"10.1145\/3240508.3240651"},{"key":"e_1_3_2_1_7_1","unstructured":"Boqing Gong Wei-Lun Chao Kristen Grauman and Fei Sha. 2014. Diverse Sequential Subset Selection for Supervised Video Summarization. In NIPS.  Boqing Gong Wei-Lun Chao Kristen Grauman and Fei Sha. 2014. Diverse Sequential Subset Selection for Supervised Video Summarization. In NIPS."},{"key":"e_1_3_2_1_8_1","volume-title":"Sherjil Ozair, Aaron C. Courville, and Yoshua Bengio.","author":"Goodfellow Ian J.","year":"2014"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"e_1_3_2_1_10_1","volume-title":"Danilo Jimenez Rezende, and Daan Wierstra","author":"Gregor Karol","year":"2015"},{"key":"e_1_3_2_1_11_1","volume-title":"Van Gool","author":"Gygli Michael","year":"2015"},{"key":"e_1_3_2_1_12_1","volume-title":"Van Gool","author":"Gygli Michael","year":"2014"},{"key":"e_1_3_2_1_13_1","volume-title":"Efros","author":"Isola Phillip","year":"2017"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2904996"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766954"},{"key":"e_1_3_2_1_16_1","unstructured":"Hong-Wen Kang Yasuyuki Matsushita Xiaoou Tang and Xue-Quan Chen. 2006. Space-Time Video Montage. In CVPR.  Hong-Wen Kang Yasuyuki Matsushita Xiaoou Tang and Xue-Quan Chen. 2006. Space-Time Video Montage. In CVPR."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Aditya Khosla Raffay Hamid Chih-Jen Lin and Neel Sundaresan. 2013. Large- Scale Video Summarization Using Web-Image Priors. In CVPR.  Aditya Khosla Raffay Hamid Chih-Jen Lin and Neel Sundaresan. 2013. Large- Scale Video Summarization Using Web-Image Priors. In CVPR.","DOI":"10.1109\/CVPR.2013.348"},{"key":"e_1_3_2_1_18_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014"},{"key":"e_1_3_2_1_19_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma Diederik P","year":"2013"},{"key":"e_1_3_2_1_20_1","volume-title":"The gan landscape: Losses, architectures, regularization, and normalization. arXiv preprint arXiv:1807.04720","author":"Kurach Karol","year":"2018"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Christian Ledig Lucas Theis Ferenc Huszar Jose Caballero Andrew Cunningham Alejandro Acosta Andrew P. Aitken Alykhan Tejani Johannes Totz Zehan Wang and Wenzhe Shi. 2017. Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network. In CVPR.  Christian Ledig Lucas Theis Ferenc Huszar Jose Caballero Andrew Cunningham Alejandro Acosta Andrew P. Aitken Alykhan Tejani Johannes Totz Zehan Wang and Wenzhe Shi. 2017. Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network. In CVPR.","DOI":"10.1109\/CVPR.2017.19"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Yong Jae Lee Joydeep Ghosh and Kristen Grauman. 2012. Discovering important people and objects for egocentric video summarization. In CVPR.  Yong Jae Lee Joydeep Ghosh and Kristen Grauman. 2012. Discovering important people and objects for egocentric video summarization. In CVPR.","DOI":"10.1109\/CVPR.2012.6247820"},{"key":"e_1_3_2_1_23_1","unstructured":"Yingbo Li and Bernard M\u00e9rialdo. 2010. Multi-video summarization based on Video-MMR. In WIAMIS.  Yingbo Li and Bernard M\u00e9rialdo. 2010. Multi-video summarization based on Video-MMR. In WIAMIS."},{"key":"e_1_3_2_1_24_1","unstructured":"Yandong Li Liqiang Wang Tianbao Yang and Boqing Gong. 2018. How Local Is the Local Diversity? Reinforcing Sequential Determinantal Point Processes with Dynamic Ground Sets for Supervised Video Summarization. In ECCV.  Yandong Li Liqiang Wang Tianbao Yang and Boqing Gong. 2018. How Local Is the Local Diversity? Reinforcing Sequential Determinantal Point Processes with Dynamic Ground Sets for Supervised Video Summarization. In ECCV."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Behrooz Mahasseni Michael Lam and Sinisa Todorovic. 2017. Unsupervised Video Summarization with Adversarial LSTM Networks. In CVPR.  Behrooz Mahasseni Michael Lam and Sinisa Todorovic. 2017. Unsupervised Video Summarization with Adversarial LSTM Networks. In CVPR.","DOI":"10.1109\/CVPR.2017.318"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Mateusz Malinowski Carl Doersch Adam Santoro and Peter Battaglia. 2018. Learning Visual Question Answering by Bootstrapping Hard Attention. In ECCV.  Mateusz Malinowski Carl Doersch Adam Santoro and Peter Battaglia. 2018. Learning Visual Question Answering by Bootstrapping Hard Attention. In ECCV.","DOI":"10.1007\/978-3-030-01231-1_1"},{"key":"e_1_3_2_1_27_1","unstructured":"Sophie Marat Mickael Guironnet and Denis Pellerin. 2007. Video summarization using a visual attention model. In EUSIPCO.  Sophie Marat Mickael Guironnet and Denis Pellerin. 2007. Video summarization using a visual attention model. In EUSIPCO."},{"key":"e_1_3_2_1_28_1","volume-title":"Deep multi-scale video prediction beyond mean square error. arXiv preprint arXiv:1511.05440","author":"Mathieu Michael","year":"2015"},{"key":"e_1_3_2_1_29_1","volume-title":"Conditional Generative Adversarial Nets. CoRR abs\/1411.1784","author":"Mirza Mehdi","year":"2014"},{"key":"e_1_3_2_1_30_1","volume-title":"Spectral normalization for generative adversarial networks. arXiv preprint arXiv:1802.05957","author":"Miyato Takeru","year":"2018"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Yair Poleg Tavi Halperin Chetan Arora and Shmuel Peleg. 2015. EgoSampling: Fast-forward and stereo for egocentric videos. In CVPR.  Yair Poleg Tavi Halperin Chetan Arora and Shmuel Peleg. 2015. EgoSampling: Fast-forward and stereo for egocentric videos. In CVPR.","DOI":"10.1109\/CVPR.2015.7299109"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Danila Potapov Matthijs Douze Za\u00efd Harchaoui and Cordelia Schmid. 2014. Category-Specific Video Summarization. In ECCV.  Danila Potapov Matthijs Douze Za\u00efd Harchaoui and Cordelia Schmid. 2014. Category-Specific Video Summarization. In ECCV.","DOI":"10.1007\/978-3-319-10599-4_35"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.29"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Mrigank Rochan Linwei Ye and Yang Wang. 2018. Video Summarization Using Fully Convolutional Sequence Networks. In ECCV.  Mrigank Rochan Linwei Ye and Yang Wang. 2018. Video Summarization Using Fully Convolutional Sequence Networks. In ECCV.","DOI":"10.1007\/978-3-030-01258-8_22"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Yale Song Jordi Vallmitjana Amanda Stent and Alejandro Jaimes. 2015. TVSum: Summarizing web videos using titles. In CVPR.  Yale Song Jordi Vallmitjana Amanda Stent and Alejandro Jaimes. 2015. TVSum: Summarizing web videos using titles. In CVPR.","DOI":"10.1109\/CVPR.2015.7299154"},{"key":"e_1_3_2_1_36_1","volume-title":"Seitz","author":"Sun Min","year":"2014"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott E. Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2015. Going deeper with convolutions. In CVPR.  Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott E. Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2015. Going deeper with convolutions. In CVPR.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Arun Balajee Vasudevan Michael Gygli Anna Volokitin and Luc Van Gool. 2017. Query-adaptive Video Summarization via Quality-aware Relevance Estimation. In MM.  Arun Balajee Vasudevan Michael Gygli Anna Volokitin and Luc Van Gool. 2017. Query-adaptive Video Summarization via Quality-aware Relevance Estimation. In MM.","DOI":"10.1145\/3123266.3123297"},{"key":"e_1_3_2_1_39_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In NIPS.  Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In NIPS."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"XiaolongWang Ross Girshick Abhinav Gupta and Kaiming He. 2018. Non-local Neural Networks. In CVPR.  XiaolongWang Ross Girshick Abhinav Gupta and Kaiming He. 2018. Non-local Neural Networks. In CVPR.","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_2_1_41_1","unstructured":"L. Wu Y. Wang L. Shao and M. Wang. 2019. 3-D PersonVLAD: Learning Deep Global Representations for Video-Based Person Reidentification. IEEE Transactions on Neural Networks and Learning Systems (2019) 1--13.  L. Wu Y. Wang L. Shao and M. Wang. 2019. 3-D PersonVLAD: Learning Deep Global Representations for Video-Based Person Reidentification. IEEE Transactions on Neural Networks and Learning Systems (2019) 1--13."},{"key":"e_1_3_2_1_42_1","unstructured":"Kelvin Xu Jimmy Ba Ryan Kiros Kyunghyun Cho Aaron C. Courville Ruslan Salakhutdinov Richard S. Zemel and Yoshua Bengio. 2015. Show Attend and Tell: Neural Image Caption Generation with Visual Attention. In ICML.  Kelvin Xu Jimmy Ba Ryan Kiros Kyunghyun Cho Aaron C. Courville Ruslan Salakhutdinov Richard S. Zemel and Yoshua Bengio. 2015. Show Attend and Tell: Neural Image Caption Generation with Visual Attention. In ICML."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Tao Xu Pengchuan Zhang Qiuyuan Huang Han Zhang Zhe Gan Xiaolei Huang and Xiaodong He. 2018. AttnGAN: Fine-Grained Text to Image Generation With Attentional Generative Adversarial Networks. In CVPR.  Tao Xu Pengchuan Zhang Qiuyuan Huang Han Zhang Zhe Gan Xiaolei Huang and Xiaodong He. 2018. AttnGAN: Fine-Grained Text to Image Generation With Attentional Generative Adversarial Networks. In CVPR.","DOI":"10.1109\/CVPR.2018.00143"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_45_1","volume-title":"Self- Attention Generative Adversarial Networks. arXiv preprint arXiv:1805.08318","author":"Zhang Han","year":"2018"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Han Zhang Tao Xu and Hongsheng Li. 2017. StackGAN: Text to Photo-Realistic Image Synthesis with Stacked Generative Adversarial Networks. In ICCV.  Han Zhang Tao Xu and Hongsheng Li. 2017. StackGAN: Text to Photo-Realistic Image Synthesis with Stacked Generative Adversarial Networks. In ICCV.","DOI":"10.1109\/ICCV.2017.629"},{"key":"e_1_3_2_1_47_1","volume-title":"Summary Transfer: Exemplar-Based Subset Selection for Video Summarization. In CVPR.","author":"Zhang Ke","year":"2016"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Ke Zhang Wei-Lun Chao Fei Sha and Kristen Grauman. 2016. Video Summarization with Long Short-Term Memory. In ECCV.  Ke Zhang Wei-Lun Chao Fei Sha and Kristen Grauman. 2016. Video Summarization with Long Short-Term Memory. In ECCV.","DOI":"10.1007\/978-3-319-46478-7_47"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Ke Zhang Kristen Grauman and Fei Sha. 2018. Retrospective Encoders for Video Summarization. In ECCV.  Ke Zhang Kristen Grauman and Fei Sha. 2018. Retrospective Encoders for Video Summarization. In ECCV.","DOI":"10.1007\/978-3-030-01237-3_24"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Bin Zhao Xuelong Li and Xiaoqiang Lu. 2017. Hierarchical Recurrent Neural Network for Video Summarization. In MM.  Bin Zhao Xuelong Li and Xiaoqiang Lu. 2017. Hierarchical Recurrent Neural Network for Video Summarization. In MM.","DOI":"10.1145\/3123266.3123328"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Bin Zhao Xuelong Li and Xiaoqiang Lu. 2018. HSA-RNN: Hierarchical Structure- Adaptive RNN for Video Summarization. In CVPR.  Bin Zhao Xuelong Li and Xiaoqiang Lu. 2018. HSA-RNN: Hierarchical Structure- Adaptive RNN for Video Summarization. In CVPR.","DOI":"10.1109\/CVPR.2018.00773"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Kaiyang Zhou Yu Qiao and Tao Xiang. 2018. Deep Reinforcement Learning for Unsupervised Video Summarization With Diversity-Representativeness Reward. In AAAI.  Kaiyang Zhou Yu Qiao and Tao Xiang. 2018. Deep Reinforcement Learning for Unsupervised Video Summarization With Diversity-Representativeness Reward. In AAAI.","DOI":"10.1609\/aaai.v32i1.12255"}],"event":{"name":"MM '19: The 27th ACM International Conference on Multimedia","location":"Nice France","acronym":"MM '19","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 27th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3343031.3351056","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3343031.3351056","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:13:12Z","timestamp":1750201992000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3343031.3351056"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,15]]},"references-count":52,"alternative-id":["10.1145\/3343031.3351056","10.1145\/3343031"],"URL":"https:\/\/doi.org\/10.1145\/3343031.3351056","relation":{},"subject":[],"published":{"date-parts":[[2019,10,15]]},"assertion":[{"value":"2019-10-15","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}