{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T07:05:08Z","timestamp":1781075108366,"version":"3.54.1"},"publisher-location":"New York, New York, USA","reference-count":45,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"SHMEC","award":["16CG24"],"award-info":[{"award-number":["16CG24"]}]},{"name":"Shanghai Sailing Program","award":["17YF1404500"],"award-info":[{"award-number":["17YF1404500"]}]},{"name":"NSFC-Zhejiang","award":["U1609220"],"award-info":[{"award-number":["U1609220"]}]},{"name":"NSFC","award":["61702190"],"award-info":[{"award-number":["61702190"]}]},{"name":"NSFC","award":["61672236"],"award-info":[{"award-number":["61672236"]}]},{"name":"NSFC","award":["61672231"],"award-info":[{"award-number":["61672231"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1145\/3178876.3186026","type":"proceedings-article","created":{"date-parts":[[2018,4,13]],"date-time":"2018-04-13T15:53:48Z","timestamp":1523634828000},"page":"1277-1286","source":"Crossref","is-referenced-by-count":60,"title":["User-guided Hierarchical Attention Network for Multi-modal Social Image Popularity Prediction"],"prefix":"10.1145","author":[{"given":"Wei","family":"Zhang","sequence":"first","affiliation":[{"name":"East China Normal University, Putuo Qu, Shanghai Shi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wen","family":"Wang","sequence":"additional","affiliation":[{"name":"East China Normal University, Putuo Qu, Shanghai Shi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jun","family":"Wang","sequence":"additional","affiliation":[{"name":"East China Normal University, Putuo Qu, Shanghai Shi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hongyuan","family":"Zha","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","reference":[{"key":"key-10.1145\/3178876.3186026-1","doi-asserted-by":"crossref","unstructured":"Stanislaw Antol, Aishwarya Agrawal, Jiasen Lu, Margaret Mitchell, Dhruv Batra, C. Lawrence Zitnick, and Devi Parikh. 2015. VQA: Visual Question Answering. In ICCV. 2425--2433.","DOI":"10.1109\/ICCV.2015.279"},{"key":"key-10.1145\/3178876.3186026-2","unstructured":"Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2014. Neural Machine Translation by Jointly Learning to Align and Translate. CoRR Vol. abs\/1409.0473 (2014). [arxiv]1409.0473"},{"key":"key-10.1145\/3178876.3186026-3","unstructured":"David M. Blei, Andrew Y. Ng, and Michael I. Jordan. 2003. Latent Dirichlet Allocation. Journal of Machine Learning Research Vol. 3 (2003), 993--1022."},{"key":"key-10.1145\/3178876.3186026-4","doi-asserted-by":"crossref","unstructured":"Biao Chang, Hengshu Zhu, Yong Ge, Enhong Chen, Hui Xiong, and Chang Tan. 2014. Predicting the Popularity of Online Serials with Autoregressive Models CIKM. 1339--1348.","DOI":"10.1145\/2661829.2662055"},{"key":"key-10.1145\/3178876.3186026-5","doi-asserted-by":"crossref","unstructured":"Jingyuan Chen, Xuemeng Song, Liqiang Nie, Xiang Wang, Hanwang Zhang, and Tat-Seng Chua. 2016. Micro Tells Macro: Predicting the Popularity of Micro-Videos via a Transductive Model MM. 898--907.","DOI":"10.1145\/2964284.2964314"},{"key":"key-10.1145\/3178876.3186026-6","unstructured":"Kan Chen, Jiang Wang, Liang-Chieh Chen, Haoyuan Gao, Wei Xu, and Ram Nevatia. 2015. ABC-CNN: An Attention Based Convolutional Neural Network for Visual Question Answering. CoRR Vol. abs\/1511.05960 (2015). [arxiv]1511.05960"},{"key":"key-10.1145\/3178876.3186026-7","unstructured":"Cesc Chunseong Park, Byeongchang Kim, and Gunhee Kim. 2017. Attend to You: Personalized Image Captioning With Context Sequence Memory Networks CVPR. 895--903."},{"key":"key-10.1145\/3178876.3186026-8","doi-asserted-by":"crossref","unstructured":"Peng Cui, Fei Wang, Shaowei Liu, Mingdong Ou, Shiqiang Yang, and Lifeng Sun. 2011. Who should share what?: item-level social influence prediction for users and posts ranking. In SIGIR. 185--194.","DOI":"10.1145\/2009916.2009945"},{"key":"key-10.1145\/3178876.3186026-9","doi-asserted-by":"crossref","unstructured":"Francesco Gelli, Tiberio Uricchio, Marco Bertini, Alberto Del Bimbo, and Shih-Fu Chang. 2015. Image Popularity Prediction in Social Media Using Sentiment and Context Features MM. 907--910.","DOI":"10.1145\/2733373.2806361"},{"key":"key-10.1145\/3178876.3186026-10","unstructured":"Alan G Hawkes. 1971. Spectra of some self-exciting and mutually exciting point processes. Biometrika (1971), 83--90."},{"key":"key-10.1145\/3178876.3186026-11","doi-asserted-by":"crossref","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR. 770--778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"key-10.1145\/3178876.3186026-12","doi-asserted-by":"crossref","unstructured":"Xiangnan He, Ming Gao, Min-Yen Kan, Yiqun Liu, and Kazunari Sugiyama. 2014. Predicting the popularity of web 2.0 items based on user comments SIGIR. 233--242.","DOI":"10.1145\/2600428.2609558"},{"key":"key-10.1145\/3178876.3186026-13","doi-asserted-by":"crossref","unstructured":"Sepp Hochreiter and J&#252;rgen Schmidhuber. 1997. Long short-term memory. Neural computation Vol. 9, 8 (1997), 1735--1780.","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"key-10.1145\/3178876.3186026-14","doi-asserted-by":"crossref","unstructured":"Yoonseop Kang, Saehoon Kim, and Seungjin Choi. 2012. Deep Learning to Hash with Multiple Representations ICDM. 930--935.","DOI":"10.1109\/ICDM.2012.24"},{"key":"key-10.1145\/3178876.3186026-15","doi-asserted-by":"crossref","unstructured":"Andrej Karpathy and Fei-Fei Li. 2015. Deep visual-semantic alignments for generating image descriptions CVPR. 3128--3137.","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"key-10.1145\/3178876.3186026-16","doi-asserted-by":"crossref","unstructured":"Aditya Khosla, Atish Das Sarma, and Raffay Hamid. 2014. What makes an image popular?. In WWW. 867--876.","DOI":"10.1145\/2566486.2567996"},{"key":"key-10.1145\/3178876.3186026-17","unstructured":"Jin-Hwa Kim, Sang-Woo Lee, Dong-Hyun Kwak, Min-Oh Heo, Jeonghee Kim, JungWoo Ha, and Byoung-Tak Zhang. 2016. Multimodal Residual Learning for Visual QA. In NIPS. 361--369."},{"key":"key-10.1145\/3178876.3186026-18","unstructured":"Jin-Hwa Kim, Kyoung-Woon On, Jeonghee Kim, Jung-Woo Ha, and Byoung-Tak Zhang. 2017. Hadamard product for low-rank bilinear pooling. ICLR (2017)."},{"key":"key-10.1145\/3178876.3186026-19","unstructured":"Diederik P. Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. In ICLR."},{"key":"key-10.1145\/3178876.3186026-20","doi-asserted-by":"crossref","unstructured":"Himabindu Lakkaraju and Jitendra Ajmera. 2011. Attention prediction on social media brand pages. In CIKM. 2157--2160.","DOI":"10.1145\/2063576.2063915"},{"key":"key-10.1145\/3178876.3186026-21","doi-asserted-by":"crossref","unstructured":"Yann LeCun, Yoshua Bengio, and Geoffrey Hinton. 2015. Deep learning. Nature Vol. 521, 7553 (2015), 436--444.","DOI":"10.1038\/nature14539"},{"key":"key-10.1145\/3178876.3186026-22","doi-asserted-by":"crossref","unstructured":"Kathy Lee, Ashequl Qadir, Sadid A. Hasan, Vivek V. Datla, Aaditya Prakash, Joey Liu, and Oladimeji Farri. [n. d.]. Adverse Drug Event Detection in Tweets with Semi-Supervised Convolutional Neural Networks. In WWW. 705--714.","DOI":"10.1145\/3038912.3052671"},{"key":"key-10.1145\/3178876.3186026-23","unstructured":"Chee Wee Leong, Rada Mihalcea, and Samer Hassan. 2010. Text Mining for Automatic Image Tagging. In COLING. 647--655."},{"key":"key-10.1145\/3178876.3186026-24","doi-asserted-by":"crossref","unstructured":"Cheng Li, Jiaqi Ma, Xiaoxiao Guo, and Qiaozhu Mei. 2017. DeepCas: An End-to-end Predictor of Information Cascades WWW. 577--586.","DOI":"10.1145\/3038912.3052643"},{"key":"key-10.1145\/3178876.3186026-25","doi-asserted-by":"crossref","unstructured":"Pan Lu, Hongsheng Li, Wei Zhang, Jianyong Wang, and Xiaogang Wang. 2018. Co-attending Free-form Regions and Detections with Multi-modal Multiplicative Feature Embedding for Visual Question Answering. In AAAI.","DOI":"10.1609\/aaai.v32i1.12240"},{"key":"key-10.1145\/3178876.3186026-26","doi-asserted-by":"crossref","unstructured":"Corey Lynch, Kamelia Aryafar, and Josh Attenberg. 2016. Images Don't Lie: Transferring Deep Visual Semantic Features to Large-Scale Multimodal Learning to Rank. In SIGKDD. 541--548.","DOI":"10.1145\/2939672.2939728"},{"key":"key-10.1145\/3178876.3186026-27","doi-asserted-by":"crossref","unstructured":"Travis Martin, Jake M. Hofman, Amit Sharma, Ashton Anderson, and Duncan J. Watts. 2016. Exploring Limits to Prediction in Complex Social Systems WWW. 683--694.","DOI":"10.1145\/2872427.2883001"},{"key":"key-10.1145\/3178876.3186026-28","unstructured":"Volodymyr Mnih, Nicolas Heess, Alex Graves, and Koray Kavukcuoglu. 2014. Recurrent Models of Visual Attention. In NIPS. 2204--2212."},{"key":"key-10.1145\/3178876.3186026-29","unstructured":"Hyeonseob Nam, Jung-Woo Ha, and Jeonghee Kim. 2017. Dual Attention Networks for Multimodal Reasoning and Matching CVPR. 299--307."},{"key":"key-10.1145\/3178876.3186026-30","unstructured":"Jiquan Ngiam, Aditya Khosla, Mingyu Kim, Juhan Nam, Honglak Lee, and Andrew Y. Ng. 2011. Multimodal Deep Learning. In ICML. 689--696."},{"key":"key-10.1145\/3178876.3186026-31","doi-asserted-by":"crossref","unstructured":"Behnaz Nojavanasghari, Deepak Gopinath, Jayanth Koushik, Tadas Baltrusaitis, and Louis-Philippe Morency. 2016. Deep multimodal fusion for persuasiveness prediction ICML. 284--288.","DOI":"10.1145\/2993148.2993176"},{"key":"key-10.1145\/3178876.3186026-32","doi-asserted-by":"crossref","unstructured":"Jeffrey Pennington, Richard Socher, and Christopher D. Manning. 2014. Glove: Global Vectors for Word Representation. In EMNLP. 1532--1543.","DOI":"10.3115\/v1\/D14-1162"},{"key":"key-10.1145\/3178876.3186026-33","doi-asserted-by":"crossref","unstructured":"Marian-Andrei Rizoiu, Lexing Xie, Scott Sanner, Manuel Cebri&#225;n, Honglin Yu, and Pascal Van Hentenryck. 2017. Expecting to be HIP: Hawkes Intensity Processes for Social Media Popularity WWW. 735--744.","DOI":"10.1145\/3038912.3052650"},{"key":"key-10.1145\/3178876.3186026-34","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR Vol. abs\/1409.1556 (2014). [arxiv]1409.1556"},{"key":"key-10.1145\/3178876.3186026-35","doi-asserted-by":"crossref","unstructured":"G&#225;bor Szab&#243; and Bernardo A. Huberman. 2010. Predicting the popularity of online content. Journal of Commun. ACM Vol. 53, 8 (2010), 80--88.","DOI":"10.1145\/1787234.1787254"},{"key":"key-10.1145\/3178876.3186026-36","unstructured":"Kai Sheng Tai, Richard Socher, and Christopher D. Manning. 2015. Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks. In ACL. 1556--1566."},{"key":"key-10.1145\/3178876.3186026-37","doi-asserted-by":"crossref","unstructured":"Oren Tsur and Ari Rappoport. 2012. What's in a hashtag?: content based prediction of the spread of ideas in microblogging communities. In WSDM. 643--652.","DOI":"10.1145\/2124295.2124320"},{"key":"key-10.1145\/3178876.3186026-38","unstructured":"Daixin Wang, Peng Cui, Mingdong Ou, and Wenwu Zhu. 2015. Deep Multimodal Hashing with Orthogonal Regularization IJCAI. 2291--2297."},{"key":"key-10.1145\/3178876.3186026-39","unstructured":"William M Wells, Paul Viola, Hideki Atsumi, Shin Nakajima, and Ron Kikinis. 1996. Multi-modal volume registration by maximization of mutual information. Medical image analysis Vol. 1, 1 (1996), 35--51."},{"key":"key-10.1145\/3178876.3186026-40","doi-asserted-by":"crossref","unstructured":"Bo Wu, Wen-Huang Cheng, Yongdong Zhang, Qiushi Huang, Jintao Li, and Tao Mei. 2017. Sequential Prediction of Social Media Popularity with Deep Temporal Context Networks IJCAI. 3062--3068.","DOI":"10.24963\/ijcai.2017\/427"},{"key":"key-10.1145\/3178876.3186026-41","doi-asserted-by":"crossref","unstructured":"Bo Wu, Wen-Huang Cheng, Yongdong Zhang, and Tao Mei. 2016. Time Matters: Multi-scale Temporalization of Social Media Popularity MM. 1336--1344.","DOI":"10.1145\/2964284.2964335"},{"key":"key-10.1145\/3178876.3186026-42","unstructured":"Shuai Xiao, Junchi Yan, Changsheng Li, Bo Jin, Xiangfeng Wang, Xiaokang Yang, Stephen M. Chu, and Hongyuan Zha. 2016. On Modeling and Predicting Individual Paper Citation Count over Time IJCAI. 2676--2682."},{"key":"key-10.1145\/3178876.3186026-43","doi-asserted-by":"crossref","unstructured":"Zichao Yang, Xiaodong He, Jianfeng Gao, Li Deng, and Alexander J. Smola. 2016. Stacked Attention Networks for Image Question Answering CVPR. 21--29.","DOI":"10.1109\/CVPR.2016.10"},{"key":"key-10.1145\/3178876.3186026-44","unstructured":"Chao Zhang, Keyang Zhang, Quan Yuan, Haoruo Peng, Yu Zheng, Tim Hanratty, Shaowen Wang, and Jiawei Han. 2017. Regions, Periods, Activities: Uncovering Urban Dynamics via Cross-Modal Representation Learning. In WWW. 361--370."},{"key":"key-10.1145\/3178876.3186026-45","doi-asserted-by":"crossref","unstructured":"Qingyuan Zhao, Murat A. Erdogdu, Hera Y. He, Anand Rajaraman, and Jure Leskovec. 2015. SEISMIC: A Self-Exciting Point Process Model for Predicting Tweet Popularity SIGKDD. 1513--1522.","DOI":"10.1145\/2783258.2783401"}],"event":{"name":"the 2018 World Wide Web Conference","location":"Lyon, France","acronym":"WWW '18","number":"2018","sponsor":["SIGWEB, ACM Special Interest Group on Hypertext, Hypermedia, and Web","IW3C2, International World Wide Web Conference Committee"],"start":{"date-parts":[[2018,4,23]]},"end":{"date-parts":[[2018,4,27]]}},"container-title":["Proceedings of the 2018 World Wide Web Conference on World Wide Web - WWW '18"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3178876.3186026","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3186026&ftid=1957438&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T17:27:30Z","timestamp":1751563650000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3178876.3186026"}},"subtitle":[],"proceedings-subject":"World Wide Web","short-title":[],"issued":{"date-parts":[[2018]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1145\/3178876.3186026","relation":{},"subject":[],"published":{"date-parts":[[2018]]}}}