{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:35:54Z","timestamp":1750221354858,"version":"3.41.0"},"publisher-location":"New York, New York, USA","reference-count":39,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1145\/3184558.3186352","type":"proceedings-article","created":{"date-parts":[[2018,4,18]],"date-time":"2018-04-18T18:04:25Z","timestamp":1524074665000},"page":"379-386","source":"Crossref","is-referenced-by-count":0,"title":["Discovering Connotations as Labels for Weakly Supervised Image-Sentence Data"],"prefix":"10.1145","author":[{"given":"Aditya","family":"Mogadala","sequence":"first","affiliation":[{"name":"Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bhargav","family":"Kanuparthi","sequence":"additional","affiliation":[{"name":"Birla Institute of Technology and Science, Hyderabad, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Achim","family":"Rettinger","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"York","family":"Sure-Vetter","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","reference":[{"key":"key-10.1145\/3184558.3186352-1","unstructured":"Galen Andrew, Raman Arora, Jeff Bilmes, and Karen Livescu. 2013. Deep canonical correlation analysis. In International Conference on Machine Learning. 1247--1255."},{"key":"key-10.1145\/3184558.3186352-2","unstructured":"David M Blei and Michael I Jordan. 2003. Modeling annotated data. In Proceedings of the 26th annual international ACM SIGIR conference on Research and development in informaion retrieval. ACM, 127--134."},{"key":"key-10.1145\/3184558.3186352-3","unstructured":"Kalina Bontcheva and Dominic Rout. 2014. Making sense of social media streams through semantics: a survey. Semantic Web Vol. 5, 5 (2014), 373--403."},{"key":"key-10.1145\/3184558.3186352-4","doi-asserted-by":"crossref","unstructured":"Zhe Cao, Tao Qin, Tie-Yan Liu, Ming-Feng Tsai, and Hang Li. 2007. Learning to rank: from pairwise approach to listwise approach Proceedings of the 24th international conference on Machine learning. ACM, 129--136.","DOI":"10.1145\/1273496.1273513"},{"key":"key-10.1145\/3184558.3186352-5","unstructured":"Rudolf Carnap. 1988. Meaning and necessity: a study in semantics and modal logic. University of Chicago Press."},{"key":"key-10.1145\/3184558.3186352-6","doi-asserted-by":"crossref","unstructured":"Xinlei Chen and Abhinav Gupta. 2015. Webly supervised learning of convolutional networks Proceedings of the IEEE International Conference on Computer Vision. 1431--1439.","DOI":"10.1109\/ICCV.2015.168"},{"key":"key-10.1145\/3184558.3186352-7","doi-asserted-by":"crossref","unstructured":"Xinlei Chen, Abhinav Shrivastava, and Abhinav Gupta. 2013. Neil: Extracting visual knowledge from web data. In Proceedings of the IEEE International Conference on Computer Vision. 1409--1416.","DOI":"10.1109\/ICCV.2013.178"},{"key":"key-10.1145\/3184558.3186352-8","unstructured":"Jia Deng, Wei Dong, Richard Socher, Li-Jia Li, Kai Li, and Li Fei-Fei. 2009. Imagenet: A large-scale hierarchical image database Computer Vision and Pattern Recognition, 2009. CVPR 2009. IEEE Conference on. IEEE, 248--255."},{"key":"key-10.1145\/3184558.3186352-9","doi-asserted-by":"crossref","unstructured":"Emily Denton, Jason Weston, Manohar Paluri, Lubomir Bourdev, and Rob Fergus. 2015. User conditional hashtag prediction for images. In Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, 1731--1740.","DOI":"10.1145\/2783258.2788576"},{"key":"key-10.1145\/3184558.3186352-10","doi-asserted-by":"crossref","unstructured":"Bhuwan Dhingra, Zhong Zhou, Dylan Fitzpatrick, Michael Muehl, and William W Cohen. 2016. Tweet2vec: Character-based distributed representations for social media. arXiv preprint arXiv:1605.03481 (2016).","DOI":"10.18653\/v1\/P16-2044"},{"key":"key-10.1145\/3184558.3186352-11","doi-asserted-by":"crossref","unstructured":"Andr&#233; Elisseeff and Jason Weston. 2002. A kernel method for multi-labelled classification. In Advances in neural information processing systems. 681--687.","DOI":"10.7551\/mitpress\/1120.003.0092"},{"key":"key-10.1145\/3184558.3186352-12","unstructured":"Fangxiang Feng, Xiaojie Wang, and Ruifan Li. 2014. Cross-modal retrieval with correspondence autoencoder Proceedings of the 22nd ACM international conference on Multimedia. ACM, 7--16."},{"key":"key-10.1145\/3184558.3186352-13","unstructured":"Andrea Frome, Greg S Corrado, Jon Shlens, Samy Bengio, Jeff Dean, Tomas Mikolov, et almbox. 2013. Devise: A deep visual-semantic embedding model. In Advances in neural information processing systems. 2121--2129."},{"key":"key-10.1145\/3184558.3186352-14","unstructured":"Yoav Goldberg and Omer Levy. 2014. word2vec Explained: deriving Mikolov et al.'s negative-sampling word-embedding method. arXiv preprint arXiv:1402.3722 (2014)."},{"key":"key-10.1145\/3184558.3186352-15","unstructured":"Yuyun Gong and Qi Zhang. 2016. Hashtag Recommendation Using Attention-Based Convolutional Neural Network. IJCAI. 2782--2788."},{"key":"key-10.1145\/3184558.3186352-16","doi-asserted-by":"crossref","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition. 770--778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"key-10.1145\/3184558.3186352-17","doi-asserted-by":"crossref","unstructured":"Yoon Kim, Yacine Jernite, David Sontag, and Alexander M Rush. 2016. Character-Aware Neural Language Models. In AAAI. 2741--2749.","DOI":"10.1609\/aaai.v30i1.10362"},{"key":"key-10.1145\/3184558.3186352-18","unstructured":"Diederik Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"key-10.1145\/3184558.3186352-19","doi-asserted-by":"crossref","unstructured":"Ranjay Krishna, Yuke Zhu, Oliver Groth, Justin Johnson, Kenji Hata, Joshua Kravitz, Stephanie Chen, Yannis Kalantidis, Li-Jia Li, David A Shamma, et almbox. 2017. Visual genome: Connecting language and vision using crowdsourced dense image annotations. International Journal of Computer Vision Vol. 123, 1 (2017), 32--73.","DOI":"10.1007\/s11263-016-0981-7"},{"key":"key-10.1145\/3184558.3186352-20","unstructured":"Yuncheng Li, Yale Song, and Jiebo Luo. 2017. Improving Pairwise Ranking for Multi-label Image Classification. arXiv preprint arXiv:1704.03135 (2017)."},{"key":"key-10.1145\/3184558.3186352-21","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll&#225;r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In European conference on computer vision. Springer, 740--755."},{"key":"key-10.1145\/3184558.3186352-22","doi-asserted-by":"crossref","unstructured":"Ishan Misra, C Lawrence Zitnick, Margaret Mitchell, and Ross Girshick. 2016. Seeing through the human reporting bias: Visual classifiers from noisy human-centric labels. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2930--2939.","DOI":"10.1109\/CVPR.2016.320"},{"key":"key-10.1145\/3184558.3186352-23","unstructured":"Aditya Mogadala and Achim Rettinger. 2015. Multi-modal Correlated Centroid Space for Multi-lingual Cross-Modal Retrieval European Conference on Information Retrieval. Springer, 68--79."},{"key":"key-10.1145\/3184558.3186352-24","unstructured":"Cesc Chunseong Park, Byeongchang Kim, and Gunhee Kim. 2017. Attend to You: Personalized Image Captioning with Context Sequence Memory Networks. arXiv preprint arXiv:1704.06485 (2017)."},{"key":"key-10.1145\/3184558.3186352-25","doi-asserted-by":"crossref","unstructured":"NN Pascu. 1979. Alpha-close-to-convex functions. In Romanian-Finnish Seminar on Complex Analysis. Springer, 331--335.","DOI":"10.1007\/BFb0079505"},{"key":"key-10.1145\/3184558.3186352-26","unstructured":"Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, et almbox. 2015. Imagenet large scale visual recognition challenge. International Journal of Computer Vision Vol. 115, 3 (2015), 211--252."},{"key":"key-10.1145\/3184558.3186352-27","doi-asserted-by":"crossref","unstructured":"Jieying She and Lei Chen. 2014. Tomoha: Topic model-based hashtag recommendation on twitter Proceedings of the 23rd International Conference on World Wide Web. ACM, 371--372.","DOI":"10.1145\/2567948.2577292"},{"key":"key-10.1145\/3184558.3186352-28","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"key-10.1145\/3184558.3186352-29","unstructured":"Nitish Srivastava and Ruslan R Salakhutdinov. 2012. Multimodal learning with deep boltzmann machines. In Advances in neural information processing systems. 2222--2230."},{"key":"key-10.1145\/3184558.3186352-30","doi-asserted-by":"crossref","unstructured":"Bart Thomee, David A Shamma, Gerald Friedland, Benjamin Elizalde, Karl Ni, Douglas Poland, Damian Borth, and Li-Jia Li. 2016. YFCC100M: The new data in multimedia research. Commun. ACM Vol. 59, 2 (2016), 64--73.","DOI":"10.1145\/2812802"},{"key":"key-10.1145\/3184558.3186352-31","doi-asserted-by":"crossref","unstructured":"Andreas Veit, Neil Alldrin, Gal Chechik, Ivan Krasin, Abhinav Gupta, and Serge Belongie. 2017. Learning From Noisy Large-Scale Datasets With Minimal Supervision. arXiv preprint arXiv:1701.01619 (2017).","DOI":"10.1109\/CVPR.2017.696"},{"key":"key-10.1145\/3184558.3186352-32","doi-asserted-by":"crossref","unstructured":"Jason Weston, Samy Bengio, and Nicolas Usunier. 2010. Large scale image annotation: learning to rank with joint word-image embeddings. Machine learning Vol. 81, 1 (2010), 21--35.","DOI":"10.1007\/s10994-010-5198-3"},{"key":"key-10.1145\/3184558.3186352-33","doi-asserted-by":"crossref","unstructured":"Jason Weston, Sumit Chopra, and Keith Adams. 2014. #TagSpace: Semantic Embeddings from Hashtags. In Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP). 1822--1827.","DOI":"10.3115\/v1\/D14-1194"},{"key":"key-10.1145\/3184558.3186352-34","doi-asserted-by":"crossref","unstructured":"Yan Xia, Xudong Cao, Fang Wen, and Jian Sun. 2014. Well begun is half done: Generating high-quality seeds for automatic image dataset construction from web. In European Conference on Computer Vision. Springer, 387--400.","DOI":"10.1007\/978-3-319-10593-2_26"},{"key":"key-10.1145\/3184558.3186352-35","doi-asserted-by":"crossref","unstructured":"Tong Xiao, Tian Xia, Yi Yang, Chang Huang, and Xiaogang Wang. 2015. Learning from massive noisy labeled data for image classification Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2691--2699.","DOI":"10.1109\/CVPR.2015.7298885"},{"key":"key-10.1145\/3184558.3186352-36","unstructured":"Yazhou Yao, Jian Zhang, Fumin Shen, Xian-Sheng Hua, Jingsong Xu, and Zhenmin Tang. 2017. Exploiting Web Images for Dataset Construction: A Domain Robust Approach. IEEE Transactions on Multimedia (2017)."},{"key":"key-10.1145\/3184558.3186352-37","doi-asserted-by":"crossref","unstructured":"Lei Zhang and Achim Rettinger. 2014. X-LiSA: cross-lingual semantic annotation. Proceedings of the VLDB Endowment Vol. 7, 13 (2014), 1693--1696.","DOI":"10.14778\/2733004.2733063"},{"key":"key-10.1145\/3184558.3186352-38","doi-asserted-by":"crossref","unstructured":"Min-Ling Zhang and Zhi-Hua Zhou. 2006. Multilabel neural networks with applications to functional genomics and text categorization. IEEE transactions on Knowledge and Data Engineering Vol. 18, 10 (2006), 1338--1351.","DOI":"10.1109\/TKDE.2006.162"},{"key":"key-10.1145\/3184558.3186352-39","unstructured":"Yan-Tao Zheng, Ming Zhao, Yang Song, Hartwig Adam, Ulrich Buddemeier, Alessandro Bissacco, Fernando Brucher, Tat-Seng Chua, and Hartmut Neven. 2009. Tour the world: building a web-scale landmark recognition engine Computer vision and pattern recognition, 2009. CVPR 2009. IEEE conference on. IEEE, 1085--1092."}],"event":{"number":"2018","sponsor":["IW3C2, International World Wide Web Conference Committee","SIGWEB, ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"acronym":"WWW '18","name":"Companion of the The Web Conference 2018","start":{"date-parts":[[2018,4,23]]},"location":"Lyon, France","end":{"date-parts":[[2018,4,27]]}},"container-title":["Companion of the The Web Conference 2018 on The Web Conference 2018 - WWW '18"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3184558.3186352","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3186352&ftid=1958208&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:26:07Z","timestamp":1750213567000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3184558.3186352"}},"subtitle":[],"proceedings-subject":"The Web Conference 2018","short-title":[],"issued":{"date-parts":[[2018]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1145\/3184558.3186352","relation":{},"subject":[],"published":{"date-parts":[[2018]]}}}