{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:40:01Z","timestamp":1750221601295,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,12,18]],"date-time":"2016-12-18T00:00:00Z","timestamp":1482019200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,12,18]]},"DOI":"10.1145\/3009977.3010010","type":"proceedings-article","created":{"date-parts":[[2016,12,22]],"date-time":"2016-12-22T21:20:29Z","timestamp":1482441629000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards semantic visual representation"],"prefix":"10.1145","author":[{"given":"Konda Reddy","family":"Mopuri","sequence":"first","affiliation":[{"name":"Indian Institute of Science, Bangalore, India"}]},{"given":"R. Venkatesh","family":"Babu","sequence":"additional","affiliation":[{"name":"Indian Institute of Science, Bangalore, India"}]}],"member":"320","published-online":{"date-parts":[[2016,12,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2001.937654"},{"key":"e_1_3_2_1_2_1","volume-title":"2008","author":"Blaschko M.","year":"2008","unstructured":"M. Blaschko and C. H. Lampert . Correlational spectral clustering . In 2008 , 2008 . 1, 4 M. Blaschko and C. H. Lampert. Correlational spectral clustering. In 2008, 2008. 1, 4"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/860435.860460"},{"key":"e_1_3_2_1_4_1","volume-title":"One billion word benchmark for measuring progress in statistical language modeling. CoRR, abs\/1312.3005","author":"Chelba C.","year":"2013","unstructured":"C. Chelba , T. Mikolov , M. Schuster , Q. Ge , T. Brants , and P. Koehn . One billion word benchmark for measuring progress in statistical language modeling. CoRR, abs\/1312.3005 , 2013 . 3.2 C. Chelba, T. Mikolov, M. Schuster, Q. Ge, T. Brants, and P. Koehn. One billion word benchmark for measuring progress in statistical language modeling. CoRR, abs\/1312.3005, 2013. 3.2"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"e_1_3_2_1_6_1","volume-title":"Word2visualvec: Cross-media retrieval by visual feature prediction. CoRR, abs\/1604.06838","author":"Dong J.","year":"2016","unstructured":"J. Dong , X. Li , and C. G. M. Snoek . Word2visualvec: Cross-media retrieval by visual feature prediction. CoRR, abs\/1604.06838 , 2016 . 2.2 J. Dong, X. Li, and C. G. M. Snoek. Word2visualvec: Cross-media retrieval by visual feature prediction. CoRR, abs\/1604.06838, 2016. 2.2"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/645318.649254"},{"key":"e_1_3_2_1_8_1","volume-title":"Interesting objects are visually salient. Journal of vision, 8(3)","author":"Elazary L.","year":"2008","unstructured":"L. Elazary and L. Itti . Interesting objects are visually salient. Journal of vision, 8(3) , 2008 . 4 L. Elazary and L. Itti. Interesting objects are visually salient. Journal of vision, 8(3), 2008. 4"},{"key":"e_1_3_2_1_9_1","volume-title":"et al. Efficient estimation of word representations in vector space. CoRR, abs\/1301.3781","author":"T.","year":"2013","unstructured":"T. M. et al. Efficient estimation of word representations in vector space. CoRR, abs\/1301.3781 , 2013 . 1, 2.2, 2.2, 3.2 T. M. et al. Efficient estimation of word representations in vector space. CoRR, abs\/1301.3781, 2013. 1, 2.2, 2.2, 3.2"},{"key":"e_1_3_2_1_10_1","first-page":"1137","article-title":"A neural probabilistic language model","volume":"3","author":"Y. B.","year":"2003","unstructured":"Y. B. et al . A neural probabilistic language model . JMLR , 3 : 1137 -- 1155 , 2003 . 1, 2.2 Y. B. et al. A neural probabilistic language model. JMLR, 3:1137--1155, 2003. 1, 2.2","journal-title":"JMLR"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/1888089.1888092"},{"key":"e_1_3_2_1_12_1","volume-title":"NIPS","author":"Frome A.","year":"2013","unstructured":"A. Frome , G. Corrado , J. Shlens , S. Bengio , J. Dean , M. Ranzato , and T. Mikolov . Devise: A deep visual-semantic embedding model . In NIPS , 2013 . 4 A. Frome, G. Corrado, J. Shlens, S. Bengio, J. Dean, M. Ranzato, and T. Mikolov. Devise: A deep visual-semantic embedding model. In NIPS, 2013. 4"},{"key":"e_1_3_2_1_13_1","first-page":"2","article-title":"The iapr benchmark: A new evaluation resource for visual information systems","author":"Grubinger M. e. a.","year":"2006","unstructured":"M. e. a. Grubinger . The iapr benchmark: A new evaluation resource for visual information systems . In Language Resources and Evaluation , 2006 . 2 , 3.1 M. e. a. Grubinger. The iapr benchmark: A new evaluation resource for visual information systems. In Language Resources and Evaluation, 2006. 2, 3.1","journal-title":"Language Resources and Evaluation"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1162\/0899766042321814"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/2566972.2566993"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1460096.1460104"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1743384.1743475"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.24.58"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-011-0494-3"},{"key":"e_1_3_2_1_20_1","first-page":"1889","volume-title":"Advances in Neural Information Processing Systems","author":"Karpathy A.","year":"2014","unstructured":"A. Karpathy , A. Joulin , and F. Li . Deep fragment embeddings for bidirectional image sentence mapping . In Advances in Neural Information Processing Systems , pages 1889 -- 1897 . 2014 . 2.3 A. Karpathy, A. Joulin, and F. Li. Deep fragment embeddings for bidirectional image sentence mapping. In Advances in Neural Information Processing Systems, pages 1889--1897. 2014. 2.3"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_22_1","volume-title":"Skip-thought vectors. CoRR, abs\/1506.06726","author":"Kiros R.","year":"2015","unstructured":"R. Kiros , Y. Zhu , R. Salakhutdinov , R. S. Zemel , A. Torralba , R. Urtasun , and S. Fidler . Skip-thought vectors. CoRR, abs\/1506.06726 , 2015 . 2.2 R. Kiros, Y. Zhu, R. Salakhutdinov, R. S. Zemel, A. Torralba, R. Urtasun, and S. Fidler. Skip-thought vectors. CoRR, abs\/1506.06726, 2015. 2.2"},{"key":"e_1_3_2_1_23_1","first-page":"1","article-title":"Imagenet classification with deep convolutional neural networks","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky , I. Sutskever , and G. E. Hinton . Imagenet classification with deep convolutional neural networks . In NIPS. 2012 . 1 , 2.1, 3.2 A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet classification with deep convolutional neural networks. In NIPS. 2012. 1, 2.1, 3.2","journal-title":"NIPS."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"e_1_3_2_1_25_1","first-page":"553","volume-title":"NIPS 16","author":"Lavrenko V.","year":"2004","unstructured":"V. Lavrenko , R. Manmatha , and J. Jeon . A model for learning the semantics of pictures. In S. Thrun, L. Saul, and B. Sch\u00f6lkopf, editors , NIPS 16 , pages 553 -- 560 , 2004 . 4 V. Lavrenko, R. Manmatha, and J. Jeon. A model for learning the semantics of pictures. In S. Thrun, L. Saul, and B. Sch\u00f6lkopf, editors, NIPS 16, pages 553--560, 2004. 4"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767773"},{"key":"e_1_3_2_1_27_1","volume-title":"Microsoft COCO: common objects in context. CoRR, abs\/1405.0312","author":"Lin T.","year":"2014","unstructured":"T. Lin , M. Maire , S. J. Belongie , L. D. Bourdev , R. B. Girshick , J. Hays , P. Perona , D. Ramanan , P. Doll\u00e1r , and C. L. Zitnick . Microsoft COCO: common objects in context. CoRR, abs\/1405.0312 , 2014 . 2.3, 3.3 T. Lin, M. Maire, S. J. Belongie, L. D. Bourdev, R. B. Girshick, J. Hays, P. Perona, D. Ramanan, P. Doll\u00e1r, and C. L. Zitnick. Microsoft COCO: common objects in context. CoRR, abs\/1405.0312, 2014. 2.3, 3.3"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_29_1","volume-title":"Exploiting similarities among languages for machine translation. arXiv preprint arXiv:1309.4168","author":"Mikolov T.","year":"2013","unstructured":"T. Mikolov , Q. V. Le , and I. Sutskever . Exploiting similarities among languages for machine translation. arXiv preprint arXiv:1309.4168 , 2013 . 1 T. Mikolov, Q. V. Le, and I. Sutskever. Exploiting similarities among languages for machine translation. arXiv preprint arXiv:1309.4168, 2013. 1"},{"key":"e_1_3_2_1_30_1","volume-title":"Distributed representations of words and phrases and their compositionality. CoRR, abs\/1310.4546","author":"Mikolov T.","year":"2013","unstructured":"T. Mikolov , I. Sutskever , K. Chen , G. Corrado , and J. Dean . Distributed representations of words and phrases and their compositionality. CoRR, abs\/1310.4546 , 2013 . 1 T. Mikolov, I. Sutskever, K. Chen, G. Corrado, and J. Dean. Distributed representations of words and phrases and their compositionality. CoRR, abs\/1310.4546, 2013. 1"},{"key":"e_1_3_2_1_31_1","volume-title":"Human Language Technologies: Conference of the North American Chapter of the Association of Computational Linguistics, Proceedings, June 9--14, 2013","author":"Mikolov T.","year":"2013","unstructured":"T. Mikolov , W. Yih , and G. Zweig . Linguistic regularities in continuous space word representations . In Human Language Technologies: Conference of the North American Chapter of the Association of Computational Linguistics, Proceedings, June 9--14, 2013 , Westin Peachtree Plaza Hotel, Atlanta, Georgia, USA, pages 746--751 , 2013 . 1 T. Mikolov, W. Yih, and G. Zweig. Linguistic regularities in continuous space word representations. In Human Language Technologies: Conference of the North American Chapter of the Association of Computational Linguistics, Proceedings, June 9--14, 2013, Westin Peachtree Plaza Hotel, Atlanta, Georgia, USA, pages 746--751, 2013. 1"},{"key":"e_1_3_2_1_32_1","volume-title":"Zero-shot learning by convex combination of semantic embeddings. CoRR, abs\/1312.5650","author":"Norouzi M.","year":"2013","unstructured":"M. Norouzi , T. Mikolov , S. Bengio , Y. Singer , J. Shlens , A. Frome , G. Corrado , and J. Dean . Zero-shot learning by convex combination of semantic embeddings. CoRR, abs\/1312.5650 , 2013 . 4 M. Norouzi, T. Mikolov, S. Bengio, Y. Singer, J. Shlens, A. Frome, G. Corrado, and J. Dean. Zero-shot learning by convex combination of semantic embeddings. CoRR, abs\/1312.5650, 2013. 4"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_35_1","volume-title":"Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556","author":"Simonyan K.","year":"2014","unstructured":"K. Simonyan and A. Zisserman . Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556 , 2014 . 1, 2.1 K. Simonyan and A. Zisserman. Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556, 2014. 1, 2.1"},{"key":"e_1_3_2_1_36_1","volume-title":"NIPS 2013","author":"Socher R.","year":"2013","unstructured":"R. Socher , M. Ganjoo , C. D. Manning , and A. Ng . Zero-shot learning through cross-modal transfer . In NIPS 2013 . 2013 . 4 R. Socher, M. Ganjoo, C. D. Manning, and A. Ng. Zero-shot learning through cross-modal transfer. In NIPS 2013. 2013. 4"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00177"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-88682-2_40"},{"key":"e_1_3_2_1_39_1","volume-title":"Going deeper with convolutions. CoRR, abs\/1409.4842","author":"Szegedy C.","year":"2014","unstructured":"C. Szegedy , W. Liu , Y. Jia , P. Sermanet , S. E. Reed , D. Anguelov , D. Erhan , V. Vanhoucke , and A. Rabinovich . Going deeper with convolutions. CoRR, abs\/1409.4842 , 2014 . 1, 2.1 C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. E. Reed, D. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich. Going deeper with convolutions. CoRR, abs\/1409.4842, 2014. 1, 2.1"},{"key":"e_1_3_2_1_40_1","volume-title":"sense2vec - A fast and accurate method for word sense disambiguation in neural word embeddings. CoRR, abs\/1511.06388","author":"Trask A.","year":"2015","unstructured":"A. Trask , P. Michalak , and J. Liu . sense2vec - A fast and accurate method for word sense disambiguation in neural word embeddings. CoRR, abs\/1511.06388 , 2015 . 2.2 A. Trask, P. Michalak, and J. Liu. sense2vec - A fast and accurate method for word sense disambiguation in neural word embeddings. CoRR, abs\/1511.06388, 2015. 2.2"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206816"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.5555\/2283696.2283856"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2009.5204274"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"}],"event":{"name":"ICVGIP '16: Indian Conference on Computer Vision, Graphics and Image Processing","sponsor":["Google Inc.","QI Qualcomm Inc.","Tata Consultancy Services","NVIDIA","MathWorks The MathWorks, Inc.","Microsoft Research Microsoft Research"],"location":"Guwahati Assam India","acronym":"ICVGIP '16"},"container-title":["Proceedings of the Tenth Indian Conference on Computer Vision, Graphics and Image Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3009977.3010010","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3009977.3010010","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:23:28Z","timestamp":1750220608000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3009977.3010010"}},"subtitle":["augmenting image representation with natural language descriptors"],"short-title":[],"issued":{"date-parts":[[2016,12,18]]},"references-count":45,"alternative-id":["10.1145\/3009977.3010010","10.1145\/3009977"],"URL":"https:\/\/doi.org\/10.1145\/3009977.3010010","relation":{},"subject":[],"published":{"date-parts":[[2016,12,18]]},"assertion":[{"value":"2016-12-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}