{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T07:29:15Z","timestamp":1770276555688,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,1]],"date-time":"2017-10-01T00:00:00Z","timestamp":1506816000000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61273299"],"award-info":[{"award-number":["61273299"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1110970"],"award-info":[{"award-number":["1110970"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,10]]},"DOI":"10.1145\/2964284.2967223","type":"proceedings-article","created":{"date-parts":[[2016,9,29]],"date-time":"2016-09-29T15:17:32Z","timestamp":1475162252000},"page":"262-266","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":50,"title":["Joint Image and Text Representation for Aesthetics Analysis"],"prefix":"10.1145","author":[{"given":"Ye","family":"Zhou","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"given":"Xin","family":"Lu","sequence":"additional","affiliation":[{"name":"Adobe Systems Inc., San Jose, CA, USA"}]},{"given":"Junping","family":"Zhang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"given":"James Z.","family":"Wang","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}]}],"member":"320","published-online":{"date-parts":[[2016,10]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873990"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744078_23"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995467"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.303"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2011-720"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"e_1_3_2_1_7_1","first-page":"1188","volume-title":"International Conference on Machine Learning (ICML)","author":"Le Q.","year":"2014","unstructured":"Q. Le and T. Mikolov. Distributed representations of sentences and documents. In International Conference on Machine Learning (ICML), pages 1188--1196, 2014."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654927"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-88690-7_29"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126444"},{"key":"e_1_3_2_1_11_1","volume-title":"Ensemble of generative and discriminative techniques for sentiment analysis of movie reviews. arXiv:1412.5335","author":"Mesnil G.","year":"2014","unstructured":"G. Mesnil, T. Mikolov, M. Ranzato, and Y. Bengio. Ensemble of generative and discriminative techniques for sentiment analysis of movie reviews. arXiv:1412.5335, 2014."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/2354409.2354807"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995539"},{"key":"e_1_3_2_1_14_1","volume-title":"Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556","author":"Simonyan K.","year":"2014","unstructured":"K. Simonyan and A. Zisserman. Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556, 2014."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999325.2999383"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2479916"},{"key":"e_1_3_2_1_17_1","volume-title":"Learning good taste: Classifying aesthetic images. Technical report","author":"Veerina P.","year":"2015","unstructured":"P. Veerina. Learning good taste: Classifying aesthetic images. Technical report, Stanford University, 2015."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/2390665.2390688"}],"event":{"name":"MM '16: ACM Multimedia Conference","location":"Amsterdam The Netherlands","acronym":"MM '16","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 24th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2964284.2967223","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2964284.2967223","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2964284.2967223","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:28:28Z","timestamp":1763458108000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2964284.2967223"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10]]},"references-count":18,"alternative-id":["10.1145\/2964284.2967223","10.1145\/2964284"],"URL":"https:\/\/doi.org\/10.1145\/2964284.2967223","relation":{},"subject":[],"published":{"date-parts":[[2016,10]]},"assertion":[{"value":"2016-10-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}