{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T14:50:19Z","timestamp":1773413419767,"version":"3.50.1"},"publisher-location":"New York, New York, USA","reference-count":43,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1145\/3041021.3054201","type":"proceedings-article","created":{"date-parts":[[2018,1,11]],"date-time":"2018-01-11T18:39:25Z","timestamp":1515695965000},"page":"515-524","source":"Crossref","is-referenced-by-count":51,"title":["Visual Discovery at Pinterest"],"prefix":"10.1145","author":[{"given":"Andrew","family":"Zhai","sequence":"first","affiliation":[{"name":"Pinterest, San Francisco, CA, USA"}]},{"given":"Dmitry","family":"Kislyuk","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, CA, USA"}]},{"given":"Yushi","family":"Jing","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, CA, USA"}]},{"given":"Michael","family":"Feng","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, CA, USA"}]},{"given":"Eric","family":"Tzeng","sequence":"additional","affiliation":[{"name":"Pinterest & University of California, Berkeley, San Francisco, CA, USA"}]},{"given":"Jeff","family":"Donahue","sequence":"additional","affiliation":[{"name":"Pinterest & University of California, Berkeley, San Francisco, CA, USA"}]},{"given":"Yue Li","family":"Du","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, CA, USA"}]},{"given":"Trevor","family":"Darrell","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]}],"member":"320","reference":[{"key":"key-10.1145\/3041021.3054201-1","unstructured":"Introducing the future of visual discovery on pinterest. https:\/\/engineering.pinterest.com\/blog\/introducing-future-visual-discovery-pinterest. Published: 2017-02-08."},{"key":"key-10.1145\/3041021.3054201-2","unstructured":"Our crazy fun new visual search tool. https:\/\/blog.pinterest.com\/en\/our-crazy-fun-new-visual-search-tool. Published: 2015--11-08."},{"key":"key-10.1145\/3041021.3054201-3","unstructured":"M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen, C. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, S. Ghemawat, I. J. Goodfellow, A. Harp, G. Irving, M. Isard, Y. Jia, R. J&#243;zefowicz, L. Kaiser, M. Kudlur, J. Levenberg, D. Man&#233;, R. Monga, S. Moore, D. G. Murray, C. Olah, M. Schuster, J. Shlens, B. Steiner, I. Sutskever, K. Talwar, P. A. Tucker, V. Vanhoucke, V. Vasudevan, F. B. Vi&#233;gas, O. Vinyals, P. Warden, M. Wattenberg, M. Wicke, Y. Yu, and X. Zheng. Tensorflow: Large-scale machine learning on heterogeneous distributed systems. CoRR, abs\/1603.04467, 2016."},{"key":"key-10.1145\/3041021.3054201-4","doi-asserted-by":"crossref","unstructured":"P. Agrawal, R. Girshick, and J. Malik. Analyzing the performance of multilayer neural networks for object recognition. 2014.","DOI":"10.1007\/978-3-319-10584-0_22"},{"key":"key-10.1145\/3041021.3054201-5","doi-asserted-by":"crossref","unstructured":"K. Aizawa and M. Ogawa. Foodlog: Multimedia tool for healthcare applications. IEEE MultiMedia, 22(2):4--8, 2015.","DOI":"10.1109\/MMUL.2015.39"},{"key":"key-10.1145\/3041021.3054201-6","doi-asserted-by":"crossref","unstructured":"S. Bell and K. Bala. Learning visual similarity for product design with convolutional neural networks. ACM Trans. Graph., 34(4):98:1--98:10, July 2015.","DOI":"10.1145\/2766959"},{"key":"key-10.1145\/3041021.3054201-7","doi-asserted-by":"crossref","unstructured":"L. Bertelli, T. Yu, D. Vu, and B. Gokturk. Kernelized structural svm learning for supervised object segmentation. In Computer Vision and Pattern Recognition (CVPR), 2011 IEEE Conference on, pages 2153--2160. IEEE, 2011.","DOI":"10.1109\/CVPR.2011.5995597"},{"key":"key-10.1145\/3041021.3054201-8","doi-asserted-by":"crossref","unstructured":"R. Datta, D. Joshi, J. Li, and J. Wang. Image retrieval: Ideas, influences, and trends of the new age. ACM Computing Survey, 40(2):5:1--5:60, May 2008.","DOI":"10.1145\/1348246.1348248"},{"key":"key-10.1145\/3041021.3054201-9","doi-asserted-by":"crossref","unstructured":"J. Deng, W. Dong, R. Socher, L.-J. Li, K. Li, and L. Fei-Fei. ImageNet: A Large-Scale Hierarchical Image Database. In CVPR09, 2009.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"key-10.1145\/3041021.3054201-10","unstructured":"M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, and A. Zisserman. The PASCAL Visual Object Classes Challenge 2011 (VOC2011) Results. http:\/\/www.pascal-network.org\/challenges\/VOC\/voc2011\/workshop\/index.html."},{"key":"key-10.1145\/3041021.3054201-11","doi-asserted-by":"crossref","unstructured":"P. F. Felzenszwalb, R. B. Girshick, and D. A. McAllester. Cascade object detection with deformable part models. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pages 2241--2248, 2010.","DOI":"10.1109\/CVPR.2010.5539906"},{"key":"key-10.1145\/3041021.3054201-12","doi-asserted-by":"crossref","unstructured":"A. Frome, Y. Singer, and J. Malik. Image Retrieval and Classification Using Local Distance Functions. In B. Sch&#246;lkopf, J. Platt, and T. Hoffman, editors, Advances in Neural Information Processing Systems 19, pages 417--424. MIT Press, Cambridge, MA, 2007.","DOI":"10.7551\/mitpress\/7503.003.0057"},{"key":"key-10.1145\/3041021.3054201-13","unstructured":"Y. Gao, O. Beijbom, N. Zhang, and T. Darrell. Compact bilinear pooling. arXiv preprint arXiv:1511.06062, 2015."},{"key":"key-10.1145\/3041021.3054201-14","doi-asserted-by":"crossref","unstructured":"R. Girshick, J. Donahue, T. Darrell, and J. Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. arXiv preprint arXiv:1311.2524, 2013.","DOI":"10.1109\/CVPR.2014.81"},{"key":"key-10.1145\/3041021.3054201-15","unstructured":"I. J. Goodfellow, Y. Bulatov, J. Ibarz, S. Arnoud, and V. D. Shet. Multi-digit number recognition from street view imagery using deep convolutional neural networks. CoRR, abs\/1312.6082, 2013."},{"key":"key-10.1145\/3041021.3054201-16","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learning for image recognition. arXiv preprint arXiv:1512.03385, 2015."},{"key":"key-10.1145\/3041021.3054201-17","unstructured":"M. Jaderberg, K. Simonyan, A. Vedaldi, and A. Zisserman. Reading text in the wild with convolutional neural networks. CoRR, abs\/1412.1842, 2014."},{"key":"key-10.1145\/3041021.3054201-18","unstructured":"Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick, S. Guadarrama, and T. Darrell. Caffe: Convolutional architecture for fast feature embedding. arXiv preprint arXiv:1408.5093, 2014."},{"key":"key-10.1145\/3041021.3054201-19","unstructured":"Y. Jing and S. Baluja. Visualrank: Applying pagerank to large-scale image search. IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI), 30(11):1877--1890, 2008."},{"key":"key-10.1145\/3041021.3054201-20","doi-asserted-by":"crossref","unstructured":"Y. Jing, M. Covell, D. Tsai, and J. M. Rehg. Learning query-specific distance functions for large-scale web image search. IEEE Transactions on Multimedia, 15:2022--2034, 2013.","DOI":"10.1109\/TMM.2013.2279663"},{"key":"key-10.1145\/3041021.3054201-21","doi-asserted-by":"crossref","unstructured":"Y. Jing, D. Liu, D. Kislyuk, A. Zhai, J. Xu, and J. Donahue. Visual search at pinterest. In Proceedings of the International Conference on Knowledge Discovery and Data Mining (SIGKDD).","DOI":"10.1145\/2783258.2788621"},{"key":"key-10.1145\/3041021.3054201-22","doi-asserted-by":"crossref","unstructured":"Y. Jing, H. Rowley, J. Wang, D. Tsai, C. Rosenberg, and M. Covell. Google image swirl: a large-scale content-based image visualization system. In Proceedings of the 21st International Conference on World Wide Web, pages 539--540. ACM, 2012.","DOI":"10.1145\/2187980.2188116"},{"key":"key-10.1145\/3041021.3054201-23","doi-asserted-by":"crossref","unstructured":"M. H. Kiapour, X. Han, S. Lazebnik, A. C. Berg, and T. L. Berg. Where to buy it:matching street clothing photos in online shops. In International Conference on Computer Vision, 2015.","DOI":"10.1109\/ICCV.2015.382"},{"key":"key-10.1145\/3041021.3054201-24","unstructured":"A. Krizhevsky, S. Ilya, and G. E. Hinton. Imagenet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems (NIPS), pages 1097--1105. 2012."},{"key":"key-10.1145\/3041021.3054201-25","doi-asserted-by":"crossref","unstructured":"D. C. Liu, S. Rogers, R. Shiau, K. Ma, Z. Zhong, D. Kislyuk, J. Liu, and Y. Jing. Related pins at pinterest, the evolution of a real-world recommender system. In Proceedings of the International Conference on World Wide Web (WWW), 2017.","DOI":"10.1145\/3041021.3054202"},{"key":"key-10.1145\/3041021.3054201-26","doi-asserted-by":"crossref","unstructured":"S. Liu, Z. Song, M. Wang, C. Xu, H. Lu, and S. Yan. Street-to-shop: Cross-scenario clothing retrieval via parts alignment and auxiliary set. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2012.","DOI":"10.1145\/2393347.2396471"},{"key":"key-10.1145\/3041021.3054201-27","doi-asserted-by":"crossref","unstructured":"W. Liu, D. Anguelov, D. Erhan, C. Szegedy, S. Reed, C.-Y. Fu, and A. C. Berg. Ssd: Single shot multibox detector. 2016. To appear.","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"key-10.1145\/3041021.3054201-28","doi-asserted-by":"crossref","unstructured":"Z. Liu, P. Luo, S. Qiu, X. Wang, and X. Tang. Deepfashion: Powering robust clothes recognition and retrieval with rich annotations. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016.","DOI":"10.1109\/CVPR.2016.124"},{"key":"key-10.1145\/3041021.3054201-29","doi-asserted-by":"crossref","unstructured":"J. Long, E. Shelhamer, and T. Darrell. Fully convolutional networks for semantic segmentation. arXiv preprint arXiv:1411.4038, 2014.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"key-10.1145\/3041021.3054201-30","doi-asserted-by":"crossref","unstructured":"H. M&#252;ller, W. M&#252;ller, D. M. Squire, S. Marchand-Maillet, and T. Pun. Performance evaluation in content-based image retrieval: Overview and proposals. Pattern Recognition Letter, 22(5):593--601, 2001.","DOI":"10.1016\/S0167-8655(00)00118-5"},{"key":"key-10.1145\/3041021.3054201-31","doi-asserted-by":"crossref","unstructured":"J. Redmon, S. Divvala, R. Girshick, and A. Farhadi. You only look once: Unified, real-time object detection. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016.","DOI":"10.1109\/CVPR.2016.91"},{"key":"key-10.1145\/3041021.3054201-32","unstructured":"S. Ren, K. He, R. Girshick, and J. Sun. Faster R-CNN: Towards real-time object detection with region proposal networks. In Neural Information Processing Systems (NIPS), 2015."},{"key":"key-10.1145\/3041021.3054201-33","doi-asserted-by":"crossref","unstructured":"F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A unified embedding for face recognition and clustering. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2015.","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"key-10.1145\/3041021.3054201-34","doi-asserted-by":"crossref","unstructured":"A. Sharif Razavian, H. Azizpour, J. Sullivan, and S. Carlsson. Cnn features off-the-shelf: an astounding baseline for recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pages 806--813, 2014.","DOI":"10.1109\/CVPRW.2014.131"},{"key":"key-10.1145\/3041021.3054201-35","doi-asserted-by":"crossref","unstructured":"A. Shrivastava, A. Gupta, and R. Girshick. Training region-based object detectors with online hard example mining. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016.","DOI":"10.1109\/CVPR.2016.89"},{"key":"key-10.1145\/3041021.3054201-36","doi-asserted-by":"crossref","unstructured":"E. Simo-Serra and H. Ishikawa. Fashion style in 128 floats: Joint ranking and classification using weak data for feature extraction. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016.","DOI":"10.1109\/CVPR.2016.39"},{"key":"key-10.1145\/3041021.3054201-37","unstructured":"K. Simonyan and A. Zisserman. Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556, 2014."},{"key":"key-10.1145\/3041021.3054201-38","doi-asserted-by":"crossref","unstructured":"H. O. Song, Y. Xiang, S. Jegelka, and S. Savarese. Deep metric learning via lifted structured feature embedding. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016.","DOI":"10.1109\/CVPR.2016.434"},{"key":"key-10.1145\/3041021.3054201-39","unstructured":"C. Szegedy, S. Ioffe, and V. Vanhoucke. Inception-v4, inception-resnet and the impact of residual connections on learning. CoRR, abs\/1602.07261, 2016."},{"key":"key-10.1145\/3041021.3054201-40","doi-asserted-by":"crossref","unstructured":"C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich. Going deeper with convolutions. arXiv preprint arXiv:1409.4842, 2014.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"key-10.1145\/3041021.3054201-41","doi-asserted-by":"crossref","unstructured":"Y. Taigman, M. Yang, M. Ranzato, and L. Wolf. Deepface: Closing the gap to human-level performance in face verification. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 1701--1708, 2014.","DOI":"10.1109\/CVPR.2014.220"},{"key":"key-10.1145\/3041021.3054201-42","doi-asserted-by":"crossref","unstructured":"J. Wang, Y. Song, T. Leung, C. Rosenberg, J. Wang, J. Philbin, B. Chen, and Y. Wu. Learning fine-grained image similarity with deep ranking. In Proceedings of the 2014 IEEE Conference on Computer Vision and Pattern Recognition, CVPR '14, pages 1386--1393, Washington, DC, USA, 2014. IEEE Computer Society.","DOI":"10.1109\/CVPR.2014.180"},{"key":"key-10.1145\/3041021.3054201-43","doi-asserted-by":"crossref","unstructured":"K. Yamaguchi, M. H. Kiapour, L. E. Ortiz, and T. L. Berg. Retrieving similar styles to parse clothing. IEEE Trans. Pattern Anal. Mach. Intell., 37(5):1028--1040, 2015.","DOI":"10.1109\/TPAMI.2014.2353624"}],"event":{"name":"the 26th International Conference","location":"Perth, Australia","acronym":"WWW '17 Companion","number":"26","sponsor":["SIGWEB, ACM Special Interest Group on Hypertext, Hypermedia, and Web","IW3C2, International World Wide Web Conference Committee"],"start":{"date-parts":[[2017,4,3]]},"end":{"date-parts":[[2017,4,7]]}},"container-title":["Proceedings of the 26th International Conference on World Wide Web Companion - WWW '17 Companion"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3041021.3054201","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3054201&ftid=1865096&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:03:19Z","timestamp":1750215799000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3041021.3054201"}},"subtitle":[],"proceedings-subject":"World Wide Web Companion","short-title":[],"issued":{"date-parts":[[2017]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1145\/3041021.3054201","relation":{},"subject":[],"published":{"date-parts":[[2017]]}}}