{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T05:21:34Z","timestamp":1755926494083,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T00:00:00Z","timestamp":1602460800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,10,12]]},"DOI":"10.1145\/3394171.3413990","type":"proceedings-article","created":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T13:10:18Z","timestamp":1602508218000},"page":"4125-4134","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Cap2Seg: Inferring Semantic and Spatial Context from Captions for Zero-Shot Image Segmentation"],"prefix":"10.1145","author":[{"given":"Guiyu","family":"Tian","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"BOE Technology Group Co., Ltd., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Feng","sequence":"additional","affiliation":[{"name":"BOE Technology Group Co., Ltd., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Zhou","sequence":"additional","affiliation":[{"name":"BOE Technology Group Co., Ltd., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yadong","family":"Mu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,10,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2487986"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Zeynep Akata Scott E. Reed Daniel Walter Honglak Lee and Bernt Schiele. 2015. Evaluation of output embeddings for fine-grained image classification. In CVPR. 2927--2936.  Zeynep Akata Scott E. Reed Daniel Walter Honglak Lee and Bernt Schiele. 2015. Evaluation of output embeddings for fine-grained image classification. In CVPR. 2927--2936.","DOI":"10.1109\/CVPR.2015.7298911"},{"key":"e_1_3_2_2_3_1","first-page":"2481","article-title":"SegNet","volume":"39","author":"Badrinarayanan Vijay","year":"2017","journal-title":"A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation. TPAMI"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Amy L. Bearman Olga Russakovsky Vittorio Ferrari and Fei-Fei Li. 2016. What's the Point: Semantic Segmentation with Point Supervision. In ECCV. 549--565.  Amy L. Bearman Olga Russakovsky Vittorio Ferrari and Fei-Fei Li. 2016. What's the Point: Semantic Segmentation with Point Supervision. In ECCV. 549--565.","DOI":"10.1007\/978-3-319-46478-7_34"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Yoshua Bengio J\u00e9r\u00f4me Louradour Ronan Collobert and Jason Weston. 2009. Curriculum learning. In ICML. 41--48.  Yoshua Bengio J\u00e9r\u00f4me Louradour Ronan Collobert and Jason Weston. 2009. Curriculum learning. In ICML. 41--48.","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_2_2_6_1","unstructured":"Maxime Bucher Tuan-Hung Vu Matthieu Cord and Patrick P\u00e9rez. 2019. Zero-Shot Semantic Segmentation. In NIPS .  Maxime Bucher Tuan-Hung Vu Matthieu Cord and Patrick P\u00e9rez. 2019. Zero-Shot Semantic Segmentation. In NIPS ."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Holger Caesar Jasper R. R. Uijlings and Vittorio Ferrari. 2018. COCO-Stuff: Thing and Stuff Classes in Context. In CVPR. 1209--1218.  Holger Caesar Jasper R. R. Uijlings and Vittorio Ferrari. 2018. COCO-Stuff: Thing and Stuff Classes in Context. In CVPR. 1209--1218.","DOI":"10.1109\/CVPR.2018.00132"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Soravit Changpinyo Wei-Lun Chao Boqing Gong and Fei Sha. 2016. Synthesized Classifiers for Zero-Shot Learning. In CVPR. 5327--5336.  Soravit Changpinyo Wei-Lun Chao Boqing Gong and Fei Sha. 2016. Synthesized Classifiers for Zero-Shot Learning. In CVPR. 5327--5336.","DOI":"10.1109\/CVPR.2016.575"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"crossref","unstructured":"Wei-Lun Chao Soravit Changpinyo Boqing Gong and Fei Sha. 2016. An Empirical Study and Analysis of Generalized Zero-Shot Learning for Object Recognition in the Wild. In ECCV. 52--68.  Wei-Lun Chao Soravit Changpinyo Boqing Gong and Fei Sha. 2016. An Empirical Study and Analysis of Generalized Zero-Shot Learning for Object Recognition in the Wild. In ECCV. 52--68.","DOI":"10.1007\/978-3-319-46475-6_4"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"e_1_3_2_2_12_1","unstructured":"Liang-Chieh Chen George Papandreou Florian Schroff and Hartwig Adam. 2017. Rethinking Atrous Convolution for Semantic Image Segmentation. CoRR Vol. abs\/1706.05587 (2017).  Liang-Chieh Chen George Papandreou Florian Schroff and Hartwig Adam. 2017. Rethinking Atrous Convolution for Semantic Image Segmentation. CoRR Vol. abs\/1706.05587 (2017)."},{"volume-title":"PROBABILISTIC TEXTUAL ENTAILMENT: GENERIC APPLIED MODELING OF LANGUAGE VARIABILITY.","year":"2004","author":"Dagan Ido","key":"e_1_3_2_2_13_1"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Jifeng Dai Kaiming He and Jian Sun. 2015. BoxSup: Exploiting Bounding Boxes to Supervise Convolutional Networks for Semantic Segmentation. In ICCV. 1635--1643.  Jifeng Dai Kaiming He and Jian Sun. 2015. BoxSup: Exploiting Bounding Boxes to Supervise Convolutional Networks for Semantic Segmentation. In ICCV. 1635--1643.","DOI":"10.1109\/ICCV.2015.191"},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019","volume":"1","author":"Devlin Jacob","year":"2019"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"e_1_3_2_2_17_1","unstructured":"Andrea Frome Gregory S. Corrado Jonathon Shlens Samy Bengio Jeffrey Dean Marc'Aurelio Ranzato and Tomas Mikolov. 2013. DeViSE: A Deep Visual-Semantic Embedding Model. In NIPS. 2121--2129.  Andrea Frome Gregory S. Corrado Jonathon Shlens Samy Bengio Jeffrey Dean Marc'Aurelio Ranzato and Tomas Mikolov. 2013. DeViSE: A Deep Visual-Semantic Embedding Model. In NIPS. 2121--2129."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2763441"},{"key":"e_1_3_2_2_19_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR. 770--778.  Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR. 770--778."},{"key":"e_1_3_2_2_20_1","unstructured":"Armand Joulin Edouard Grave Piotr Bojanowski Matthijs Douze Herv\u00e9 J\u00e9 gou and Tomas Mikolov. 2016. FastText.zip: Compressing text classification models. CoRR Vol. abs\/1612.03651 (2016).  Armand Joulin Edouard Grave Piotr Bojanowski Matthijs Douze Herv\u00e9 J\u00e9 gou and Tomas Mikolov. 2016. FastText.zip: Compressing text classification models. CoRR Vol. abs\/1612.03651 (2016)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00172"},{"key":"e_1_3_2_2_22_1","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In ICLR .  Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In ICLR ."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"Elyor Kodirov Tao Xiang and Shaogang Gong. 2017. Semantic Autoencoder for Zero-Shot Learning. In CVPR. 4447--4456.  Elyor Kodirov Tao Xiang and Shaogang Gong. 2017. Semantic Autoencoder for Zero-Shot Learning. In CVPR. 4447--4456.","DOI":"10.1109\/CVPR.2017.473"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"crossref","unstructured":"Alexander Kolesnikov and Christoph H. Lampert. 2016. Seed Expand and Constrain: Three Principles for Weakly-Supervised Image Segmentation. In ECCV. 695--711.  Alexander Kolesnikov and Christoph H. Lampert. 2016. Seed Expand and Constrain: Three Principles for Weakly-Supervised Image Segmentation. In ECCV. 695--711.","DOI":"10.1007\/978-3-319-46493-0_42"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","unstructured":"Suha Kwak Seunghoon Hong and Bohyung Han. 2017. Weakly Supervised Semantic Segmentation Using Superpixel Pooling Network. In AAAI. 4111--4117.  Suha Kwak Seunghoon Hong and Bohyung Han. 2017. Weakly Supervised Semantic Segmentation Using Superpixel Pooling Network. In AAAI. 4111--4117.","DOI":"10.1609\/aaai.v31i1.11213"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.140"},{"key":"e_1_3_2_2_27_1","unstructured":"Yanan Li Donghui Wang Huanhang Hu Yuetan Lin and Yueting Zhuang. 2017. Zero-Shot Recognition Using Dual Visual-Semantic Mapping Paths. In CVPR. 5207--5215.  Yanan Li Donghui Wang Huanhang Hu Yuetan Lin and Yueting Zhuang. 2017. Zero-Shot Recognition Using Dual Visual-Semantic Mapping Paths. In CVPR. 5207--5215."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"crossref","unstructured":"Di Lin Jifeng Dai Jiaya Jia Kaiming He and Jian Sun. 2016. ScribbleSup: Scribble-Supervised Convolutional Networks for Semantic Segmentation. In CVPR. 3159--3167.  Di Lin Jifeng Dai Jiaya Jia Kaiming He and Jian Sun. 2016. ScribbleSup: Scribble-Supervised Convolutional Networks for Semantic Segmentation. In CVPR. 3159--3167.","DOI":"10.1109\/CVPR.2016.344"},{"key":"e_1_3_2_2_29_1","unstructured":"Tsung-Yi Lin Piotr Doll\u00e1r Ross B. Girshick Kaiming He Bharath Hariharan and Serge J. Belongie. 2017. Feature Pyramid Networks for Object Detection. In CVPR. 936--944.  Tsung-Yi Lin Piotr Doll\u00e1r Ross B. Girshick Kaiming He Bharath Hariharan and Serge J. Belongie. 2017. Feature Pyramid Networks for Object Detection. In CVPR. 936--944."},{"key":"e_1_3_2_2_30_1","unstructured":"Shichen Liu Mingsheng Long Jianmin Wang and Michael I. Jordan. 2018. Generalized Zero-Shot Learning with Deep Calibration Network. In NIPS. 2009--2019.  Shichen Liu Mingsheng Long Jianmin Wang and Michael I. Jordan. 2018. Generalized Zero-Shot Learning with Deep Calibration Network. In NIPS. 2009--2019."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"crossref","unstructured":"Jonathan Long Evan Shelhamer and Trevor Darrell. 2015. Fully convolutional networks for semantic segmentation. In CVPR. 3431--3440.  Jonathan Long Evan Shelhamer and Trevor Darrell. 2015. Fully convolutional networks for semantic segmentation. In CVPR. 3431--3440.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_2_32_1","unstructured":"Tomas Mikolov Ilya Sutskever Kai Chen Gregory S. Corrado and Jeffrey Dean. 2013. Distributed Representations of Words and Phrases and their Compositionality. In NIPS. 3111--3119.  Tomas Mikolov Ilya Sutskever Kai Chen Gregory S. Corrado and Jeffrey Dean. 2013. Distributed Representations of Words and Phrases and their Compositionality. In NIPS. 3111--3119."},{"key":"e_1_3_2_2_33_1","unstructured":"Mohammad Norouzi Tomas Mikolov Samy Bengio Yoram Singer Jonathon Shlens Andrea Frome Greg Corrado and Jeffrey Dean. 2014. Zero-Shot Learning by Convex Combination of Semantic Embeddings. In ICLR .  Mohammad Norouzi Tomas Mikolov Samy Bengio Yoram Singer Jonathon Shlens Andrea Frome Greg Corrado and Jeffrey Dean. 2014. Zero-Shot Learning by Convex Combination of Semantic Embeddings. In ICLR ."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"crossref","unstructured":"George Papandreou Liang-Chieh Chen Kevin Murphy and Alan L. Yuille. 2015. Weakly- and Semi-Supervised Learning of a DCNN for Semantic Image Segmentation. CoRR Vol. abs\/1502.02734 (2015).  George Papandreou Liang-Chieh Chen Kevin Murphy and Alan L. Yuille. 2015. Weakly- and Semi-Supervised Learning of a DCNN for Semantic Image Segmentation. CoRR Vol. abs\/1502.02734 (2015).","DOI":"10.1109\/ICCV.2015.203"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"crossref","unstructured":"Pedro H. O. Pinheiro and Ronan Collobert. 2015. From image-level to pixel-level labeling with Convolutional Networks. In CVPR. 1713--1721.  Pedro H. O. Pinheiro and Ronan Collobert. 2015. From image-level to pixel-level labeling with Convolutional Networks. In CVPR. 1713--1721.","DOI":"10.1109\/CVPR.2015.7298780"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/1866696.1866717"},{"key":"e_1_3_2_2_37_1","unstructured":"Bernardino Romera-Paredes and Philip H. S. Torr. 2015. An embarrassingly simple approach to zero-shot learning. In ICML. 2152--2161.  Bernardino Romera-Paredes and Philip H. S. Torr. 2015. An embarrassingly simple approach to zero-shot learning. In ICML. 2152--2161."},{"key":"e_1_3_2_2_38_1","unstructured":"Anirban Roy and Sinisa Todorovic. 2017. Combining Bottom-Up Top-Down and Smoothness Cues for Weakly Supervised Image Segmentation. In CVPR. 7282--7291.  Anirban Roy and Sinisa Todorovic. 2017. Combining Bottom-Up Top-Down and Smoothness Cues for Weakly Supervised Image Segmentation. In CVPR. 7282--7291."},{"key":"e_1_3_2_2_39_1","unstructured":"Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael S. Bernstein Alexander C. Berg and Fei-Fei Li. [n.d.]. ImageNet Large Scale Visual Recognition Challenge. International Journal of Computer Vision ( [n. d.]).  Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael S. Bernstein Alexander C. Berg and Fei-Fei Li. [n.d.]. ImageNet Large Scale Visual Recognition Challenge. International Journal of Computer Vision ( [n. d.])."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"crossref","unstructured":"Johann Sawatzky Debayan Banerjee and Juergen Gall. 2019. Harvesting Information from Captions for Weakly Supervised Semantic Segmentation. CoRR Vol. abs\/1905.06784 (2019).  Johann Sawatzky Debayan Banerjee and Juergen Gall. 2019. Harvesting Information from Captions for Weakly Supervised Semantic Segmentation. CoRR Vol. abs\/1905.06784 (2019).","DOI":"10.1109\/ICCVW.2019.00549"},{"key":"e_1_3_2_2_41_1","unstructured":"Richard Socher Milind Ganjoo Christopher D. Manning and Andrew Y. Ng. 2013. Zero-Shot Learning Through Cross-Modal Transfer. In NIPS. 935--943.  Richard Socher Milind Ganjoo Christopher D. Manning and Andrew Y. Ng. 2013. Zero-Shot Learning Through Cross-Modal Transfer. In NIPS. 935--943."},{"key":"e_1_3_2_2_42_1","unstructured":"Vinay Kumar Verma Gundeep Arora Ashish Mishra and Piyush Rai. 2018. Generalized Zero-Shot Learning via Synthesized Examples. In CVPR. 4281--4289.  Vinay Kumar Verma Gundeep Arora Ashish Mishra and Piyush Rai. 2018. Generalized Zero-Shot Learning via Synthesized Examples. In CVPR. 4281--4289."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71246-8_48"},{"key":"e_1_3_2_2_44_1","unstructured":"Hanna M. Wallach Hugo Larochelle Alina Beygelzimer Florence d'Alch\u00e9 -Buc Emily B. Fox and Roman Garnett (Eds.). 2019. NeurIPS .  Hanna M. Wallach Hugo Larochelle Alina Beygelzimer Florence d'Alch\u00e9 -Buc Emily B. Fox and Roman Garnett (Eds.). 2019. NeurIPS ."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293318"},{"key":"e_1_3_2_2_46_1","unstructured":"Yunchao Wei Huaxin Xiao Honghui Shi Zequn Jie Jiashi Feng and Thomas S. Huang. 2018. Revisiting Dilated Convolution: A Simple Approach for Weakly- and Semi-Supervised Semantic Segmentation. In CVPR. 7268--7277.  Yunchao Wei Huaxin Xiao Honghui Shi Zequn Jie Jiashi Feng and Thomas S. Huang. 2018. Revisiting Dilated Convolution: A Simple Approach for Weakly- and Semi-Supervised Semantic Segmentation. In CVPR. 7268--7277."},{"key":"e_1_3_2_2_47_1","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R'emi Louf Morgan Funtowicz and Jamie Brew. 2019. HuggingFace's Transformers: State-of-the-art Natural Language Processing. ArXiv Vol. abs\/1910.03771 (2019).  Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R'emi Louf Morgan Funtowicz and Jamie Brew. 2019. HuggingFace's Transformers: State-of-the-art Natural Language Processing. ArXiv Vol. abs\/1910.03771 (2019)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"crossref","unstructured":"Yongqin Xian Zeynep Akata Gaurav Sharma Quynh N. Nguyen Matthias Hein and Bernt Schiele. 2016. Latent Embeddings for Zero-Shot Classification. In CVPR. 69--77.  Yongqin Xian Zeynep Akata Gaurav Sharma Quynh N. Nguyen Matthias Hein and Bernt Schiele. 2016. Latent Embeddings for Zero-Shot Classification. In CVPR. 69--77.","DOI":"10.1109\/CVPR.2016.15"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"crossref","unstructured":"Yongqin Xian Subhabrata Choudhury Yang He Bernt Schiele and Zeynep Akata. 2019 a. Semantic Projection Network for Zero- and Few-Label Semantic Segmentation. In CVPR. 8256--8265.  Yongqin Xian Subhabrata Choudhury Yang He Bernt Schiele and Zeynep Akata. 2019 a. Semantic Projection Network for Zero- and Few-Label Semantic Segmentation. In CVPR. 8256--8265.","DOI":"10.1109\/CVPR.2019.00845"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2857768"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"crossref","unstructured":"Jia Xu Alexander G. Schwing and Raquel Urtasun. 2015. Learning to segment under various forms of weak supervision. In CVPR. 3781--3790.  Jia Xu Alexander G. Schwing and Raquel Urtasun. 2015. Learning to segment under various forms of weak supervision. In CVPR. 3781--3790.","DOI":"10.1109\/CVPR.2015.7299002"},{"key":"e_1_3_2_2_52_1","unstructured":"Keren Ye Mingda Zhang Adriana Kovashka Wei Li Danfeng Qin and Jesse Berent. 2019. Cap2Det: Learning to Amplify Weak Caption Supervision for Object Detection. In ICCV .  Keren Ye Mingda Zhang Adriana Kovashka Wei Li Danfeng Qin and Jesse Berent. 2019. Cap2Det: Learning to Amplify Weak Caption Supervision for Object Detection. In ICCV ."},{"key":"e_1_3_2_2_53_1","unstructured":"Meng Ye and Yuhong Guo. 2017. Zero-Shot Classification with Discriminative Semantic Representation Learning. In CVPR. 5103--5111.  Meng Ye and Yuhong Guo. 2017. Zero-Shot Classification with Discriminative Semantic Representation Learning. In CVPR. 5103--5111."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Li Zhang Tao Xiang and Shaogang Gong. 2017. Learning a Deep Embedding Model for Zero-Shot Learning. In CVPR. 3010--3019.  Li Zhang Tao Xiang and Shaogang Gong. 2017. Learning a Deep Embedding Model for Zero-Shot Learning. In CVPR. 3010--3019.","DOI":"10.1109\/CVPR.2017.321"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"crossref","unstructured":"Ziming Zhang and Venkatesh Saligrama. 2015. Zero-Shot Learning via Semantic Similarity Embedding. In ICCV. 4166--4174.  Ziming Zhang and Venkatesh Saligrama. 2015. Zero-Shot Learning via Semantic Similarity Embedding. In ICCV. 4166--4174.","DOI":"10.1109\/ICCV.2015.474"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"crossref","unstructured":"Bolei Zhou Aditya Khosla \u00c0gata Lapedriza Aude Oliva and Antonio Torralba. 2016. Learning Deep Features for Discriminative Localization. In CVPR. 2921--2929.  Bolei Zhou Aditya Khosla \u00c0gata Lapedriza Aude Oliva and Antonio Torralba. 2016. Learning Deep Features for Discriminative Localization. In CVPR. 2921--2929.","DOI":"10.1109\/CVPR.2016.319"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"crossref","unstructured":"Yanzhao Zhou Yi Zhu Qixiang Ye Qiang Qiu and Jianbin Jiao. 2018. Weakly Supervised Instance Segmentation Using Class Peak Response. In CVPR. 3791--3800.  Yanzhao Zhou Yi Zhu Qixiang Ye Qiang Qiu and Jianbin Jiao. 2018. Weakly Supervised Instance Segmentation Using Class Peak Response. In CVPR. 3791--3800.","DOI":"10.1109\/CVPR.2018.00399"}],"event":{"name":"MM '20: The 28th ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Seattle WA USA","acronym":"MM '20"},"container-title":["Proceedings of the 28th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413990","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394171.3413990","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:32:07Z","timestamp":1750195927000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413990"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,12]]},"references-count":57,"alternative-id":["10.1145\/3394171.3413990","10.1145\/3394171"],"URL":"https:\/\/doi.org\/10.1145\/3394171.3413990","relation":{},"subject":[],"published":{"date-parts":[[2020,10,12]]},"assertion":[{"value":"2020-10-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}