{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,11]],"date-time":"2026-05-11T16:06:19Z","timestamp":1778515579359,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730606","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Instance Segmentation of Scene Sketches Using Natural Image Priors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3553-3732","authenticated-orcid":false,"given":"Mia","family":"Tang","sequence":"first","affiliation":[{"name":"Stanford University, Stanford, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4402-7267","authenticated-orcid":false,"given":"Yael","family":"Vinker","sequence":"additional","affiliation":[{"name":"Computer Science and Artificial Intelligence Laboratory (CSAIL), Massachusetts Institute of Technology (MIT), Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2252-1185","authenticated-orcid":false,"given":"Chuan","family":"Yan","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3503-5791","authenticated-orcid":false,"given":"Lvmin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8996-7327","authenticated-orcid":false,"given":"Maneesh","family":"Agrawala","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, USA and Roblox Research, Stanford, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00925"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00400"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Bowen Cheng Ishan Misra Alexander\u00a0G. Schwing Alexander Kirillov and Rohit Girdhar. 2022. Masked-attention Mask Transformer for Universal Image Segmentation. CVPR.","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20074-8_15"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"crossref","unstructured":"Mathias Eitz James Hays and Marc Alexa. 2012. How Do Humans Sketch Objects? ACM Trans. Graph. (Proc. SIGGRAPH) 31 4 (2012) 44:1\u201344:10.","DOI":"10.1145\/2185520.2335395"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"crossref","unstructured":"Ce Ge Haifeng Sun Yi-Zhe Song Zhanyu Ma and Jianxin Liao. 2022. Exploring Local Detail Perception for Scene Sketch Semantic Segmentation. IEEE Transactions on Image Processing 31 (2022) 1447\u20131461. https:\/\/doi.org\/10.1109\/TIP.2022.3142511","DOI":"10.1109\/TIP.2022.3142511"},{"key":"e_1_3_3_2_9_1","unstructured":"Songwei Ge Vedanuj Goswami C.\u00a0Lawrence Zitnick and Devi Parikh. 2020. Creative Sketch Generation. arxiv:https:\/\/arXiv.org\/abs\/2011.10039\u00a0[cs.CV]"},{"key":"e_1_3_3_2_10_1","unstructured":"Kaiming He Georgia Gkioxari Piotr Doll\u00e1r and Ross Girshick. 2018. Mask R-CNN. arxiv:https:\/\/arXiv.org\/abs\/1703.06870\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1703.06870"},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"crossref","unstructured":"Zhe Huang Hongbo Fu and Rynson W.\u00a0H. Lau. 2014. Data-driven segmentation and labeling of freehand sketches. ACM Trans. Graph. 33 6 Article 175 (Nov. 2014) 10\u00a0pages. https:\/\/doi.org\/10.1145\/2661229.2661280","DOI":"10.1145\/2661229.2661280"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"crossref","unstructured":"Kurmanbek Kaiyrbekov and Metin Sezgin. 2020. Deep Stroke-Based Sketched Symbol Reconstruction and Segmentation. IEEE Computer Graphics and Applications 40 1 (2020) 112\u2013126. https:\/\/doi.org\/10.1109\/MCG.2019.2943333","DOI":"10.1109\/MCG.2019.2943333"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"crossref","unstructured":"Alexander Kirillov Eric Mintun Nikhila Ravi Hanzi Mao Chloe Rolland Laura Gustafson Tete Xiao Spencer Whitehead Alexander\u00a0C. Berg Wan-Yen Lo Piotr Doll\u00e1r and Ross Girshick. 2023. Segment Anything. arXiv:https:\/\/arXiv.org\/abs\/2304.02643 (2023).","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"crossref","unstructured":"Ranjay Krishna Yuke Zhu Oliver Groth Justin Johnson Kenji Hata Joshua Kravitz Stephanie Chen Yannis Kalantidis Li-Jia Li David\u00a0A. Shamma Michael\u00a0S. Bernstein and Li Fei-Fei. 2017. Visual Genome: Connecting Language and Vision Using Crowdsourced Dense Image Annotations. Int. J. Comput. Vision 123 1 (May 2017) 32\u201373. https:\/\/doi.org\/10.1007\/s11263-016-0981-7","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"crossref","unstructured":"Changjian Li Hao Pan Adrien Bousseau and Niloy\u00a0J. Mitra. 2022. Free2CAD: Parsing Freehand Drawings into CAD Commands. ACM Trans. Graph. (Proceedings of SIGGRAPH 2022) 41 4 (2022) 93:1\u201393:16. https:\/\/doi.org\/10.1145\/3528223.3530133","DOI":"10.1145\/3528223.3530133"},{"key":"e_1_3_3_2_16_1","unstructured":"Ke Li Kaiyue Pang Jifei Song Yi-Zhe Song Tao Xiang Timothy\u00a0M. Hospedales and Honggang Zhang. 2018. Universal Perceptual Grouping. arxiv:https:\/\/arXiv.org\/abs\/1808.02312\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1808.02312"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"crossref","unstructured":"Ke Li Kaiyue Pang Yi-Zhe Song Tao Xiang Timothy\u00a0M. Hospedales and Honggang Zhang. 2019. Toward Deep Universal Sketch Perceptual Grouper. IEEE Transactions on Image Processing 28 (2019) 3219\u20133231. https:\/\/api.semanticscholar.org\/CorpusID:73430225","DOI":"10.1109\/TIP.2019.2895155"},{"key":"e_1_3_3_2_18_1","volume-title":"NeurIPS","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong\u00a0Jae Lee. 2023a. Visual Instruction Tuning. In NeurIPS."},{"key":"e_1_3_3_2_19_1","unstructured":"Shilong Liu Zhaoyang Zeng Tianhe Ren Feng Li Hao Zhang Jie Yang Chunyuan Li Jianwei Yang Hang Su Jun Zhu et\u00a0al. 2023b. Grounding dino: Marrying dino with grounded pre-training for open-set object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.05499 (2023)."},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"crossref","unstructured":"Matthias Minderer Alexey Gritsenko Austin Stone Maxim Neumann Dirk Weissenborn Alexey Dosovitskiy Aravindh Mahendran Anurag Arnab Mostafa Dehghani Zhuoran Shen Xiao Wang Xiaohua Zhai Thomas Kipf and Neil Houlsby. 2022. Simple Open-Vocabulary Object Detection with Vision Transformers. arxiv:https:\/\/arXiv.org\/abs\/2205.06230\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2205.06230","DOI":"10.1007\/978-3-031-20080-9_42"},{"key":"e_1_3_3_2_22_1","unstructured":"Anran Qi Yulia Gryaditskaya Tao Xiang and Yi-Zhe Song. 2021. One Sketch for All: One-Shot Personalized Sketch Segmentation. CoRR abs\/2112.10838 (2021). arXiv:https:\/\/arXiv.org\/abs\/2112.10838https:\/\/arxiv.org\/abs\/2112.10838"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298795"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"crossref","unstructured":"Yonggang Qi and Zheng-Hua Tan. 2019. SketchSegNet+: An End-to-End Learning of RNN for Multi-Class Sketch Semantic Segmentation. IEEE Access 7 (2019) 102717\u2013102726. https:\/\/doi.org\/10.1109\/ACCESS.2019.2929804","DOI":"10.1109\/ACCESS.2019.2929804"},{"key":"e_1_3_3_2_25_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arxiv:https:\/\/arXiv.org\/abs\/2103.00020\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"e_1_3_3_2_26_1","unstructured":"Tianhe Ren Shilong Liu Ailing Zeng Jing Lin Kunchang Li He Cao Jiayu Chen Xinyu Huang Yukang Chen Feng Yan Zhaoyang Zeng Hao Zhang Feng Li Jie Yang Hongyang Li Qing Jiang and Lei Zhang. 2024. Grounded SAM: Assembling Open-World Models for Diverse Visual Tasks. arxiv:https:\/\/arXiv.org\/abs\/2401.14159\u00a0[cs.CV]"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"crossref","unstructured":"Patsorn Sangkloy Nathan Burnell Cusuh Ham and James Hays. 2016. The sketchy database: learning to retrieve badly drawn bunnies. ACM Trans. Graph. 35 4 Article 119 (July 2016) 12\u00a0pages. https:\/\/doi.org\/10.1145\/2897824.2925954","DOI":"10.1145\/2897824.2925954"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"crossref","unstructured":"Ros\u00e1lia\u00a0G. Schneider and Tinne Tuytelaars. 2016. Example-Based Sketch Segmentation and Labeling Using CRFs. ACM Trans. Graph. 35 5 Article 151 (July 2016) 9\u00a0pages. https:\/\/doi.org\/10.1145\/2898351","DOI":"10.1145\/2898351"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"crossref","unstructured":"L. Vincent and P. Soille. 1991. Watersheds in digital spaces: an efficient algorithm based on immersion simulations. IEEE Transactions on Pattern Analysis and Machine Intelligence 13 6 (1991) 583\u2013598. https:\/\/doi.org\/10.1109\/34.87344","DOI":"10.1109\/34.87344"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"crossref","unstructured":"Yael Vinker Ehsan Pajouheshgar Jessica\u00a0Y. Bo Roman\u00a0Christian Bachmann Amit\u00a0Haim Bermano Daniel Cohen-Or Amir Zamir and Ariel Shamir. 2022. CLIPasso: Semantically-Aware Object Sketching. ACM Trans. Graph. 41 4 Article 86 (jul 2022) 11\u00a0pages. https:\/\/doi.org\/10.1145\/3528223.3530068","DOI":"10.1145\/3528223.3530068"},{"key":"e_1_3_3_2_31_1","unstructured":"Yael Vinker Tamar\u00a0Rott Shaham Kristine Zheng Alex Zhao Judith\u00a0E Fan and Antonio Torralba. 2024. SketchAgent: Language-Driven Sequential Sketch Generation. arxiv:https:\/\/arXiv.org\/abs\/2411.17673\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2411.17673"},{"key":"e_1_3_3_2_32_1","unstructured":"Patrick von Platen Suraj Patil Anton Lozhkov Pedro Cuenca Nathan Lambert Kashif Rasul Mishig Davaadorj Dhruv Nair Sayak Paul William Berman Yiyi Xu Steven Liu and Thomas Wolf. 2022. Diffusers: State-of-the-art diffusion models. https:\/\/github.com\/huggingface\/diffusers."},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"crossref","unstructured":"Fei Wang Shujin Lin Hanhui Li Hefeng Wu Tie Cai Xiaonan Luo and Ruomei Wang. 2020. Multi-column point-CNN for sketch segmentation. Neurocomputing 392 (2020) 50\u201359. https:\/\/doi.org\/10.1016\/j.neucom.2019.12.117","DOI":"10.1016\/j.neucom.2019.12.117"},{"key":"e_1_3_3_2_34_1","unstructured":"Haofan Wang Matteo Spinelli Qixun Wang Xu Bai Zekui Qin and Anthony Chen. 2024. InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation. ArXiv abs\/2404.02733 (2024). https:\/\/api.semanticscholar.org\/CorpusID:268876474"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00353"},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"crossref","unstructured":"Xinlong Wang Rufeng Zhang Chunhua Shen Tao Kong and Lei Li. 2021. SOLO: A Simple Framework for Instance Segmentation. IEEE T. Pattern Analysis and Machine Intelligence (TPAMI) (2021).","DOI":"10.1109\/TPAMI.2021.3111116"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"crossref","unstructured":"Xingyuan Wu Yonggang Qi Jun Liu and Jie Yang. 2018. Sketchsegnet: A Rnn Model for Labeling Sketch Strokes. 2018 IEEE 28th International Workshop on Machine Learning for Signal Processing (MLSP) (2018) 1\u20136. https:\/\/api.semanticscholar.org\/CorpusID:53234925","DOI":"10.1109\/MLSP.2018.8516988"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"crossref","unstructured":"Bin Xiao Haiping Wu Weijian Xu Xiyang Dai Houdong Hu Yumao Lu Michael Zeng Ce Liu and Lu Yuan. 2023. Florence-2: Advancing a unified representation for a variety of vision tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.06242 (2023).","DOI":"10.1109\/CVPR52733.2024.00461"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"crossref","unstructured":"Jie Yang Aihua Ke Yaoxiang Yu and Bo Cai. 2023. Scene sketch semantic segmentation with hierarchical Transformer. Knowledge-Based Systems 280 (2023) 110962.","DOI":"10.1016\/j.knosys.2023.110962"},{"key":"e_1_3_3_2_40_1","unstructured":"Lihe Yang Bingyi Kang Zilong Huang Zhen Zhao Xiaogang Xu Jiashi Feng and Hengshuang Zhao. 2024. Depth Anything V2. arXiv:https:\/\/arXiv.org\/abs\/2406.09414 (2024)."},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"crossref","unstructured":"Lumin Yang Jiajie Zhuang Hongbo Fu Xiangzhi Wei Kun Zhou and Youyi Zheng. 2021. SketchGNN: Semantic Sketch Segmentation with Graph Neural Networks. ACM Trans. Graph. 40 3 Article 28 (Aug. 2021) 13\u00a0pages. https:\/\/doi.org\/10.1145\/3450284","DOI":"10.1145\/3450284"},{"key":"e_1_3_3_2_42_1","unstructured":"Haotian* Zhang Pengchuan* Zhang Xiaowei Hu Yen-Chun Chen Liunian\u00a0Harold Li Xiyang Dai Lijuan Wang Lu Yuan Jenq-Neng Hwang and Jianfeng Gao. 2022b. GLIPv2: Unifying Localization and Vision-Language Understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2206.05836 (2022)."},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3229147.3229154"},{"key":"e_1_3_3_2_44_1","unstructured":"Youcai Zhang Xinyu Huang Jinyu Ma Zhaoyang Li Zhaochuan Luo Yanchun Xie Yuzhuo Qin Tong Luo Yaqian Li Shilong Liu et\u00a0al. 2023. Recognize Anything: A Strong Image Tagging Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.03514 (2023)."},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"crossref","unstructured":"Zhengming Zhang Xiaoming Deng Jinyao Li Yukun Lai Cuixia Ma Yongjin Liu and Hongan Wang. 2022a. Stroke-based semantic segmentation for scene-level free-hand sketches. Vis. Comput. 39 12 (Dec. 2022) 6309\u20136321. https:\/\/doi.org\/10.1007\/s00371-022-02731-8","DOI":"10.1007\/s00371-022-02731-8"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP56404.2022.10008880"},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"crossref","unstructured":"Yixiao Zheng Jiyang Xie Aneeshan Sain Yi-Zhe Song and Zhanyu Ma. 2023. Sketch-Segformer: Transformer-Based Segmentation for Figurative and Creative Sketches. IEEE Transactions on Image Processing 32 (2023) 4595\u20134609. https:\/\/doi.org\/10.1109\/TIP.2023.3302521","DOI":"10.1109\/TIP.2023.3302521"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-04167-0_34"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_26"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730606","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:57:40Z","timestamp":1774018660000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730606"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":48,"alternative-id":["10.1145\/3721238.3730606","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730606","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}