{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T09:54:00Z","timestamp":1762509240154,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":87,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819609161"},{"type":"electronic","value":"9789819609178"}],"license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0917-8_10","type":"book-chapter","created":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T07:58:37Z","timestamp":1733558317000},"page":"167-186","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Exploring Limits of\u00a0Diffusion-Synthetic Training with\u00a0Weakly Supervised Semantic Segmentation"],"prefix":"10.1007","author":[{"given":"Ryota","family":"Yoshihashi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuya","family":"Otsuka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kenji","family":"Doi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tomohiro","family":"Tanaka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hirokatsu","family":"Kataoka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,8]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, J., Cho, S., Kwak, S.: Weakly supervised learning of instance segmentation with inter-pixel relations. In: CVPR. pp. 2209\u20132218 (2019)","DOI":"10.1109\/CVPR.2019.00231"},{"key":"10_CR2","unstructured":"Baranchuk, D., Voynov, A., Rubachev, I., Khrulkov, V., Babenko, A.: Label-efficient semantic segmentation with diffusion models. In: ICLR (2022)"},{"key":"10_CR3","unstructured":"Bi\u0144kowski, M., Sutherland, D.J., Arbel, M., Gretton, A.: Demystifying MMD GANs. ICLR (2018)"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Chen, C., Shu, K.: PromptDA: Label-guided data augmentation for prompt-based few-shot learners. arXiv preprint arXiv:2205.09229 (2022)","DOI":"10.18653\/v1\/2023.eacl-main.41"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"10_CR6","unstructured":"Chen, X., Fang, H., Lin, T.Y., Vedantam, R., Gupta, S., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, T., Wu, X., Hua, X.S., Zhang, H., Sun, Q.: Class re-activation maps for weakly-supervised semantic segmentation. In: CVPR. pp. 969\u2013978 (2022)","DOI":"10.1109\/CVPR52688.2022.00104"},{"key":"10_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: CVPR. pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"10_CR9","unstructured":"Cho, J.H., Mall, U., Bala, K., Hariharan, B.: PiCIE: Unsupervised semantic segmentation using invariance and equivariance in clustering. In: CVPR. pp. 16794\u201316804 (2021)"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: CVPR. pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A large-scale hierarchical image database. In: CVPR. pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Dwibedi, D., Misra, I., Hebert, M.: Cut, paste and learn: Surprisingly easy synthesis for instance detection. In: ICCV. pp. 1301\u20131310 (2017)","DOI":"10.1109\/ICCV.2017.146"},{"key":"10_CR13","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S.A., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes challenge: A retrospective. IJCV 111, 98\u2013136 (2015)","journal-title":"IJCV"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Fan, L., Chen, K., Krishnan, D., Katabi, D., Isola, P., Tian, Y.: Scaling laws of synthetic images for model training... for now. In: CVPR. pp. 7382\u20137392 (2024)","DOI":"10.1109\/CVPR52733.2024.00705"},{"key":"10_CR15","unstructured":"Gao, S., Li, Z.Y., Yang, M.H., Cheng, M.M., Han, J., Torr, P.: Large-scale unsupervised semantic segmentation. IEEE TPAMI (2022)"},{"key":"10_CR16","unstructured":"Ge, Y., Xu, J., Zhao, B.N., Joshi, N., Itti, L., Vineet, V.: Beyond generation: Harnessing text to image models for object detection and segmentation. arXiv preprint arXiv:2309.05956 (2023)"},{"key":"10_CR17","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. In: NeurIPS. pp. 2672\u20132680 (2014)"},{"key":"10_CR18","unstructured":"Hamilton, M., Zhang, Z., Hariharan, B., Snavely, N., Freeman, W.T.: Unsupervised semantic segmentation by distilling feature correspondences. ICLR (2022)"},{"key":"10_CR19","unstructured":"Hammoud, H.A.A.K., Itani, H., Pizzati, F., Torr, P., Bibi, A., Ghanem, B.: SynthCLIP: Are we ready for a fully synthetic clip training? CVPRW (2024)"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR. pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"10_CR21","unstructured":"Healthcare Intelligence Laboratory: SimpleCRF. https:\/\/github.com\/HiLab-git\/SimpleCRF (2017)"},{"key":"10_CR22","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-or, D.: Prompt-to-prompt image editing with cross-attention control. In: ICLR (2022)"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: CLIPScore: A reference-free evaluation metric for image captioning. In: Conference on Empirical Methods in Natural Language Processing (EMNLP) (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"10_CR24","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. NeurIPS 30 (2017)"},{"key":"10_CR25","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. NeurIPS 33, 6840\u20136851 (2020)","journal-title":"Denoising diffusion probabilistic models. NeurIPS"},{"key":"10_CR26","unstructured":"Hoffman, J., Tzeng, E., Park, T., Zhu, J.Y., Isola, P., Saenko, K., Efros, A., Darrell, T.: CyCADA: Cycle-consistent adversarial domain adaptation. In: ICML. pp. 1989\u20131998. Pmlr (2018)"},{"key":"10_CR27","unstructured":"Hu, E.J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., Chen, W.: LoRA: Low-rank adaptation of large language models. ICLR (2022)"},{"key":"10_CR28","unstructured":"Hugging Face: DreamBooth fine-tuning with LoRA (2023), https:\/\/huggingface.co\/docs\/peft\/task_guides\/dreambooth_lora"},{"key":"10_CR29","unstructured":"Hugging Face: Stable-Diffusion-v1-4 (2023), https:\/\/huggingface.co\/CompVis\/stable-diffusion-v1-4"},{"key":"10_CR30","unstructured":"Isola, P.: Generative Models as Data++ (2023), hhttps:\/\/iplab.dmi.unict.it\/icvss2023\/Abstracts\/Isola"},{"key":"10_CR31","doi-asserted-by":"crossref","unstructured":"Jo, S., Yu, I.J.: Puzzle-CAM: Improved localization via matching partial and full features. In: ICIP. pp. 639\u2013643. IEEE (2021)","DOI":"10.1109\/ICIP42928.2021.9506058"},{"key":"10_CR32","doi-asserted-by":"crossref","unstructured":"Jo, S., Yu, I.J., Kim, K.: MARS: Model-agnostic biased object removal without additional supervision for weakly-supervised semantic segmentation. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00063"},{"key":"10_CR33","doi-asserted-by":"crossref","unstructured":"Karazija, L., Laina, I., Vedaldi, A., Rupprecht, C.: Diffusion models for zero-shot open-vocabulary segmentation. arXiv preprint arXiv:2306.09316 (2023)","DOI":"10.1007\/978-3-031-72652-1_18"},{"key":"10_CR34","doi-asserted-by":"crossref","unstructured":"Kataoka, H., Hayamizu, R., Yamada, R., Nakashima, K., Takashima, S., Zhang, X., Martinez-Noriega, E.J., Inoue, N., Yokota, R.: Replacing labeled real-image datasets with auto-generated contours. In: CVPR. pp. 21232\u201321241 (2022)","DOI":"10.1109\/CVPR52688.2022.02055"},{"key":"10_CR35","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., Ravi, N., Mao, H., Rolland, C., Gustafson, L., Xiao, T., Whitehead, S., Berg, A.C., Lo, W.Y., et\u00a0al.: Segment anything. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"10_CR36","unstructured":"Kr\u00e4henb\u00fchl, P., Koltun, V.: Efficient inference in fully connected CRFs with gaussian edge potentials. NeurIPS 24 (2011)"},{"key":"10_CR37","unstructured":"LAION.ai: Safety Review for LAION, 19 Dec, 2023 (2023), https:\/\/laion.ai\/notes\/laion-maintanence\/"},{"key":"10_CR38","doi-asserted-by":"crossref","unstructured":"Li, D., Ling, H., Kim, S.W., Kreis, K., Fidler, S., Torralba, A.: BigDatasetGAN: Synthesizing imagenet with pixel-wise annotations. In: CVPR. pp. 21330\u201321340 (2022)","DOI":"10.1109\/CVPR52688.2022.02064"},{"key":"10_CR39","doi-asserted-by":"crossref","unstructured":"Li, Y., Duan, Y., Kuang, Z., Chen, Y., Zhang, W., Li, X.: Uncertainty estimation via response scaling for pseudo-mask noise mitigation in weakly-supervised semantic segmentation. In: AAAI. vol.\u00a036, pp. 1447\u20131455 (2022)","DOI":"10.1609\/aaai.v36i2.20034"},{"key":"10_CR40","doi-asserted-by":"crossref","unstructured":"Li, Y., Kuang, Z., Liu, L., Chen, Y., Zhang, W.: Pseudo-mask matters in weakly-supervised semantic segmentation. In: ICCV. pp. 6964\u20136973 (2021)","DOI":"10.1109\/ICCV48922.2021.00688"},{"key":"10_CR41","doi-asserted-by":"crossref","unstructured":"Li, Z., Zhou, Q., Zhang, X., Zhang, Y., Wang, Y., Xie, W.: Open-vocabulary object segmentation with diffusion models. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00705"},{"key":"10_CR42","doi-asserted-by":"crossref","unstructured":"Lin, Y., Chen, M., Wang, W., Wu, B., Li, K., Lin, B., Liu, H., He, X.: CLIP is also an efficient segmenter: A text-driven approach for weakly supervised semantic segmentation. In: CVPR. pp. 15305\u201315314 (2023)","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"10_CR43","doi-asserted-by":"crossref","unstructured":"Liu, S., Liu, K., Zhu, W., Shen, Y., Fernandez-Granda, C.: Adaptive early-learning correction for segmentation from noisy annotations. In: CVPR. pp. 2606\u20132616 (2022)","DOI":"10.1109\/CVPR52688.2022.00263"},{"key":"10_CR44","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. In: ICCV. pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"10_CR45","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: CVPR. pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"10_CR46","unstructured":"Ma, C., Yang, Y., Ju, C., Zhang, F., Liu, J., Wang, Y., Zhang, Y., Wang, Y.: DiffusionSeg: Adapting diffusion towards unsupervised object discovery. arXiv preprint arXiv:2303.09813 (2023)"},{"key":"10_CR47","unstructured":"MMSegmentation Contributors: MMSegmentation: Openmmlab semantic segmentation toolbox and benchmark. https:\/\/github.com\/open-mmlab\/mmsegmentation (2020)"},{"key":"10_CR48","unstructured":"Nguyen, Q., Vu, T., Tran, A., Nguyen, K.: Dataset diffusion: Diffusion-based synthetic dataset generation for pixel-level semantic segmentation. NeurIPS (2023)"},{"key":"10_CR49","unstructured":"Nichol, A.Q., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., Mcgrew, B., Sutskever, I., Chen, M.: GLIDE: Towards photorealistic image generation and editing with text-guided diffusion models. In: ICML. pp. 16784\u201316804. PMLR (2022)"},{"key":"10_CR50","unstructured":"OpenAI: ChatGPT, Oct 16 version (2023), https:\/\/chat.openai.com\/chat"},{"key":"10_CR51","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., Sivic, J.: Is object localization for free?-weakly-supervised learning with convolutional neural networks. In: CVPR. pp. 685\u2013694 (2015)","DOI":"10.1109\/CVPR.2015.7298668"},{"key":"10_CR52","doi-asserted-by":"crossref","unstructured":"Qi, L., Yang, L., Guo, W., Xu, Y., Du, B., Jampani, V., Yang, M.H.: UniGS: Unified representation for image generation and segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 6305\u20136315 (2024)","DOI":"10.1109\/CVPR52733.2024.00603"},{"key":"10_CR53","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML. pp. 8748\u20138763. PMLR (2021)"},{"key":"10_CR54","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., Sutskever, I.: Zero-shot text-to-image generation. In: ICML. pp. 8821\u20138831. PMLR (2021)"},{"key":"10_CR55","doi-asserted-by":"crossref","unstructured":"Rao, Y., Zhao, W., Chen, G., Tang, Y., Zhu, Z., Huang, G., Zhou, J., Lu, J.: DenseCLIP: Language-guided dense prediction with context-aware prompting. In: CVPR. pp. 18082\u201318091 (2022)","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"10_CR56","doi-asserted-by":"crossref","unstructured":"Richter, S.R., Vineet, V., Roth, S., Koltun, V.: Playing for data: Ground truth from computer games. In: ECCV. pp. 102\u2013118. Springer (2016)","DOI":"10.1007\/978-3-319-46475-6_7"},{"key":"10_CR57","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR. pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"10_CR58","doi-asserted-by":"crossref","unstructured":"Rong, S., Tu, B., Wang, Z., Li, J.: Boundary-enhanced co-training for weakly supervised semantic segmentation. In: CVPR. pp. 19574\u201319584 (2023)","DOI":"10.1109\/CVPR52729.2023.01875"},{"key":"10_CR59","doi-asserted-by":"crossref","unstructured":"Ros, G., Sellart, L., Materzynska, J., Vazquez, D., Lopez, A.M.: The SYNTHIA dataset: A large collection of synthetic images for semantic segmentation of urban scenes. In: CVPR. pp. 3234\u20133243 (2016)","DOI":"10.1109\/CVPR.2016.352"},{"key":"10_CR60","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"10_CR61","doi-asserted-by":"crossref","unstructured":"Sariyildiz, M.B., Alahari, K., Larlus, D., Kalantidis, Y.: Fake it till you make it: Learning transferable representations from synthetic imagenet clones. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00774"},{"key":"10_CR62","unstructured":"Schuhmann, C., Vencu, R., Beaumont, R., Kaczmarczyk, R., Mullis, C., Katta, A., Coombes, T., Jitsev, J., Komatsuzaki, A.: Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. NeurIPS workshop (2021)"},{"key":"10_CR63","first-page":"33754","volume":"35","author":"G Shin","year":"2022","unstructured":"Shin, G., Xie, W., Albanie, S.: ReCo: Retrieve and co-segment for zero-shot transfer. NeurIPS 35, 33754\u201333767 (2022)","journal-title":"NeurIPS"},{"key":"10_CR64","doi-asserted-by":"crossref","unstructured":"Shinoda, R., Hayamizu, R., Nakashima, K., Inoue, N., Yokota, R., Kataoka, H.: SegRCDB: Semantic segmentation via formula-driven supervised learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 20054\u201320063 (2023)","DOI":"10.1109\/ICCV51070.2023.01835"},{"key":"10_CR65","unstructured":"Sun, W., Liu, Z., Zhang, Y., Zhong, Y., Barnes, N.: An alternative to WSSS? an empirical study of the segment anything model (SAM) on weakly-supervised semantic segmentation problems. arXiv preprint arXiv:2305.01586 (2023)"},{"key":"10_CR66","doi-asserted-by":"crossref","unstructured":"Tang, R., Liu, L., Pandey, A., Jiang, Z., Yang, G., Kumar, K., Stenetorp, P., Lin, J., Ture, F.: What the DAAM: Interpreting stable diffusion using cross attention. Annual Meeting of the Association for Computational Linguistics (ACL) (2023)","DOI":"10.18653\/v1\/2023.acl-long.310"},{"key":"10_CR67","doi-asserted-by":"crossref","unstructured":"Tian, J., Aggarwal, L., Colaco, A., Kira, Z., Gonzalez-Franco, M.: Diffuse, attend, and segment: Unsupervised zero-shot segmentation using stable diffusion. arXiv preprint arXiv:2308.12469 (2023)","DOI":"10.1109\/CVPR52733.2024.00341"},{"key":"10_CR68","doi-asserted-by":"crossref","unstructured":"Tian, Y., Fan, L., Chen, K., Katabi, D., Krishnan, D., Isola, P.: Learning vision from models rivals learning vision from data. In: CVPR. pp. 15887\u201315898 (2024)","DOI":"10.1109\/CVPR52733.2024.01504"},{"key":"10_CR69","unstructured":"Tian, Y., Fan, L., Isola, P., Chang, H., Krishnan, D.: StableRep: Synthetic images from text-to-image models make strong visual representation learners. arXiv preprint arXiv:2306.00984 (2023)"},{"key":"10_CR70","unstructured":"Wang, J., Li, X., Zhang, J., Xu, Q., Zhou, Q., Yu, Q., Sheng, L., Xu, D.: Diffusion model is secretly a training-free open vocabulary semantic segmenter. arXiv preprint arXiv:2309.02773 (2023)"},{"key":"10_CR71","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/j.neucom.2019.11.019","volume":"381","author":"X Wang","year":"2020","unstructured":"Wang, X., Ma, H., You, S.: Deep clustering for weakly-supervised semantic segmentation in autonomous driving scenes. Neurocomputing 381, 20\u201328 (2020)","journal-title":"Neurocomputing"},{"key":"10_CR72","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zhang, J., Kan, M., Shan, S., Chen, X.: Self-supervised equivariant attention mechanism for weakly supervised semantic segmentation. In: CVPR. pp. 12275\u201312284 (2020)","DOI":"10.1109\/CVPR42600.2020.01229"},{"key":"10_CR73","doi-asserted-by":"crossref","unstructured":"Wang, Y., Xu, C., Sun, Q., Hu, H., Tao, C., Geng, X., Jiang, D.: PromDA: Prompt-based data augmentation for low-resource NLU tasks. pp. 4242\u20134255 (2022)","DOI":"10.18653\/v1\/2022.acl-long.292"},{"key":"10_CR74","doi-asserted-by":"crossref","unstructured":"Wei, J., Zou, K.: EDA: Easy data augmentation techniques for boosting performance on text classification tasks. In: Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). pp. 6382\u20136388 (2019)","DOI":"10.18653\/v1\/D19-1670"},{"key":"10_CR75","doi-asserted-by":"crossref","unstructured":"Wu, W., Dai, T., Huang, X., Ma, F., Xiao, J.: Image augmentation with controlled diffusion for weakly-supervised semantic segmentation. arXiv preprint arXiv:2310.09760 (2023)","DOI":"10.2139\/ssrn.5033159"},{"key":"10_CR76","doi-asserted-by":"crossref","unstructured":"Wu, W., Zhao, Y., Shou, M.Z., Zhou, H., Shen, C.: Diffumask: Synthesizing images with pixel-level annotations for semantic segmentation using diffusion models. ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00117"},{"key":"10_CR77","doi-asserted-by":"crossref","unstructured":"Xu, J., Liu, S., Vahdat, A., Byeon, W., Wang, X., De\u00a0Mello, S.: Open-vocabulary panoptic segmentation with text-to-image diffusion models. In: CVPR. pp. 2955\u20132966 (2023)","DOI":"10.1109\/CVPR52729.2023.00289"},{"key":"10_CR78","unstructured":"Yang, L., Xu, X., Kang, B., Shi, Y., Zhao, H.: FreeMask: Synthetic images with dense annotations make stronger segmentation models. NeurIPS 36 (2024)"},{"key":"10_CR79","doi-asserted-by":"crossref","unstructured":"Yang, X., Gong, X.: Foundation model assisted weakly supervised semantic segmentation. In: WACV. pp. 523\u2013532 (2024)","DOI":"10.1109\/WACV57701.2024.00058"},{"key":"10_CR80","unstructured":"Yang, Z., Zhan, F., Liu, K., Xu, M., Lu, S.: Ai-generated images as data source: The dawn of synthetic era. arXiv preprint arXiv:2310.01830 (2023)"},{"key":"10_CR81","first-page":"14264","volume":"34","author":"P Yu","year":"2021","unstructured":"Yu, P., Xie, S., Ma, X., Zhu, Y., Wu, Y.N., Zhu, S.C.: Unsupervised foreground extraction via deep region competition. NeurIPS 34, 14264\u201314279 (2021)","journal-title":"NeurIPS"},{"key":"10_CR82","unstructured":"Zhang, D.J., Xu, M., Xue, C., Zhang, W., Han, X., Bai, S., Shou, M.Z.: Free-ATM: Exploring unsupervised learning on diffusion-generated images with free attention masks. arXiv preprint arXiv:2308.06739 (2023)"},{"key":"10_CR83","doi-asserted-by":"crossref","unstructured":"Zhang, L., Agrawala, M.: Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"10_CR84","doi-asserted-by":"crossref","unstructured":"Zhao, W., Rao, Y., Liu, Z., Liu, B., Zhou, J., Lu, J.: Unleashing text-to-image diffusion models for visual perception. ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00527"},{"key":"10_CR85","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Ye, Q., Wu, W., Shen, C., Wan, F.: Generative prompt model for weakly supervised object localization. In: ICCV. pp. 6351\u20136361 (2023)","DOI":"10.1109\/ICCV51070.2023.00584"},{"key":"10_CR86","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: CVPR. pp. 2921\u20132929 (2016)","DOI":"10.1109\/CVPR.2016.319"},{"key":"10_CR87","doi-asserted-by":"crossref","unstructured":"Zhou, C., Loy, C.C., Dai, B.: Extract free dense labels from CLIP. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19815-1_40"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0917-8_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T08:25:40Z","timestamp":1733559940000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0917-8_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"ISBN":["9789819609161","9789819609178"],"references-count":87,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0917-8_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,8]]},"assertion":[{"value":"8 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}