{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:49:05Z","timestamp":1765547345468,"version":"3.40.3"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031781711"},{"type":"electronic","value":"9783031781728"}],"license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78172-8_15","type":"book-chapter","created":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T09:48:32Z","timestamp":1733132912000},"page":"222-238","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Controllable 3D Object Generation with\u00a0Single Image Prompt"],"prefix":"10.1007","author":[{"given":"Jaeseok","family":"Lee","sequence":"first","affiliation":[]},{"given":"Jaekoo","family":"Lee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"key":"15_CR1","unstructured":"Armandpour, M., Zheng, H., Sadeghian, A., Sadeghian, A., Zhou, M.: Re-imagine the negative prompt algorithm: transform 2d diffusion into 3d, alleviate janus problem and beyond. arXiv preprint arXiv:2304.04968 (2023)"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Chen, D.Z., Siddiqui, Y., Lee, H.Y., Tulyakov, S., Nie\u00dfner, M.: Text2tex: text-driven texture synthesis via diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.01701"},{"key":"15_CR3","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3d: disentangling geometry and appearance for high-quality text-to-3d content creation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), October 2023","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Y., Pan, Y., Li, Y., Yao, T., Mei, T.: Control3d: towards controllable text-to-3d generation (2023)","DOI":"10.1145\/3581783.3612489"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Downs, L., et al.: Google scanned objects: a high-quality dataset of 3d scanned household items (2022). https:\/\/arxiv.org\/abs\/2204.11918","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"15_CR6","unstructured":"Gal, R., Alaluf, Y., Atzmon, Y., Patashnik, O., Bermano, A.H., Chechik, G., Cohen-Or, D.: An image is worth one word: Personalizing text-to-image generation using textual inversion (2022)"},{"key":"15_CR7","unstructured":"He, Y., et al.: T$$^3$$bench: benchmarking current progress in text-to-3d generation (2023)"},{"issue":"1","key":"15_CR8","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1145\/963770.963772","volume":"22","author":"JL Herlocker","year":"2004","unstructured":"Herlocker, J.L., Konstan, J.A., Terveen, L.G., Riedl, J.T.: Evaluating collaborative filtering recommender systems. ACM Trans. Inf. Syst. (TOIS) 22(1), 5\u201353 (2004)","journal-title":"ACM Trans. Inf. Syst. (TOIS)"},{"key":"15_CR9","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models (2020)"},{"key":"15_CR10","unstructured":"Hong, S., Ahn, D., Kim, S.: Debiasing scores and prompts of 2d diffusion for robust text-to-3d generation (2023)"},{"key":"15_CR11","unstructured":"Hong, Y., et al.: Lrm: large reconstruction model for single image to 3d. arXiv preprint arXiv:2311.04400 (2023)"},{"key":"15_CR12","unstructured":"Katzir, O., Patashnik, O., Cohen-Or, D., Lischinski, D.: Noise-free score distillation (2023)"},{"key":"15_CR13","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012)"},{"key":"15_CR14","doi-asserted-by":"publisher","unstructured":"Lee, H., Kim, D., Lee, D., Kim, J., Lee, J.: Bridging the domain gap towards generalization in automatic colorization. In: European Conference on Computer Vision, pp. 527\u2013543. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19790-1_32","DOI":"10.1007\/978-3-031-19790-1_32"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Li, H., Yang, Y., Chang, M., Feng, H., Xu, Z., Li, Q., Chen, Y.: Srdiff: single image super-resolution with diffusion probabilistic models (2021)","DOI":"10.1016\/j.neucom.2022.01.029"},{"key":"15_CR16","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3d: high-resolution text-to-3d content creation. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"15_CR17","unstructured":"Lin, T.Y., et al.: Microsoft coco: common objects in context (2015). https:\/\/arxiv.org\/abs\/1405.0312"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Hoorick, B.V., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: Zero-shot one image to 3d object (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Lugmayr, A., Danelljan, M., Romero, A., Yu, F., Timofte, R., Gool, L.V.:\u2019 Repaint: inpainting using denoising diffusion probabilistic models (2022)","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Melas-Kyriazi, L., Rupprecht, C., Laina, I., Vedaldi, A.: Realfusion: 360 reconstruction of any object from a single image. In: CVPR (2023). https:\/\/arxiv.org\/abs\/2302.10663","DOI":"10.1109\/CVPR52729.2023.00816"},{"key":"15_CR21","unstructured":"Meng, C., et al.: Sdedit: guided image synthesis and editing with stochastic differential equations (2022)"},{"key":"15_CR22","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"15_CR23","doi-asserted-by":"publisher","unstructured":"Mohammad\u00a0Khalid, N., Xie, T., Belilovsky, E., Popa, T.: Clip-mesh: generating textured meshes from text using pretrained image-text models. In: SIGGRAPH Asia 2022 Conference Papers, SA 2022. ACM, November 2022. https:\/\/doi.org\/10.1145\/3550469.3555392","DOI":"10.1145\/3550469.3555392"},{"key":"15_CR24","doi-asserted-by":"publisher","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. 41(4), 102:1\u2013102:15 (2022). https:\/\/doi.org\/10.1145\/3528223.3530127","DOI":"10.1145\/3528223.3530127"},{"key":"15_CR25","unstructured":"OpenAI: Sora: Creating video from text (2024). https:\/\/openai.com\/sora"},{"key":"15_CR26","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3d using 2d diffusion. arXiv (2022)"},{"key":"15_CR27","unstructured":"Qian, G., et al.: Magic123: one image to high-quality 3d object generation using both 2d and 3d diffusion priors. arXiv preprint arXiv:2306.17843 (2023)"},{"key":"15_CR28","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision (2021)"},{"key":"15_CR29","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents (2022)"},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. ArXiv preprint (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"issue":"3","key":"15_CR31","doi-asserted-by":"publisher","first-page":"1623","DOI":"10.1109\/TPAMI.2020.3019967","volume":"44","author":"R Ranftl","year":"2020","unstructured":"Ranftl, R., Lasinger, K., Hafner, D., Schindler, K., Koltun, V.: Towards robust monocular depth estimation: mixing datasets for zero-shot cross-dataset transfer. IEEE Trans. Pattern Anal. Mach. Intell. 44(3), 1623\u20131637 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR32","doi-asserted-by":"crossref","unstructured":"Reizenstein, J., Shapovalov, R., Henzler, P., Sbordone, L., Labatut, P., Novotny, D.: Common objects in 3d: Large-scale learning and evaluation of real-life 3d category reconstruction. In: International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"15_CR33","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695, June 2022","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"15_CR34","doi-asserted-by":"crossref","unstructured":"Saharia, C., et al.: Palette: Image-to-image diffusion models (2022)","DOI":"10.1145\/3528233.3530757"},{"key":"15_CR35","doi-asserted-by":"crossref","unstructured":"Saharia, C., Ho, J., Chan, W., Salimans, T., Fleet, D.J., Norouzi, M.: Image super-resolution via iterative refinement (2021)","DOI":"10.1109\/TPAMI.2022.3204461"},{"key":"15_CR36","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., et al.: Laion-5b: an open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR37","unstructured":"Shen, T., Gao, J., Yin, K., Liu, M.Y., Fidler, S.: Deep marching tetrahedra: a hybrid representation for high-resolution 3d shape synthesis. In: Advances in Neural Information Processing Systems (NeurIPS) (2021)"},{"key":"15_CR38","doi-asserted-by":"crossref","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition (2015)","DOI":"10.1109\/ICCV.2015.314"},{"key":"15_CR39","unstructured":"Singer, U., et al.: Make-a-video: text-to-video generation without text-video data (2022)"},{"key":"15_CR40","unstructured":"Sinha, A., Song, J., Meng, C., Ermon, S.: D2c: diffusion-denoising models for few-shot conditional generation (2021)"},{"key":"15_CR41","doi-asserted-by":"crossref","unstructured":"Tang, J., Chen, Z., Chen, X., Wang, T., Zeng, G., Liu, Z.: Lgm: large multi-view gaussian model for high-resolution 3d content creation. arXiv preprint arXiv:2402.05054 (2024)","DOI":"10.1007\/978-3-031-73235-5_1"},{"key":"15_CR42","doi-asserted-by":"crossref","unstructured":"Wu, T., et al.: Gpt-4v(ision) is a human-aligned evaluator for text-to-3d generation (2024)","DOI":"10.1109\/CVPR52733.2024.02098"},{"key":"15_CR43","doi-asserted-by":"crossref","unstructured":"Xu, D., Jiang, Y., Wang, P., Fan, Z., Wang, Y., Wang, Z.: Neurallift-360: lifting an in-the-wild 2d photo to a 3d object with 360deg views. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4479\u20134489 (2023)","DOI":"10.1109\/CVPR52729.2023.00435"},{"key":"15_CR44","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: Ip-adapter: text compatible image prompt adapter for text-to-image diffusion models (2023)"},{"key":"15_CR45","doi-asserted-by":"crossref","unstructured":"Zabari, N., Azulay, A., Gorkor, A., Halperin, T., Fried, O.: Diffusing colors: image colorization with text guided diffusion (2023)","DOI":"10.1145\/3610548.3618180"},{"key":"15_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, L., Agrawala, M.: Adding conditional control to text-to-image diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"15_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"15_CR48","doi-asserted-by":"crossref","unstructured":"Zhu, L., et al.: Tryondiffusion: a tale of two unets. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4606\u20134615, June 2023","DOI":"10.1109\/CVPR52729.2023.00447"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78172-8_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T10:06:50Z","timestamp":1733134010000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78172-8_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"ISBN":["9783031781711","9783031781728"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78172-8_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"3 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}