{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:14:07Z","timestamp":1767323647344,"version":"3.48.0"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557363","type":"print"},{"value":"9789819557370","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5737-0_3","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:09:17Z","timestamp":1767323357000},"page":"32-46","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MonoBite: Scale-Aware 3D Reconstruction and\u00a0Volume Estimation from\u00a0Monocular Multi-food Images"],"prefix":"10.1007","author":[{"given":"Songen","family":"Gu","sequence":"first","affiliation":[]},{"given":"Lina","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Binjie","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Sanyi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Yanwei","family":"Fu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"3_CR1","doi-asserted-by":"publisher","unstructured":"AlMughrabi, A., Haroon, U., Marques, R., Radeva, P.: VolETA: one- and few-shot food volume estimation, July 2024. https:\/\/doi.org\/10.48550\/arXiv.2407.01717","DOI":"10.48550\/arXiv.2407.01717"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"AlMughrabi, A., Haroon, U., Marques, R., Radeva, P.: VolTex: food volume estimation using text-guided segmentation and neural surface reconstruction. In: Proceedings of the Computer Vision and Pattern Recognition Conference, pp. 450\u2013457 (2025)","DOI":"10.1109\/CVPRW67362.2025.00049"},{"key":"3_CR3","doi-asserted-by":"publisher","unstructured":"Ando, Y., Ege, T., Cho, J., Yanai, K.: DepthCalorieCam: a mobile application for volume-based foodcalorie estimation using depth cameras. In: Proceedings of the 5th International Workshop on Multimedia Assisted Dietary Management, Nice France, October 2019, pp. 76\u201381. ACM (2019). https:\/\/doi.org\/10.1145\/3347448.3357172","DOI":"10.1145\/3347448.3357172"},{"key":"3_CR4","unstructured":"Barrow, H.G., Tenenbaum, J.M., Bolles, R.C., Wolf, H.C.: Parametric correspondence and chamfer matching: two new techniques for image matching. In: Proceedings: Image Understanding Workshop, pp. 21\u201327. Science Applications, Inc. (1977)"},{"key":"3_CR5","unstructured":"Bochkovskii, A., et al.: Depth pro: sharp monocular metric depth in less than a second (2025). https:\/\/arxiv.org\/abs\/2410.02073"},{"key":"3_CR6","doi-asserted-by":"publisher","unstructured":"Chen, Y., et al.: MetaFood3D: 3D food dataset with nutrition values, December 2024. https:\/\/doi.org\/10.48550\/arXiv.2409.01966","DOI":"10.48550\/arXiv.2409.01966"},{"key":"3_CR7","doi-asserted-by":"publisher","unstructured":"Christ, P.F., et al.: Diabetes60 \u2014 inferring bread units from food images using fully convolutional neural networks. In: 2017 IEEE International Conference on Computer Vision Workshops (ICCVW), October 2017, pp. 1526\u20131535 (2017). https:\/\/doi.org\/10.1109\/ICCVW.2017.180","DOI":"10.1109\/ICCVW.2017.180"},{"key":"3_CR8","doi-asserted-by":"publisher","unstructured":"Fang, S., Zhu, F., Jiang, C., Zhang, S., Boushey, C.J., Delp, E.J.: A comparison of food portion size estimation using geometric models and depth images. In: 2016 IEEE International Conference on Image Processing (ICIP), September 2016, pp. 26\u201330 (2016). https:\/\/doi.org\/10.1109\/ICIP.2016.7532312","DOI":"10.1109\/ICIP.2016.7532312"},{"key":"3_CR9","doi-asserted-by":"publisher","unstructured":"Gao, A., Lo, F.P.W., Lo, B.: Food volume estimation for quantifying dietary intake with a wearable camera. In: 2018 IEEE 15th International Conference on Wearable and Implantable Body Sensor Networks (BSN), Las Vegas, NV, March 2018, pp. 110\u2013113. IEEE (2018). https:\/\/doi.org\/10.1109\/BSN.2018.8329671","DOI":"10.1109\/BSN.2018.8329671"},{"key":"3_CR10","doi-asserted-by":"publisher","unstructured":"Haroon, U., AlMughrabi, A., Zoumpekas, T., Marques, R., Radeva, P.: VolE: a point-cloud framework for food 3D reconstruction and volume estimation, May 2025. https:\/\/doi.org\/10.48550\/arXiv.2505.10205","DOI":"10.48550\/arXiv.2505.10205"},{"key":"3_CR11","doi-asserted-by":"publisher","unstructured":"Hong, Y., et al.: LRM: large reconstruction model for single image to 3D, March 2024. https:\/\/doi.org\/10.48550\/arXiv.2311.04400","DOI":"10.48550\/arXiv.2311.04400"},{"key":"3_CR12","doi-asserted-by":"publisher","unstructured":"Konstantakopoulos, F., Georga, E.I., Fotiadis, D.I.: 3D reconstruction and volume estimation of food using stereo vision techniques. In: 2021 IEEE 21st International Conference on Bioinformatics and Bioengineering (BIBE), October 2021, pp.\u00a01\u20134 (2021). https:\/\/doi.org\/10.1109\/BIBE52308.2021.9635418","DOI":"10.1109\/BIBE52308.2021.9635418"},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Lo, F.P.W., Sun, Y., Lo, B.: Depth estimation based on a single close-up image with volumetric annotations in the wild: a pilot study. In: 2019 IEEE\/ASME International Conference on Advanced Intelligent Mechatronics (AIM), July 2019, pp. 513\u2013518 (2019). https:\/\/doi.org\/10.1109\/AIM.2019.8868629","DOI":"10.1109\/AIM.2019.8868629"},{"key":"3_CR14","doi-asserted-by":"publisher","unstructured":"Ma, J., Zhang, X., Vinod, G., Raghavan, S., He, J., Zhu, F.: MFP3D: monocular food portion estimation leveraging 3D point clouds, November 2024. https:\/\/doi.org\/10.48550\/arXiv.2411.10492","DOI":"10.48550\/arXiv.2411.10492"},{"issue":"3","key":"3_CR15","doi-asserted-by":"publisher","first-page":"564","DOI":"10.3390\/s19030564","volume":"19","author":"S Makhsous","year":"2019","unstructured":"Makhsous, S., Mohammad, H.M., Schenk, J.M., Mamishev, A.V., Kristal, A.R.: A novel mobile structured light system in food 3D reconstruction and volume estimation. Sensors 19(3), 564 (2019). https:\/\/doi.org\/10.3390\/s19030564","journal-title":"Sensors"},{"key":"3_CR16","unstructured":"MetaFood: Metafood2025 - challenge 1, June 2025. https:\/\/sites.google.com\/view\/cvpr-metafood-2025\/challenge-1. Accessed 26 Jun 2025"},{"key":"3_CR17","doi-asserted-by":"publisher","unstructured":"Naritomi, S., Yanai, K.: Hungry networks: 3D mesh reconstruction of a dish and a plate from a single dish image for estimating food volume. In: Proceedings of the 2nd ACM International Conference on Multimedia in Asia, MMAsia \u201920, May 2021, pp.\u00a01\u20137. Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3444685.3446275","DOI":"10.1145\/3444685.3446275"},{"key":"3_CR18","unstructured":"Ren, T., et al.: Grounded SAM: assembling open-world models for diverse visual tasks (2024). https:\/\/arxiv.org\/abs\/2401.14159"},{"key":"3_CR19","doi-asserted-by":"publisher","unstructured":"Schonberger, J.L., Frahm, J.M.: Structure-from-Motion Revisited. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Las Vegas, NV, USA, June 2016, pp. 4104\u20134113. IEEE (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.445","DOI":"10.1109\/CVPR.2016.445"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J.L., Frahm, J.M.: Structure-from-motion revisited. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"3_CR21","doi-asserted-by":"publisher","unstructured":"Shao, Z., Vinod, G., He, J., Zhu, F.: An end-to-end food portion estimation framework based on shape reconstruction from monocular image, August 2023. https:\/\/doi.org\/10.48550\/arXiv.2308.01810","DOI":"10.48550\/arXiv.2308.01810"},{"key":"3_CR22","doi-asserted-by":"publisher","unstructured":"Tang, J., Chen, Z., Chen, X., Wang, T., Zeng, G., Liu, Z.: LGM: large multi-view gaussian model for high-resolution 3D content creation, February 2024. https:\/\/doi.org\/10.48550\/arXiv.2402.05054","DOI":"10.48550\/arXiv.2402.05054"},{"key":"3_CR23","doi-asserted-by":"publisher","unstructured":"Thames, Q., et al.: Nutrition5k: towards automatic nutritional understanding of generic food. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Nashville, TN, USA, June 2021, pp. 8899\u20138907. IEEE (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00879","DOI":"10.1109\/CVPR46437.2021.00879"},{"key":"3_CR24","doi-asserted-by":"publisher","unstructured":"Wen, B., Yang, W., Kautz, J., Birchfield, S.: FoundationPose: unified 6D pose estimation and tracking of novel objects, March 2024. https:\/\/doi.org\/10.48550\/arXiv.2312.08344","DOI":"10.48550\/arXiv.2312.08344"},{"key":"3_CR25","doi-asserted-by":"publisher","unstructured":"Xiang, J., et al.: Structured 3D Latents for scalable and versatile 3D generation, May 2025. https:\/\/doi.org\/10.48550\/arXiv.2412.01506","DOI":"10.48550\/arXiv.2412.01506"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Xu, C., He, Y., Khanna, N., Boushey, C.J., Delp, E.J.: Model-based food volume estimation using 3D pose. In: 2013 IEEE International Conference on Image Processing, pp. 2534\u20132538. IEEE (2013)","DOI":"10.1109\/ICIP.2013.6738522"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Yao, Y., Luo, Z., Li, S., Fang, T., Quan, L.: MVSNet: depth inference for unstructured multi-view stereo. In: Proceedings of the European Conference on Computer Vision (ECCV), September 2018 (2018)","DOI":"10.1007\/978-3-030-01237-3_47"},{"issue":"02","key":"3_CR28","doi-asserted-by":"publisher","first-page":"2350026","DOI":"10.1142\/S0219519423500264","volume":"23","author":"CD Yuan","year":"2023","unstructured":"Yuan, C.D., et al.: Research on 3D reconstruction method and application of food in stroke patients based on RGB-D image. J. Mech. Med. Biol. 23(02), 2350026 (2023). https:\/\/doi.org\/10.1142\/S0219519423500264","journal-title":"J. Mech. Med. Biol."},{"key":"3_CR29","doi-asserted-by":"publisher","unstructured":"Zhao, Z., et al.: Hunyuan3D 2.0: scaling diffusion models for high resolution textured 3D assets generation, February 2025. https:\/\/doi.org\/10.48550\/arXiv.2501.12202","DOI":"10.48550\/arXiv.2501.12202"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5737-0_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:09:20Z","timestamp":1767323360000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5737-0_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557363","9789819557370"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5737-0_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}