{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T16:52:57Z","timestamp":1776444777174,"version":"3.51.2"},"reference-count":34,"publisher":"Informa UK Limited","issue":"18","content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Advanced Robotics"],"published-print":{"date-parts":[[2024,9,16]]},"DOI":"10.1080\/01691864.2024.2407136","type":"journal-article","created":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T18:15:01Z","timestamp":1727806501000},"page":"1318-1334","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":10,"title":["Real-world cooking robot system from recipes based on food state recognition using foundation models and PDDL"],"prefix":"10.1080","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3527-3087","authenticated-orcid":false,"given":"Naoaki","family":"Kanazawa","sequence":"first","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7464-7187","authenticated-orcid":false,"given":"Kento","family":"Kawaharazuka","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1429-4401","authenticated-orcid":false,"given":"Yoshiki","family":"Obinata","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"given":"Kei","family":"Okada","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"given":"Masayuki","family":"Inaba","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]}],"member":"301","published-online":{"date-parts":[[2024,10]]},"reference":[{"key":"e_1_3_2_2_1","doi-asserted-by":"crossref","unstructured":"Beetz M Klank U Kresse I et\u00a0al. Robotic roommates making pancakes. In: 2011 11th IEEE-RAS International Conference on Humanoid Robots; 2011. p. 529\u2013536.","DOI":"10.1109\/Humanoids.2011.6100855"},{"key":"e_1_3_2_3_1","doi-asserted-by":"crossref","unstructured":"Bollini M Tellex S Thompson T et\u00a0al. Interpreting and executing recipes with a cooking robot. In: Experimental Robotics; Springer; 2013. p. 481\u2013495.","DOI":"10.1007\/978-3-319-00065-7_33"},{"key":"e_1_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Kazhoyan G Beetz M. Programming robotic agents with action descriptions. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE; 2017. p. 103\u2013108.","DOI":"10.1109\/IROS.2017.8202144"},{"key":"e_1_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40648-021-00204-6"},{"key":"e_1_3_2_6_1","doi-asserted-by":"crossref","unstructured":"Paulius D Dong KSP Sun Y. Task planning with a weighted functional object-oriented network. In: 2021 IEEE International Conference on Robotics and Automation (ICRA). IEEE; 2021. p. 3904\u20133910.","DOI":"10.1109\/ICRA48506.2021.9561680"},{"key":"e_1_3_2_7_1","doi-asserted-by":"crossref","unstructured":"Takata T Kiyokawa K Ramirez-Alpizar IG et\u00a0al. Efficient task\/motion planning for a dual-arm robot from language instructions and cooking images. In: 2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE; 2022. p. 12058\u201312065.","DOI":"10.1109\/IROS47612.2022.9981280"},{"key":"e_1_3_2_8_1","doi-asserted-by":"crossref","unstructured":"Lenz I Knepper RA Saxena A. DeepMPC: learning deep latent features for model predictive control. In: Robotics: Science and Systems Vol. 10; Rome Italy; 2015. p. 25.","DOI":"10.15607\/RSS.2015.XI.012"},{"key":"e_1_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016"},{"key":"e_1_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016"},{"key":"e_1_3_2_11_1","doi-asserted-by":"crossref","unstructured":"Saito N Moura J Ogata T et\u00a0al. Structured motion generation with predictive learning: proposing subgoal for long-horizon manipulation. In: 2023 IEEE International Conference on Robotics and Automation. IEEE; 2023.","DOI":"10.1109\/ICRA48891.2023.10161046"},{"key":"e_1_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016"},{"key":"e_1_3_2_13_1","doi-asserted-by":"crossref","unstructured":"Sochacki G Hughes J Hauser S et\u00a0al. Closed-loop robotic cooking of scrambled eggs with a salinity-based \u2018taste\u2019 sensor. In: 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE; 2021. p. 594\u2013600.","DOI":"10.1109\/IROS51168.2021.9636750"},{"key":"e_1_3_2_14_1","first-page":"108","article-title":"Mastication-enhanced taste-Based classification of multi-Ingredient dishes for robotic cooking","volume":"9","author":"Sochacki G","year":"2022","unstructured":"Sochacki G, Abdulali A, Iida F. Mastication-enhanced taste-Based classification of multi-Ingredient dishes for robotic cooking. Front Rob AI. 2022;9:108.","journal-title":"Front Rob AI"},{"key":"e_1_3_2_15_1","unstructured":"Paul R. Classifying cooking object's state using a tuned VGG convolutional neural network. arXiv preprint arXiv:180509391. 2018."},{"key":"e_1_3_2_16_1","unstructured":"Cobley B Boyle D. Onionbot: a system for collaborative computational cooking. arXiv preprint arXiv:201105039. 2020."},{"key":"e_1_3_2_17_1","doi-asserted-by":"crossref","unstructured":"Khan AM Ashrafee A Sayera R et\u00a0al. Rethinking cooking state recognition with vision transformers. In: 2022 25th International Conference on Computer and Information Technology (ICCIT). IEEE; 2022. p. 170\u2013175.","DOI":"10.1109\/ICCIT57492.2022.10055869"},{"key":"e_1_3_2_18_1","doi-asserted-by":"crossref","unstructured":"Kanazawa N Kawaharazuka K Obinata Y et\u00a0al. Recognition of heat-induced food state changes by time-series use of vision-language model for cooking robot. Intelligent Autonomous Systems 18. 2023.","DOI":"10.1007\/978-3-031-44851-5_42"},{"key":"e_1_3_2_19_1","unstructured":"hn M Brohan A Brown N Chebotar Y et\u00a0al. Do as i can not as i say: grounding language in robotic affordances. arXiv preprint arXiv:220401691. 2022."},{"key":"e_1_3_2_20_1","unstructured":"Skreta M Yoshikawa N Arellano-Rubach S et\u00a0al. Errors are useful prompts: instruction guided task programming with verifier-assisted iterative prompting. arXiv preprint arXiv:230314100. 2023;."},{"key":"e_1_3_2_21_1","unstructured":"Rana K Haviland J Garg S et\u00a0al. Sayplan: grounding large language models using 3d scene graphs for scalable task planning. arXiv preprint arXiv:230706135. 2023;."},{"key":"e_1_3_2_22_1","doi-asserted-by":"crossref","unstructured":"Shirai K Beltran-Hernandez CC Hamaya M et\u00a0al. Vision-language interpreter for robot task planning. arXiv preprint arXiv:231100967. 2023.","DOI":"10.1109\/ICRA57147.2024.10611112"},{"key":"e_1_3_2_23_1","unstructured":"OpenAI. GPT-4 Technical Report. arXiv preprint arXiv:230308774. 2023."},{"key":"e_1_3_2_24_1","unstructured":"Aeronautiques C Howe A Knoblock C et\u00a0al. PDDL | the planning domain definition language. Technical Report Tech Rep. 1998."},{"key":"e_1_3_2_25_1","unstructured":"Radford A Kim JW Hallacy C et\u00a0al. Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning; 2021. p.\u00a08748\u20138763."},{"key":"e_1_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3375257"},{"key":"e_1_3_2_27_1","unstructured":"Liu Z Bahety A Song S. Reflect: summarizing robot experiences for failure explanation and correction. arXiv preprint arXiv:230615724. 2023."},{"key":"e_1_3_2_28_1","unstructured":"Sou v\u02c7cek T Alayrac JB Miech A et\u00a0al. Look for the change: learning object states and state-modifying actions from untrimmed web videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2022. p. 13956\u201313966."},{"key":"e_1_3_2_29_1","doi-asserted-by":"crossref","unstructured":"Saini N Wang H Swaminathan A et\u00a0al. Chop & learn: recognizing and generating object-state compositions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision; 2023. p. 20247\u201320258.","DOI":"10.1109\/ICCV51070.2023.01852"},{"key":"e_1_3_2_30_1","doi-asserted-by":"crossref","unstructured":"Xue Z Ashutosh K Grauman K. Learning object state changes in videos: An open-world perspective. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2024. p. 18493\u201318503.","DOI":"10.1109\/CVPR52733.2024.01750"},{"key":"e_1_3_2_31_1","unstructured":"Anthropic. Claude 3.5 sonnet [https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet]; Accessed: 2024-7-30."},{"key":"e_1_3_2_32_1","unstructured":"Reid M Savinov N Teplyashin D et\u00a0al. Gemini 1.5: unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:240305530. 2024."},{"key":"e_1_3_2_33_1","unstructured":"Tasse D Smith NA. Sour cream: toward semantic processing of recipes. Carnegie Mellon University Pittsburgh Tech Rep CMU-LTI-08-005. 2008."},{"key":"e_1_3_2_34_1","doi-asserted-by":"crossref","unstructured":"Jermsurawong J Habash N. Predicting the structure of cooking recipes. In: Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing; 2015. p. 781\u2013786.","DOI":"10.18653\/v1\/D15-1090"},{"key":"e_1_3_2_35_1","doi-asserted-by":"crossref","unstructured":"Chi C Feng S Du Y et\u00a0al. Diffusion policy: visuomotor policy learning via action diffusion. arXiv preprint arXiv:230304137. 2023.","DOI":"10.15607\/RSS.2023.XIX.026"}],"container-title":["Advanced Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/01691864.2024.2407136","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,17]],"date-time":"2024-10-17T17:39:29Z","timestamp":1729186769000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/01691864.2024.2407136"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,16]]},"references-count":34,"journal-issue":{"issue":"18","published-print":{"date-parts":[[2024,9,16]]}},"alternative-id":["10.1080\/01691864.2024.2407136"],"URL":"https:\/\/doi.org\/10.1080\/01691864.2024.2407136","relation":{},"ISSN":["0169-1864","1568-5535"],"issn-type":[{"value":"0169-1864","type":"print"},{"value":"1568-5535","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,16]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-04-08","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-07-30","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-09-05","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-10-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}