{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T10:43:55Z","timestamp":1762253035401,"version":"3.37.3"},"reference-count":247,"publisher":"Informa UK Limited","issue":"15-16","license":[{"start":{"date-parts":[[2019,6,24]],"date-time":"2019-06-24T00:00:00Z","timestamp":1561334400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001691","name":"Ministry of Education, Culture, Sports, Science, and Technology, Japan","doi-asserted-by":"publisher","award":["18H03308","16H06569"],"award-info":[{"award-number":["18H03308","16H06569"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003382","name":"Core Research for Evolutional Science and Technology","doi-asserted-by":"publisher","award":["15656632"],"award-info":[{"award-number":["15656632"]}],"id":[{"id":"10.13039\/501100003382","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Advanced Robotics"],"published-print":{"date-parts":[[2019,8,18]]},"DOI":"10.1080\/01691864.2019.1632223","type":"journal-article","created":{"date-parts":[[2019,6,25]],"date-time":"2019-06-25T15:49:20Z","timestamp":1561477760000},"page":"700-730","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":41,"title":["Survey on frontiers of language and robotics"],"prefix":"10.1080","volume":"33","author":[{"given":"T.","family":"Taniguchi","sequence":"first","affiliation":[{"name":"Department of Information Science and Engineering, Ritsumeikan University, Kusatsu, Japan"}]},{"given":"D.","family":"Mochihashi","sequence":"additional","affiliation":[{"name":"The Institute of Statistical Mathematics, Tachikawa, Japan"},{"name":"SOKENDAI (The Graduate University for Advanced Studies), Tokyo, Japan"}]},{"given":"T.","family":"Nagai","sequence":"additional","affiliation":[{"name":"Graduate School of Engineering Science, Osaka University, Toyonaka, Osaka, Japan"}]},{"given":"S.","family":"Uchida","sequence":"additional","affiliation":[{"name":"Faculty of Languages and Cultures, Kyushu University, Fukuoka, Japan"}]},{"given":"N.","family":"Inoue","sequence":"additional","affiliation":[{"name":"Graduate School of Information Sciences, Tohoku University, Sendai, Japan"},{"name":"RIKEN Center for Advanced Intelligence Project, Chuo-ku, Japan"}]},{"given":"I.","family":"Kobayashi","sequence":"additional","affiliation":[{"name":"Advanced Sciences, Graduate School of Humanities and Sciences, Ochanomizu University, Tokyo, Japan"}]},{"given":"T.","family":"Nakamura","sequence":"additional","affiliation":[{"name":"Department of Mechanical Engineering and Intelligent Systems, The University of Electro-Communications, Chofu, Japan"}]},{"given":"Y.","family":"Hagiwara","sequence":"additional","affiliation":[{"name":"Department of Information Science and Engineering, Ritsumeikan University, Kusatsu, Japan"}]},{"given":"N.","family":"Iwahashi","sequence":"additional","affiliation":[{"name":"Department of Information and Communication Engineering, Okayama Prefectural University, Okayama, Japan"}]},{"given":"T.","family":"Inamura","sequence":"additional","affiliation":[{"name":"SOKENDAI (The Graduate University for Advanced Studies), Tokyo, Japan"},{"name":"National Institute of Informatics, Tokyo, Japan"}]}],"member":"301","published-online":{"date-parts":[[2019,6,24]]},"reference":[{"key":"CIT0001","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2014.09.031"},{"key":"CIT0002","unstructured":"Kanda T, Ishiguro H, Imai M, et\u00a0al. Body movement analysis of human\u2013robot interaction. International Joint Conferences on Artificial Intelligence (IJCAI); Acapulco, Mexico; Vol. 3; 2003. p. 177\u2013182."},{"key":"CIT0003","doi-asserted-by":"crossref","unstructured":"Okuno Y, Kanda T, Imai M, et\u00a0al. Providing route directions: design of robot's utterance, gesture, and timing. ACM\/IEEE International Conference on Human Robot Interaction; San Diego, California, USA; 2009. p. 53\u201360.","DOI":"10.1145\/1514095.1514108"},{"key":"CIT0004","doi-asserted-by":"publisher","DOI":"10.5898\/JHRI.6.1.Admoni"},{"key":"CIT0005","doi-asserted-by":"crossref","unstructured":"Mutlu B, Yamaoka F, Kanda T, et\u00a0al. Nonverbal leakage in robots: communication of intentions through seemingly unintentional behavior. ACM\/IEEE International Conference on Human Robot Interaction; San Diego, California, USA; 2009. p. 69\u201376.","DOI":"10.1145\/1514095.1514110"},{"key":"CIT0006","doi-asserted-by":"publisher","DOI":"10.1163\/016918610X493561"},{"key":"CIT0007","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2014.12.006"},{"key":"CIT0008","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"CIT0009","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-014-0629-7"},{"key":"CIT0010","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.40"},{"issue":"8","key":"CIT0011","first-page":"1489","volume":"33","author":"Wu J","year":"2010","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"CIT0012","doi-asserted-by":"publisher","DOI":"10.1016\/S0020-0255(03)00167-1"},{"key":"CIT0013","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e91-d.2.312"},{"key":"CIT0014","doi-asserted-by":"crossref","unstructured":"Hatori J, Kikuchi Y, Kobayashi S, et\u00a0al. Interactively picking real-world objects with unconstrained spoken language instructions. IEEE International Conference on Robotics and Automation (ICRA); Brisbane, Australia; 2018. p. 3774\u20133781.","DOI":"10.1109\/ICRA.2018.8460699"},{"key":"CIT0015","doi-asserted-by":"crossref","unstructured":"Anderson P, Wu Q, Teney D, et\u00a0al. Vision-and-language navigation: interpreting visually-grounded navigation instructions in real environments. IEEE\/CVF Conference on Computer Vision and Pattern Recognition; Piscataway, NJ; 2018. p. 3674\u20133683.","DOI":"10.1109\/CVPR.2018.00387"},{"key":"CIT0016","unstructured":"Hermann KM, Hill F, Green S, et\u00a0al. Grounded language learning in a simulated 3D world. CoRR. 2017. abs\/1706.06551."},{"key":"CIT0017","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2016.1164622"},{"key":"CIT0018","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2018.2867772"},{"key":"CIT0019","unstructured":"Iwahashi N. A method for forming mutual beliefs for communication through human\u2013robot multi-modal interaction. Proceedings of the Fourth SIGdial Workshop on Discourse and Dialogue; Sapporo, Japan; 2003. p. 79\u201386."},{"key":"CIT0020","doi-asserted-by":"publisher","DOI":"10.1016\/0167-2789(90)90087-6"},{"key":"CIT0021","doi-asserted-by":"publisher","DOI":"10.1080\/09540099208946620"},{"key":"CIT0022","doi-asserted-by":"crossref","unstructured":"Steels L. The symbol grounding problem has been solved, so what's next? Symbols and embodiment: debates on meaning and cognition. Oxford, UK: Oxford University Press; 2008. p. 223\u2013244.","DOI":"10.1093\/acprof:oso\/9780199217274.003.0012"},{"key":"CIT0023","volume-title":"Philosophy in the flesh","volume":"4","author":"Lakoff G","year":"1999"},{"key":"CIT0024","doi-asserted-by":"publisher","DOI":"10.1016\/j.pragma.2003.10.009"},{"volume-title":"From molecule to metaphor: a neural theory of language","year":"2008","author":"Feldman J.","key":"CIT0025"},{"key":"CIT0026","doi-asserted-by":"crossref","unstructured":"Huang PY, Liu F, Shiang SR, et\u00a0al. Attention-based multimodal neural machine translation. Proceedings of the First Conference on Machine Translation (WMT16); Berlin, Germany; Vol. 2; 2016. p. 639\u2013645.","DOI":"10.18653\/v1\/W16-2360"},{"key":"CIT0027","unstructured":"Kiros R, Salakhutdinov R, Zemel RS. Unifying visual-semantic embeddings with multimodal neural language models. Preprint; 2014. arXiv:14112539."},{"key":"CIT0028","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2587640"},{"key":"CIT0029","doi-asserted-by":"crossref","unstructured":"Antol S, Agrawal A, Lu J, et\u00a0al. VQA: visual question answering. Proceedings of the IEEE International Conference on Computer Vision; Santiago, Chile; 2015. p. 2425\u20132433.","DOI":"10.1109\/ICCV.2015.279"},{"key":"CIT0030","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2594134"},{"key":"CIT0031","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2019.01.007"},{"key":"CIT0032","doi-asserted-by":"publisher","DOI":"10.2307\/2268661"},{"key":"CIT0033","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(93)90015-4"},{"key":"CIT0034","unstructured":"Gelfond M, Lifschitz V. The stable model semantics for logic programming. In: Kowalski R, Bowen, Kenneth, editors. Proceedings of International Logic Programming Conference and Symposium. MIT Press; 1988. p. 1070\u20131080."},{"key":"CIT0035","doi-asserted-by":"crossref","unstructured":"Sato T. A statistical learning method for logic programs with distributional semantics. The 12th International Conference on Logic Programming; Tokyo; 1995. p. 715\u2013729.","DOI":"10.7551\/mitpress\/4298.003.0069"},{"key":"CIT0036","first-page":"254","volume":"32","author":"Muggleton S.","year":"1996","journal-title":"Adv Induct Logic Program"},{"key":"CIT0037","unstructured":"De Raedt L, Kimmig A, Toivonen H. ProbLog: a probabilistic prolog and its application in link discovery. International Joint Conference on Artificial Intelligence; 2007. p. 2468\u20132473."},{"key":"CIT0038","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-006-5833-1"},{"key":"CIT0039","first-page":"1","volume":"18","author":"Bach SH","year":"2017","journal-title":"J Mach Learn Res (JMLR)"},{"key":"CIT0040","doi-asserted-by":"publisher","DOI":"10.1145\/116825.116838"},{"key":"CIT0041","doi-asserted-by":"publisher","DOI":"10.1017\/S1471068414000076"},{"key":"CIT0042","unstructured":"Mikolov T, Sutskever I, Chen K, et\u00a0al. Distributed representations of words and phrases and their compositionality. Advances in Neural Information Processing Systems; Stateline, Nevada, USA; 2013. p. 3111\u20133119."},{"key":"CIT0043","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning CD. GloVe: global vectors for word representation. Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing; Doha, Qatar; 2014. p. 1532\u20131543.","DOI":"10.3115\/v1\/D14-1162"},{"key":"CIT0044","unstructured":"Kiros R, Zhu Y, Salakhutdinov RR, et\u00a0al. Skip-thought vectors. Advances in Neural Information Processing Systems; Montreal, Canada; 2015. p. 3294\u20133302."},{"key":"CIT0045","doi-asserted-by":"crossref","unstructured":"Conneau A, Kiela D, Schwenk H, et\u00a0al. Supervised learning of universal sentence representations from natural language inference data. Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing; Copenhagen, Denmark; 2017. p. 670\u2013680.","DOI":"10.18653\/v1\/D17-1070"},{"key":"CIT0046","unstructured":"Cohen WW. Tensorlog: a differentiable deductive database. CoRR; 2016. abs\/1605.06523."},{"key":"CIT0047","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00219"},{"key":"CIT0048","unstructured":"Wang WY, Cohen WW. Learning first-order logic embeddings via matrix factorization. Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence; New York, USA; 2016. p. 2132\u20132138."},{"key":"CIT0049","unstructured":"Bowman SR, Potts C, Manning CD. Learning distributed word representations for natural logic reasoning; 2014. p. 10\u201313."},{"key":"CIT0050","doi-asserted-by":"crossref","unstructured":"Tian R, Okazaki N, Inui K. Learning semantically and additively compositional distributional representations. Annual Meeting of the Association for Computational Linguistics; 2016. p. 1277\u20131287.","DOI":"10.18653\/v1\/P16-1121"},{"key":"CIT0051","doi-asserted-by":"crossref","unstructured":"Yanaka H, Mineshima K, Mart\u00ednez-G\u00f3mez P, et\u00a0al. Determining semantic textual similarity using natural deduction proofs. Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing; Copenhagen, Denmark; 2017 Sep. Association for Computational Linguistics. p. 681\u2013691.","DOI":"10.18653\/v1\/D17-1071"},{"key":"CIT0052","unstructured":"Rockt\u00e4schel T, Riedel S. End-to-end differentiable proving. 2017. p. 3788\u20133800."},{"key":"CIT0053","doi-asserted-by":"crossref","unstructured":"Modi A. Event embeddings for semantic script modeling. Proceedings of the 20th SIGNLL Conference on Computational Natural Language Learning; Berlin, Germany; 2016. p. 75\u201383.","DOI":"10.18653\/v1\/K16-1008"},{"key":"CIT0054","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2018.2807452"},{"key":"CIT0055","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2017.2754499"},{"key":"CIT0056","unstructured":"Weber N, Balasubramanian N, Chambers N. Event representations with tensor-based compositions. CoRR; 2017. abs\/1711.07611."},{"key":"CIT0057","unstructured":"Bordes A, Usunier N, Garcia-Duran A, et\u00a0al. Translating embeddings for modeling multi-relational data. Advances in Neural Information Processing Systems 26 (NIPS); Stateline, Nevada, USA; 2013. p. 2787\u20132795."},{"key":"CIT0058","unstructured":"Jurafsky D, Martin JH. Speech and language processing. 2nd ed. Upper Saddle River, NJ: Prentice Hall; 2008. (Prentice hall series in artificial intelligence)."},{"key":"CIT0059","doi-asserted-by":"crossref","unstructured":"Kong L, Rush AM, Smith NA. Transforming dependencies into phrase structures. Denver, CO: North American Chapter of the Association for Computational Linguistics; 2015. p. 788\u2013798.","DOI":"10.3115\/v1\/N15-1080"},{"key":"CIT0060","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/6591.001.0001","volume-title":"The syntactic process","author":"Steedman M.","year":"2000"},{"key":"CIT0061","unstructured":"Shindo H, Miyao Y, Fujino A, et\u00a0al. Bayesian symbol-refined tree substitution grammars for syntactic parsing. Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics; Jeju Island, Korea; 2012. p. 440\u2013448."},{"key":"CIT0062","doi-asserted-by":"crossref","unstructured":"Matsuzaki T, Miyao Y, Tsujii J. Probabilistic CFG with latent annotations. Association for Computational Linguistics; Michigan, USA; 2005. p. 75\u201382.","DOI":"10.3115\/1219840.1219850"},{"key":"CIT0063","doi-asserted-by":"crossref","unstructured":"Klein D, Manning C. Corpus-based induction of syntactic structure: Models of dependency and constituency. Annual Conference of Association for Computational Linguistics; Barcelona, Spain; 2004. p. 478\u2013485.","DOI":"10.3115\/1218955.1219016"},{"key":"CIT0064","doi-asserted-by":"crossref","unstructured":"Headden WP III, Johnson M, McClosky D. Improving unsupervised dependency parsing with richer contexts and smoothing. Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies; Boulder, CO, USA; 2009. p. 101\u2013109.","DOI":"10.3115\/1620754.1620769"},{"key":"CIT0065","unstructured":"Spitkovsky VI, Alshawi H, Jurafsky D. From baby steps to leapfrog: how \u201cless is more\u201d in unsupervised pependency parsing. Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies; Los Angeles, USA; 2010. p. 751\u2013759."},{"key":"CIT0066","doi-asserted-by":"crossref","unstructured":"Jiang Y, Han W, Tu K. Unsupervised neural dependency parsing. Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing; Texas, USA; 2016. p. 763\u2013771.","DOI":"10.18653\/v1\/D16-1073"},{"key":"CIT0067","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0019467"},{"key":"CIT0068","unstructured":"Johnson M, Griffiths T, Goldwater S. Bayesian inference for PCFGs via Markov chain Monte Carlo. Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference; Rochester, NY, USA; 2007. p. 139\u2013146."},{"key":"CIT0069","unstructured":"Pate JK, Johnson M. Grammar induction from (lots of) words alone. International Conference on Computational Linguistics; Osaka, Japan; 2016. p. 23\u201332."},{"key":"CIT0070","unstructured":"Levy RP, Reali F, Griffiths TL. Modeling the effects of memory on human online sentence processing with particle filters. Advances in Neural Information Processing Systems 21; Vancouver, BC, Canada; 2009. p. 937\u2013944."},{"key":"CIT0071","doi-asserted-by":"crossref","unstructured":"Hockenmaier J, Steedman M. Generative models for statistical parsing with combinatory categorial grammar. Annual Meeting of the Association for Computational Linguistics; Philadelphia, Pennsylvania, USA; 2002. p. 335\u2013342.","DOI":"10.3115\/1073083.1073139"},{"key":"CIT0072","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00211"},{"key":"CIT0073","doi-asserted-by":"publisher","DOI":"10.1198\/016214506000000302"},{"key":"CIT0074","unstructured":"Liang P, Petrov S, Jordan M, et\u00a0al. The infinite PCFG using hierarchical Dirichlet processes. Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL); Prague, Czech Republic; 2007. p. 688\u2013697."},{"key":"CIT0075","doi-asserted-by":"crossref","unstructured":"Mart\u00ednez-G\u00f3mez P, Mineshima K, Miyao Y, et\u00a0al. ccg2lambda: a compositional semantics system. ACL-2016 System Demonstrations; 2016. p. 85\u201390.","DOI":"10.18653\/v1\/P16-4015"},{"key":"CIT0076","unstructured":"Bansal M, Matuszek C, Andreas J, et\u00a0al. Proceedings of the first workshop on language grounding for robotics; 2017. Available from: https:\/\/robonlp2017.github.io."},{"key":"CIT0077","unstructured":"Poon H. Grounded unsupervised semantic parsing. ACL 2013; 2013. p. 933\u2013943."},{"key":"CIT0078","doi-asserted-by":"crossref","unstructured":"Poon H, Domingos P. Unsupervised semantic parsing. Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing; Singapore; 2009. p. 1\u201310.","DOI":"10.3115\/1699510.1699512"},{"key":"CIT0079","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00177"},{"key":"CIT0080","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, et\u00a0al. Show and tell: a neural image caption generator. IEEE\/CVF Conference on Computer Vision and Pattern Recognition; Boston, MA, USA; 2015. p. 3156\u20133164.","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"CIT0081","unstructured":"Xu K, Ba J, Kiros R, et\u00a0al. Show, attend and tell: neural image caption generation with visual attention. International Conference on Machine Learning (ICML); Lille, France; 2015. p. 2048\u20132057."},{"key":"CIT0082","doi-asserted-by":"crossref","unstructured":"Karpathy A, Fei-Fei L. Deep visual-semantic alignments for generating image descriptions. IEEE\/CVF Conference on Computer Vision and Pattern Recognition; Boston, MA, USA; 2015. p. 3128\u20133137.","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"CIT0083","doi-asserted-by":"crossref","unstructured":"Thomason J, Sinapov J, Mooney RJ, et\u00a0al. Guiding exploratory behaviors for multi-modal grounding of linguistic descriptions. AAAI; 2018.","DOI":"10.18653\/v1\/W17-2803"},{"key":"CIT0084","doi-asserted-by":"crossref","unstructured":"Amiri S, Wei S, Zhang S, et\u00a0al. Multi-modal predicate identification using dynamically learned robot controllers. Proceedings of the 25th International Joint Conference on Artificial Intelligence; Stockholm, Sweden; 2018. p. 4638\u20134645.","DOI":"10.24963\/ijcai.2018\/645"},{"key":"CIT0085","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2016.1172507"},{"key":"CIT0086","doi-asserted-by":"crossref","unstructured":"Aly A, Taniguchi T, Mochihashi D. A probabilistic approach to unsupervised induction of combinatory categorial grammar in situated human-robot interaction. IEEE-RAS 18th International Conference on Humanoid Robots; Beijing, China; 2018. p. 1\u20139.","DOI":"10.1109\/HUMANOIDS.2018.8625009"},{"key":"CIT0087","doi-asserted-by":"publisher","DOI":"10.1075\/clip.2"},{"key":"CIT0088","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780199266647.001.0001","volume-title":"Linguistic categorization","author":"Taylor JR.","year":"2003"},{"key":"CIT0089","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511803864"},{"key":"CIT0090","doi-asserted-by":"publisher","DOI":"10.1086\/204009"},{"key":"CIT0091","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog1104_2"},{"key":"CIT0092","doi-asserted-by":"crossref","unstructured":"Fillmore CJ. An alternative to checklist theories of meaning. Annual Meeting of the Berkeley Linguistics Society; Vol. 1; 1975. p. 123\u2013131.","DOI":"10.3765\/bls.v1i0.2315"},{"key":"CIT0093","unstructured":"Fillmore CJ. Frame semantics. Seoul: Hanshin Publishing Co.; 1982. p. 111\u2013137."},{"key":"CIT0094","doi-asserted-by":"publisher","DOI":"10.1111\/tops.12102"},{"key":"CIT0095","doi-asserted-by":"publisher","DOI":"10.1098\/rstb.2017.0131"},{"key":"CIT0096","unstructured":"Akira U. A distributional semantic model of visually indirect grounding for abstract words. Proceedings of NIPS 2018, Workshop on Visually Grounded Interaction and Language (ViGIL); Montreal, Canada; 2018."},{"key":"CIT0097","doi-asserted-by":"publisher","DOI":"10.3758\/BF03196968"},{"key":"CIT0098","unstructured":"Krizhevsky A, Sutskever I, Hinton GE. Imagenet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems; Stateline, Nevada, USA; 2012. p. 1097\u20131105."},{"key":"CIT0099","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Fergus R. Visualizing and understanding convolutional networks. European Conference on Computer Vision; Springer; 2014. p. 818\u2013833.","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"CIT0100","unstructured":"Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition. International Conference on Learning Representations; San Diego, CA; 2015."},{"key":"CIT0101","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, et\u00a0al. Deep residual learning for image recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; Las Vegas, Nevada; 2016. p. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"CIT0102","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, et\u00a0al. Inception-v4, inception-resnet and the impact of residual connections on learning. AAAI; Vol. 4; 2017. p. 12.","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"CIT0103","unstructured":"Fergus R, Perona P, Zisserman A. Object class recognition by unsupervised scale-invariant learning. IEEE Conference on Computer Vision and Pattern Recognition; Madison, Wisconsin, USA; Vol. 2; 2003. p. 264\u2013271."},{"key":"CIT0104","unstructured":"Sivic J, Russell BC, Efros AA, et\u00a0al. Discovering object categories in image collections. IEEE International Conference on Computer Vision; Beijing, China; 2005. p. 17\u201320."},{"key":"CIT0105","unstructured":"Fei-Fei L. A Bayesian Hierarchical model for learning natural scene categories. IEEE Conference on Computer Vision and Pattern Recognition; San Diego, CA, USA; 2005. p. 524\u2013531."},{"key":"CIT0106","unstructured":"Wang C, Blei D, Fei-Fei L. Simultaneous image classification and annotation. IEEE Conference on Computer Vision and Pattern Recognition; Miami Beach, FL, USA; 2009. p. 1903\u20131910."},{"key":"CIT0107","unstructured":"Krause A, Perona P, Gomes RG. Discriminative clustering by regularized information maximization. Advances in Neural Information Processing Systems; Vancouver, Canada; 2010. p. 775\u2013783."},{"key":"CIT0108","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2353617"},{"key":"CIT0109","doi-asserted-by":"publisher","DOI":"10.1162\/1064546053278973"},{"key":"CIT0110","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2004.03.011"},{"key":"CIT0111","doi-asserted-by":"crossref","unstructured":"Ridge B, Skocaj D, Leonardis A. Self-supervised cross-modal online learning of basic object affordances for developmental robotic systems. IEEE International Conference on Robotics and Automation; Anchorage, Alaska, USA; 2010. p. 5047\u20135054.","DOI":"10.1109\/ROBOT.2010.5509544"},{"key":"CIT0112","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2010.05.002"},{"key":"CIT0113","doi-asserted-by":"publisher","DOI":"10.1177\/1059712313488423"},{"key":"CIT0114","doi-asserted-by":"crossref","unstructured":"Mangin O, Oudeyer PY. Learning semantic components from subsymbolic multimodal perception. IEEE Third Joint International Conference on Development and Learning and Epigenetic Robotics; Osaka, Japan; 2013. p. 1\u20137.","DOI":"10.1109\/DevLrn.2013.6652563"},{"key":"CIT0115","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0140732"},{"key":"CIT0116","doi-asserted-by":"crossref","unstructured":"Chen Y, Filliat D. Cross-situational noun and adjective learning in an interactive scenario. Joint IEEE International Conference on Development and Learning and Epigenetic Robotics; Providence, Rhode Island, USA; 2015. p. 129\u2013134.","DOI":"10.1109\/DEVLRN.2015.7346129"},{"key":"CIT0117","doi-asserted-by":"publisher","DOI":"10.1177\/1059712313497976"},{"key":"CIT0118","doi-asserted-by":"publisher","DOI":"10.1162\/jmlr.2003.3.4-5.993"},{"key":"CIT0119","unstructured":"Nakamura T, Nagai T, Iwahashi N. Multimodal object categorization by a robot. IEEE\/RSJ International Conference on Intelligent Robots and Systems; San Diego, CA; 2007. p. 2415\u20132420."},{"issue":"4","key":"CIT0120","first-page":"285","volume":"8","author":"Taniguchi A","year":"2016","journal-title":"IEEE Trans Cogn Dev Sys"},{"key":"CIT0121","doi-asserted-by":"crossref","unstructured":"Taniguchi A, Hagiwara Y, Taniguchi T, et\u00a0al. Online spatial concept and lexical acquisition with simultaneous localization and mapping. IEEE\/RSJ International Conference on Intelligent Robots and Systems; Vancouver, BC, Canada; 2017. p. 811\u2013818.","DOI":"10.1109\/IROS.2017.8202243"},{"key":"CIT0122","doi-asserted-by":"publisher","DOI":"10.1163\/016918611X595035"},{"key":"CIT0123","first-page":"277","volume":"22","author":"Barsalou LW.","year":"1999","journal-title":"Behav Brain Sci"},{"volume-title":"Louder than words: the new science of how the mind makes meaning","year":"2012","author":"Bergen BK.","key":"CIT0124"},{"key":"CIT0125","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2552579"},{"key":"CIT0126","doi-asserted-by":"crossref","unstructured":"Mochihashi D, Yamada T, Ueda N. Bayesian unsupervised word segmentation with nested Pitman-Yor language modeling. Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing; Singapore; Vol. 1; 2009. p. 100\u2013108.","DOI":"10.3115\/1687878.1687894"},{"key":"CIT0127","doi-asserted-by":"crossref","unstructured":"Yan Z, Zhang H, Piramuthu R, et\u00a0al. Hd-cnn: hierarchical deep convolutional neural networks for large scale visual recognition. Proceedings of the IEEE International Conference on Computer Vision; Beijing, China; 2015. p. 2740\u20132748.","DOI":"10.1109\/ICCV.2015.314"},{"key":"CIT0128","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-017-5443-x"},{"key":"CIT0129","doi-asserted-by":"publisher","DOI":"10.1145\/1667053.1667056"},{"key":"CIT0130","doi-asserted-by":"crossref","unstructured":"Ando Y, Nakamura T, Araki T, et\u00a0al. Formation of hierarchical object concept using hierarchical latent Dirichlet allocation. IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS); Tokyo, Japan; 2013. p. 2272\u20132279.","DOI":"10.1109\/IROS.2013.6696674"},{"key":"CIT0131","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2018.00011"},{"key":"CIT0132","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.167"},{"key":"CIT0133","doi-asserted-by":"crossref","unstructured":"Chen X, Shrivastava A, Gupta A. Neil: extracting visual knowledge from web data. IEEE International Conference on Computer Vision; Sydney, Australia; 2013. p. 1409\u20131416.","DOI":"10.1109\/ICCV.2013.178"},{"key":"CIT0134","doi-asserted-by":"crossref","unstructured":"Taigman Y, Yang M, Ranzato M, et\u00a0al. Deepface: closing the gap to human-level performance in face verification. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; Columbus, Ohio, USA; 2014. p. 1701\u20131708.","DOI":"10.1109\/CVPR.2014.220"},{"key":"CIT0135","doi-asserted-by":"crossref","unstructured":"Cao Z, Simon T, Wei SE, et\u00a0al. Realtime multi-person 2D pose estimation using part affinity fields. IEEE Conference on Computer Vision and Pattern Recognition; Honolulu, HI, USA; Vol. 1; 2017. p. 7.","DOI":"10.1109\/CVPR.2017.143"},{"key":"CIT0136","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2738644"},{"key":"CIT0137","doi-asserted-by":"publisher","DOI":"10.7208\/chicago\/9780226470993.001.0001"},{"volume-title":"The way we think: conceptual blending and the mind's hidden complexities","year":"2008","author":"Fauconnier G","key":"CIT0138"},{"key":"CIT0139","unstructured":"Grady J. Foundations of meaning: primary metaphors and primary stress. 1997. Retrieved from https:\/\/escholarship.org\/uc\/item\/3g9427m2"},{"volume-title":"Cobuild guides to English 7: metaphor","year":"1995","author":"Deignan A.","key":"CIT0140"},{"volume-title":"Metaphors dictionary","year":"2001","author":"Sommer E","key":"CIT0141"},{"key":"CIT0142","doi-asserted-by":"publisher","DOI":"10.4324\/9780203945643"},{"volume-title":"Shogakukan dictionary of English lexical polysemy","year":"2007","author":"Seto KI","key":"CIT0143"},{"key":"CIT0144","doi-asserted-by":"publisher","DOI":"10.1075\/cf.8.2.01pet"},{"key":"CIT0145","doi-asserted-by":"publisher","DOI":"10.1093\/ijl\/16.3.235"},{"key":"CIT0146","doi-asserted-by":"publisher","DOI":"10.1075\/celcr.14"},{"volume-title":"A bilingual dictionary of English and Japanese metaphors","year":"2017","author":"Makino S","key":"CIT0147"},{"volume-title":"Metaphor and emotion: language, culture, and body in human feeling","year":"2003","author":"K\u00f6vecses Z.","key":"CIT0148"},{"key":"CIT0149","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511614408"},{"key":"CIT0150","doi-asserted-by":"publisher","DOI":"10.1007\/s00429-005-0039-z"},{"key":"CIT0151","doi-asserted-by":"publisher","DOI":"10.1016\/0926-6410(95)00038-0"},{"key":"CIT0152","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2487837"},{"issue":"2","key":"CIT0153","doi-asserted-by":"crossref","first-page":"128","DOI":"10.5964\/bioling.8721","volume":"3","author":"Fujita K.","year":"2009","journal-title":"Biolinguistics"},{"key":"CIT0154","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2594134"},{"key":"CIT0155","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2614992"},{"key":"CIT0156","first-page":"70","volume":"3","author":"Horton TE","year":"2012","journal-title":"AVANT"},{"volume-title":"The ecological approach to visual perception","year":"1979","author":"Gibson JJ.","key":"CIT0157"},{"key":"CIT0158","doi-asserted-by":"publisher","DOI":"10.1177\/1059712307084689"},{"key":"CIT0159","unstructured":"Stoytchev A. Behavior-grounded representation of tool affordances. Proceedings of IEEE International Conference on Robotics and Automation; Barcelona, Spain; 2005. p. 3071\u20133076."},{"key":"CIT0160","doi-asserted-by":"crossref","unstructured":"Stoytchev A. Learning the affordances of tools using a behavior-grounded approach. In: Rome E, Hertzberg J, Dorffner G, editors. Towards affordance-based robot control. Berlin, Heidelberg: Springer; 2008. p. 140\u2013158.","DOI":"10.1007\/978-3-540-77915-5_10"},{"key":"CIT0161","doi-asserted-by":"crossref","unstructured":"Nakamura T, Nagai T. Forming object concept using bayesian network. 2010 Aug.","DOI":"10.5772\/10075"},{"key":"CIT0162","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"CIT0163","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30301-5_60"},{"key":"CIT0164","doi-asserted-by":"publisher","DOI":"10.2200\/S00568ED1V01Y201402AIM028"},{"key":"CIT0165","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2017.00067"},{"key":"CIT0166","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2550591"},{"key":"CIT0167","doi-asserted-by":"crossref","unstructured":"Taniguchi T, Nagasaka S. Double articulation analyzer for unsegmented human motion using Pitman-Yor language model and infinite hidden Markov model. IEEE\/SICE International Symposium on System Integration; Kyoto, Japan; 2011. p. 250\u2013255.","DOI":"10.1109\/SII.2011.6147455"},{"key":"CIT0168","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(99)01327-3"},{"key":"CIT0169","doi-asserted-by":"crossref","unstructured":"Yokoya R, Ogata T, Tani J, et\u00a0al. Experience based imitation using rnnpb. 2006 IEEE\/RSJ International Conference on Intelligent Robots and Systems; Beijing, China; 2006. p. 3669\u20133674.","DOI":"10.1109\/IROS.2006.281724"},{"key":"CIT0170","unstructured":"Ho J, Ermon S. Generative adversarial imitation learning. In: Lee DD, Sugiyama M, Luxburg UV, et\u00a0al., editors. Advances in neural information processing systems 29. Curran Associates, Inc.; 2016. p. 4565\u20134573."},{"key":"CIT0171","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"CIT0172","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"CIT0173","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"CIT0174","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, et\u00a0al. Continuous control with deep reinforcement learning. CoRR; 2016. abs\/1509.02971."},{"issue":"1","key":"CIT0175","first-page":"1334","volume":"17","author":"Levine S","year":"2016","journal-title":"J Mach Learn Res(JMLR)"},{"key":"CIT0176","unstructured":"Hermann KM, Hill F, Green S, et\u00a0al. Grounded language learning in a simulated 3D world. CoRR; 2017. abs\/1706.06551."},{"key":"CIT0177","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-017-0468-y"},{"key":"CIT0178","doi-asserted-by":"publisher","DOI":"10.1017\/S0140525X00071235"},{"key":"CIT0179","doi-asserted-by":"publisher","DOI":"10.9793\/elsj1984.24.78"},{"key":"CIT0180","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2014.01.001"},{"key":"CIT0181","unstructured":"Garagnani M, Shastri L, Wendelken C. A connectionist model of planning via back-chaining search. Proceedings of the Annual Meeting of the Cognitive Science Society; California, USA; Vol. 24; 2002."},{"key":"CIT0182","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2018.07.006"},{"key":"CIT0183","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2852838"},{"key":"CIT0184","doi-asserted-by":"publisher","DOI":"10.1093\/acprof:oso\/9780198245537.001.0001"},{"volume-title":"Studies in the way of words","year":"1989","author":"Grice P.","key":"CIT0185"},{"volume-title":"Relevance: communication and cognition","year":"1986","author":"Sperber D","key":"CIT0186"},{"volume-title":"Understanding computers and cognition","year":"1986","author":"Winograd T","key":"CIT0187"},{"key":"CIT0188","unstructured":"Heidegger M. Being and time. Oxford, UK: BLACI WELL; 1927."},{"key":"CIT0189","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780195157338.001.0001","volume-title":"Mind: a brief introduction","author":"Searle JR.","year":"2004"},{"key":"CIT0190","unstructured":"Lakoff G. Woman, fire, and dangerous things. What Categories Reveal about the Mind. Chicago and London: The University of Chicago Press; 1987."},{"key":"CIT0191","unstructured":"Langacker R. Concept, image, and symbol: the cognitive basis of grammar. Berlin: Mouton de Gruyter; 1991."},{"key":"CIT0192","doi-asserted-by":"crossref","unstructured":"Iwahashi N. Robots that learn language: Developmental approach to human-machine conversations. Symbol grounding and beyond. Rome, Italy: Springer; 2006. p. 143\u2013167.","DOI":"10.1007\/11880172_12"},{"key":"CIT0193","doi-asserted-by":"publisher","DOI":"10.5772\/5188"},{"key":"CIT0194","unstructured":"Iwahashi N, Sugiura K, Taguchi R, et\u00a0al. Robots that learn to communicate: a developmental approach to personally and physically situated human-robot conversations. AAAI Fall Symposium: Dialog with Robots; 2010."},{"key":"CIT0195","doi-asserted-by":"publisher","DOI":"10.7210\/jrsj.28.978"},{"key":"CIT0196","doi-asserted-by":"publisher","DOI":"10.1163\/016918611X595044"},{"key":"CIT0197","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-011-9605-1"},{"key":"CIT0198","doi-asserted-by":"crossref","unstructured":"Araki T, Nakamura T, Nagai T, et\u00a0al. Online learning of concepts and words using multimodal LDA and hierarchical Pitman-Yor Language Model. IEEE\/RSJ International Conference on Intelligent Robots and Systems; Vilamoura, Algarve, Portugal; 2012. p. 1623\u20131630.","DOI":"10.1109\/IROS.2012.6385812"},{"key":"CIT0199","doi-asserted-by":"crossref","unstructured":"Nakamura T, Nagai T, Funakoshi K, et\u00a0al. Mutual Learning of an Object Concept and Language Model Based on MLDA and NPYLM. IEEE\/RSJ International Conference on Intelligent Robots and Systems; Chicago, IL USA; 2014. p. 600\u2013607.","DOI":"10.1109\/IROS.2014.6942621"},{"key":"CIT0200","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2018.00025"},{"key":"CIT0201","doi-asserted-by":"publisher","DOI":"10.1126\/science.298.5598.1569"},{"key":"CIT0202","volume-title":"Rethinking innateness: a connectionist perspective on development","volume":"10","author":"Elman JL","year":"1998"},{"key":"CIT0203","unstructured":"Wu Y, Schuster M, Chen Z, et\u00a0al. Google's neural machine translation system: bridging the gap between human and machine translation. Preprint; 2016. arXiv:160908144."},{"key":"CIT0204","unstructured":"Lowe R, Pow N, Serban I, et\u00a0al. Incorporating unstructured textual knowledge sources into neural dialogue systems. Neural information processing systems workshop on machine learning for spoken language understanding; Montreal, Quebec, Canada; 2015."},{"key":"CIT0205","doi-asserted-by":"publisher","DOI":"10.1111\/j.1750-8606.2007.00003.x"},{"key":"CIT0206","doi-asserted-by":"publisher","DOI":"10.1207\/s15327078in0803_4"},{"volume-title":"Robots that learn language: a developmental approach to situated human-robot conversations","year":"2007","author":"Iwahashi N.","key":"CIT0207"},{"volume-title":"Collected works of M. A. K. Halliday","year":"2009","author":"Halliday MAK.","key":"CIT0208"},{"volume-title":"Halliday: system and function in language: selected papers","year":"1977","author":"Halliday MAK.","key":"CIT0209"},{"volume-title":"Language as social semiotic: the social interpretation of language and meaning","year":"1978","author":"Halliday MAK.","key":"CIT0210"},{"volume-title":"The meaning of meaning","year":"1923","author":"Malinowski B.","key":"CIT0211"},{"volume-title":"Language, context, and text: aspects of language in a social-semiotic perspective","year":"1991","author":"Halliday MAK","key":"CIT0212"},{"key":"CIT0213","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-009-8947-4"},{"key":"CIT0214","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, et\u00a0al. ImageNet: a large-scale hierarchical image database. 2009 IEEE Conference on Computer Vision and Pattern Recognition; Miami, Florida; 2009. p. 248\u2013255.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"CIT0215","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.128"},{"key":"CIT0216","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0090-8"},{"key":"CIT0217","unstructured":"Lin Ty, Zitnick CL, Doll P. Microsoft COCO: common objects in context. p. 1\u201315."},{"key":"CIT0218","doi-asserted-by":"crossref","unstructured":"Quattoni A, Torralba A. Recognizing indoor scenes. 2009 IEEE Conference on Computer Vision and Pattern Recognition; Miami, Florida; 2009. p. 413\u2013420.","DOI":"10.1109\/CVPR.2009.5206537"},{"key":"CIT0219","doi-asserted-by":"crossref","unstructured":"Xiao J, Hays J, Ehinger KA, et\u00a0al. SUN database: large-scale scene recognition from abbey to zoo. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition; San Francisco, CA; 2010. p. 3485\u20133492.","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"CIT0220","unstructured":"Zhou B, Lapedriza A, Xiao J, et\u00a0al. Learning deep features for scene recognition using places database. Advances in Neural Information Processing Systems (NIPS); Montr\u00e9al, Canada; 2014. p. 487\u2013495."},{"key":"CIT0221","doi-asserted-by":"publisher","DOI":"10.1089\/big.2016.0028"},{"key":"CIT0222","doi-asserted-by":"publisher","DOI":"10.1186\/s40648-014-0022-7"},{"key":"CIT0223","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00207"},{"key":"CIT0224","doi-asserted-by":"crossref","unstructured":"Rohrbach A, Rohrbach M, Qiu W, et\u00a0al. Coherent multi-sentence video description with variable level of detail. 2014. p. 184\u2013195. (Lecture notes in computer science; 8753. (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)).","DOI":"10.1007\/978-3-319-11752-2_15"},{"key":"CIT0225","doi-asserted-by":"crossref","unstructured":"Sigurdsson GA, Varol G, Wang X, et\u00a0al. Hollywood in homes: crowdsourcing data collection for activity understanding. 2016. p. 510\u2013526. (Lecture notes in computer science; 9905 LNCS. (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)).","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"CIT0226","unstructured":"Abu-El-Haija S, Kothari N, Lee J, et\u00a0al. Youtube-8m: a large-scale video classification benchmark. CoRR; 2016. abs\/1609.08675."},{"key":"CIT0227","unstructured":"Agrawal A, Lu J, Antol S, et\u00a0al. VQA: visual question answering. p. 1\u201325."},{"key":"CIT0228","doi-asserted-by":"crossref","unstructured":"Tapaswi M, Zhu Y, Stiefelhagen R, et\u00a0al. MovieQA: understanding stories in movies through question-answering. IEEE Conference on Computer Vision and Pattern Recognition; Las Vegas, NV; 2016. p. 4631\u20134640.","DOI":"10.1109\/CVPR.2016.501"},{"key":"CIT0229","doi-asserted-by":"crossref","unstructured":"Das A, Datta S, Gkioxari G, et\u00a0al. Embodied question answering. IEEE\/CVF Conference on Computer Vision and Pattern Recognition; Salt Lake City, Utah; 2018. p. 1\u201310.","DOI":"10.1109\/CVPR.2018.00008"},{"key":"CIT0230","unstructured":"Hermann KM, Hill F, Green S, et\u00a0al. Grounded language learning in a simulated 3D world. CoRR; 2017. abs\/1706.06551."},{"key":"CIT0231","unstructured":"MacMahon M, Stankiewicz B, Kuipers B. Walk the talk: connecting language, knowledge, and action in route instructions. AAAI Conference on Artificial Intelligence (AAAI); Boston, MA; 2006. p. 1475\u20131482."},{"key":"CIT0232","doi-asserted-by":"crossref","unstructured":"Mei H, Bansal M, Walter MR. Listen, attend, and walk: neural mapping of navigational instructions to action sequences. AAAI Conference on Artificial Intelligence (AAAI); Phoenix, AZ; 2016.","DOI":"10.1609\/aaai.v30i1.10364"},{"key":"CIT0233","unstructured":"de Vries H, Shuster K, Batra D, et\u00a0al. Talk the walk: navigating New York city through grounded dialogue. CoRR; 2018. abs\/1807.03367."},{"key":"CIT0234","doi-asserted-by":"crossref","unstructured":"Inamura T, Mizuchi Y. Robot competition to evaluate guidance skill for general users in VR environment. International Conference on Human-Robot Interaction; Daegu, Korea; 2019.","DOI":"10.1109\/HRI.2019.8673218"},{"key":"CIT0235","unstructured":"Beattie C, Leibo JZ, Teplyashin D, et\u00a0al. Deepmind lab. CoRR; 2016. abs\/1612.03801."},{"key":"CIT0236","unstructured":"Brockman G, Cheung V, Pettersson L, et\u00a0al. OpenAI gym; 2016."},{"key":"CIT0237","unstructured":"Brodeur S, Perez E, Anand A, et\u00a0al. Home: a household multimodal environment. CoRR; 2017. abs\/1711.11017."},{"key":"CIT0238","unstructured":"Kolve E, Mottaghi R, Gordon D, et\u00a0al. AI2-THOR: an interactive 3D environment for visual AI. CoRR; 2017. abs\/1712.05474."},{"key":"CIT0239","unstructured":"Savva M, Chang AX, Dosovitskiy A, et\u00a0al. MINOS: multimodal indoor simulator for navigation in complex environments. CoRR; 2017. abs\/1712.03931."},{"issue":"1","key":"CIT0240","first-page":"39","volume":"3","author":"Orkin J","year":"2007","journal-title":"J Game Dev (JOGD)"},{"key":"CIT0241","doi-asserted-by":"publisher","DOI":"10.5898\/JHRI.2.1.Breazeal"},{"key":"CIT0242","doi-asserted-by":"crossref","unstructured":"Inamura T, Shibata T, Sena H, et\u00a0al. Simulator platform that enables social interaction simulation \u2013 SIGVerse: SocioIntelliGenesis simulator. IEEE\/SICE International Symposium on System Integration; Sendai, Japan; 2010. p. 212\u2013217.","DOI":"10.1109\/SII.2010.5708327"},{"key":"CIT0243","doi-asserted-by":"crossref","unstructured":"Mizuchi Y, Inamura T. Cloud-based multimodal human-robot interaction simulator utilizing ROS and unity frameworks. IEEE\/SICE International Symposium on System Integration; Taipei, Taiwan; 2017. p. 948\u2013955.","DOI":"10.1109\/SII.2017.8279345"},{"key":"CIT0244","unstructured":"Quigley M, Conley K, Gerkey BP, et\u00a0al. ROS: an open-source Robot Operating System; 2009."},{"key":"CIT0245","doi-asserted-by":"crossref","unstructured":"Van Der Zant T, Iocchi L. RoboCup@Home: adaptive benchmarking of robot bodies and minds. 2011. p. 214\u2013225. (Lecture notes in computer science; Vol. 7072 LNAI (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)).","DOI":"10.1007\/978-3-642-25504-5_22"},{"key":"CIT0246","doi-asserted-by":"crossref","unstructured":"Dinan E, Logacheva V, Malykh V, et\u00a0al. The second conversational intelligence challenge (convai2). CoRR; 2019. abs\/1902.00098.","DOI":"10.1007\/978-3-030-29135-8_7"},{"key":"CIT0247","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2018.09.004"}],"container-title":["Advanced Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/tandfonline.com\/doi\/pdf\/10.1080\/01691864.2019.1632223","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/01691864.2019.1632223","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,20]],"date-time":"2024-07-20T06:16:53Z","timestamp":1721456213000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/01691864.2019.1632223"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,24]]},"references-count":247,"journal-issue":{"issue":"15-16","published-print":{"date-parts":[[2019,8,18]]}},"alternative-id":["10.1080\/01691864.2019.1632223"],"URL":"https:\/\/doi.org\/10.1080\/01691864.2019.1632223","relation":{},"ISSN":["0169-1864","1568-5535"],"issn-type":[{"type":"print","value":"0169-1864"},{"type":"electronic","value":"1568-5535"}],"subject":[],"published":{"date-parts":[[2019,6,24]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2019-03-06","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2019-05-29","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2019-06-02","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2019-06-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}