{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,16]],"date-time":"2025-04-16T05:39:07Z","timestamp":1744781947423,"version":"3.37.3"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,8,30]],"date-time":"2021-08-30T00:00:00Z","timestamp":1630281600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,8,30]],"date-time":"2021-08-30T00:00:00Z","timestamp":1630281600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000104","name":"National Aeronautics and Space Administration","doi-asserted-by":"publisher","award":["NNX16AR61G"],"award-info":[{"award-number":["NNX16AR61G"]}],"id":[{"id":"10.13039\/100000104","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Science Foundation","award":["IIS-1652561","IIS-1717569"],"award-info":[{"award-number":["IIS-1652561","IIS-1717569"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Robot"],"published-print":{"date-parts":[[2022,1]]},"DOI":"10.1007\/s10514-021-10008-7","type":"journal-article","created":{"date-parts":[[2021,8,30]],"date-time":"2021-08-30T09:03:56Z","timestamp":1630314236000},"page":"83-98","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Affordance-based robot object retrieval"],"prefix":"10.1007","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5016-2230","authenticated-orcid":false,"given":"Thao","family":"Nguyen","sequence":"first","affiliation":[]},{"given":"Nakul","family":"Gopalan","sequence":"additional","affiliation":[]},{"given":"Roma","family":"Patel","sequence":"additional","affiliation":[]},{"given":"Matt","family":"Corsaro","sequence":"additional","affiliation":[]},{"given":"Ellie","family":"Pavlick","sequence":"additional","affiliation":[]},{"given":"Stefanie","family":"Tellex","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,30]]},"reference":[{"key":"10008_CR1","doi-asserted-by":"crossref","unstructured":"Calli, B.,Singh, A., Walsman, A., Srinivasa, S., Abbeel, P., and Dollar, A.,M. (2015). The YCB object and Model set: Towards common benchmarks for manipulation research. In Proceedings of the IEEE international conference on advanced robotics, pp. 510\u2013517.","DOI":"10.1109\/ICAR.2015.7251504"},{"key":"10008_CR2","doi-asserted-by":"crossref","unstructured":"Chao, Y-W., Wang Z., Mihalcea R., and Deng, J. (2015). Mining semantic affordances of visual object categories. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4259\u20134267.","DOI":"10.1109\/CVPR.2015.7299054"},{"key":"10008_CR3","doi-asserted-by":"crossref","unstructured":"Chen, K., Choy, C. B., Savva, M., Chang, A. X., Funkhouser, T and Savarese, S. (2018). Text2Shape: Generating shapes from natural language by learning joint embeddings. In Asian conference on computer vision, pp. 100\u2013116. Springer.","DOI":"10.1007\/978-3-030-20893-6_7"},{"key":"10008_CR4","doi-asserted-by":"crossref","unstructured":"Cohen, V., Burchfiel, B., Nguyen, T., Gopalan, N., Tellex, S., and Konidaris, G. (2019). Grounding language attributes to objects using Bayesian Eigen objects. In Proceedings of the IEEE international conference on intelligent robots and systems.","DOI":"10.1109\/IROS40897.2019.8968603"},{"key":"10008_CR5","doi-asserted-by":"crossref","unstructured":"Do, T-T., Nguyen, A., and Reid, I. (2018). AffordanceNet: An end-to-end deep learning approach for object affordance detection. In Proceedings of the IEEE international conference on robotics and automation, pp. 1\u20135.","DOI":"10.1109\/ICRA.2018.8460902"},{"issue":"2","key":"10008_CR6","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"Jeffrey L Elman","year":"1990","unstructured":"Elman, Jeffrey L. (1990). Finding structure in time. Cognitive Science, 14(2), 179\u2013211. https:\/\/doi.org\/10.1207\/s15516709cog1402_1.","journal-title":"Cognitive Science"},{"key":"10008_CR7","doi-asserted-by":"crossref","unstructured":"Fulda, N., Ricks, D., Murdoch, B., and Wingate D. (2017). What can you do with a rock? affordance extraction via word embeddings. arXiv preprint arXiv:1703.03429.","DOI":"10.24963\/ijcai.2017\/144"},{"key":"10008_CR8","doi-asserted-by":"crossref","unstructured":"Hatori, J., Kikuchi, Y., Kobayashi, S., Takahashi, K., Tsuboi, Y., Unno, Y., Ko, W., and Tan, J. (2018). Interactively picking real-world objects with unconstrained spoken language instructions. In Proceedings of the IEEE international conference on robotics and automation, pp. 3774\u20133781.","DOI":"10.1109\/ICRA.2018.8460699"},{"key":"10008_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., and Girshick, R. (2017). Mask R-CNN. In Proceedings of the IEEE international conference on computer vision, pp. 2961\u20132969.","DOI":"10.1109\/ICCV.2017.322"},{"key":"10008_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., and Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10008_CR11","unstructured":"Honnibal, M and Montani, I (2017). spaCy 2: Natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear."},{"key":"10008_CR12","doi-asserted-by":"crossref","unstructured":"Hu, R., Xu, H., Rohrbach, M., Feng, J., Saenko, K., and Darrell, T. (2016). Natural language object retrieval. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4555\u20134564.","DOI":"10.1109\/CVPR.2016.493"},{"key":"10008_CR13","volume-title":"The theory of affordances","author":"JG James","year":"1977","unstructured":"James, J. G. (1977). The theory of affordances. USA: Hilldale."},{"issue":"5","key":"10008_CR14","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1037\/h0031619","volume":"76","author":"LF Joseph","year":"1971","unstructured":"Joseph, L. F. (1971). Measuring nominal scale agreement among many raters. Psychological Bulletin, 76(5), 378.","journal-title":"Psychological Bulletin"},{"key":"10008_CR15","unstructured":"Kelvin, X., Jimmy, B., Ryan, K., Kyunghyun, C., Aaron, C., Ruslan, S., Rich, Z and Yoshua, B. (2015). Show, attend and tell: Neural image caption generation with visual attention. In International conference on machine learning, pp. 2048\u20132057."},{"key":"10008_CR16","unstructured":"Kingma, D. P., and Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980."},{"key":"10008_CR17","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1162\/tacl_a_00220","volume":"1","author":"J Krishnamurthy","year":"2013","unstructured":"Krishnamurthy, J., & Kollar, T. (2013). Jointly learning to parse and perceive: Connecting natural language to the physical world. Transactions of the Association for Computational Linguistics, 1, 193\u2013206.","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"10008_CR18","doi-asserted-by":"crossref","unstructured":"Lin, T-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., and Lawrence, Z. C. (2014). Microsoft COCO: Common objects in context. In European conference on computer vision, pp. 740\u2013755. Springer.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"10008_CR19","doi-asserted-by":"crossref","unstructured":"Mallick, A., Pobil, A. P. D., and Cervera, E. (2018). Deep learning based object recognition for robot picking task. In Proceedings of the 12th international conference on ubiquitous information management and communication, pp. 1\u20139.","DOI":"10.1145\/3164541.3164628"},{"key":"10008_CR20","unstructured":"Mao, J., Xu, W., Yang, Y., Wang, J., Huang, Z., and Yuille, A. (2014). Deep captioning with multimodal recurrent neural networks (m-RNN). arXiv preprint arXiv:1412.6632."},{"key":"10008_CR21","unstructured":"Mikolov, T., Chen, K., Corrado, G., and Dean, J., (2013). Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781."},{"key":"10008_CR22","doi-asserted-by":"crossref","unstructured":"Myers, A., Teo, C. L., Ferm\u00fcller, C., and Aloimonos, Y. (2015). Affordance detection of tool parts from geometric features. In Proceedings of the IEEE international conference on robotics and automation, pp. 1374\u20131381.","DOI":"10.1109\/ICRA.2015.7139369"},{"key":"10008_CR23","doi-asserted-by":"publisher","unstructured":"Nguyen, T., Gopalan, N., Patel, R., Corsaro, M., Pavlick, E., and Tellex, S. (2020). Robot object retrieval with contextual natural language queries. In Proceedings of robotics: Science and systems, Corvalis, Oregon, USA. https:\/\/doi.org\/10.15607\/RSS.2020.XVI.080.","DOI":"10.15607\/RSS.2020.XVI.080"},{"key":"10008_CR24","doi-asserted-by":"crossref","unstructured":"Patterson, G., and Hays, J. (2012). SUN Attribute database: Discovering, annotating, and recognizing scene attributes. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2751\u20132758.","DOI":"10.1109\/CVPR.2012.6247998"},{"key":"10008_CR25","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., and Manning, C. D. (2014) GloVe: Global vectors for word representation. In Proceedings of the conference on empirical methods in natural language processing, pp. 1532\u20131543.","DOI":"10.3115\/v1\/D14-1162"},{"issue":"3","key":"10008_CR26","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Hao, S., Krause, J., Satheesh, S., Ma, S., et al. (2015). ImageNet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y.","journal-title":"International Journal of Computer Vision"},{"key":"10008_CR27","doi-asserted-by":"crossref","unstructured":"Schlangen, D., Zarriess, S., and Kennington, C., (2016). Resolving references to objects in photographs using the words-as-classifiers model. In Proceedings of the 54th annual meeting of the association for computational linguistics, pp. 1213\u20131223. ISBN 9781510827585. http:\/\/arxiv.org\/abs\/1510.02125.","DOI":"10.18653\/v1\/P16-1115"},{"key":"10008_CR28","doi-asserted-by":"crossref","unstructured":"Shridhar, M., and Hsu, D. (2018). Interactive visual grounding of referring expressions for human-robot interaction. arXiv preprint arXiv:1806.03831.","DOI":"10.15607\/RSS.2018.XIV.028"},{"key":"10008_CR29","unstructured":"Speer, R., Chin, J., and Havasi, C. (2017). ConceptNet 5.5: An open multilingual graph of general knowledge. In Thirty-First AAAI conference on artificial intelligence."},{"key":"10008_CR30","unstructured":"Tan, M., and Le, Q. (2019). EfficientNet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning, pp. 6105\u20136114. PMLR."},{"key":"10008_CR31","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., and Erhan, D. (2015). Show and tell: A Neural image caption generator. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3156\u20133164.","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"10008_CR32","doi-asserted-by":"crossref","unstructured":"Whitney, D., Rosen, E., MacGlashan, J., Lawson L.S.W, and Stefanie, T. (2017). Reducing errors in object-fetching interactions through social feedback. In Proceedings of the IEEE international conference on robotics and automation, pp. 1006\u20131013.","DOI":"10.1109\/ICRA.2017.7989121"}],"container-title":["Autonomous Robots"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-021-10008-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10514-021-10008-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-021-10008-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,5]],"date-time":"2022-02-05T09:05:54Z","timestamp":1644051954000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10514-021-10008-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,30]]},"references-count":32,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,1]]}},"alternative-id":["10008"],"URL":"https:\/\/doi.org\/10.1007\/s10514-021-10008-7","relation":{},"ISSN":["0929-5593","1573-7527"],"issn-type":[{"type":"print","value":"0929-5593"},{"type":"electronic","value":"1573-7527"}],"subject":[],"published":{"date-parts":[[2021,8,30]]},"assertion":[{"value":"31 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 July 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}