{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T09:50:36Z","timestamp":1742982636747,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030905248"},{"type":"electronic","value":"9783030905255"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-90525-5_17","type":"book-chapter","created":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T21:04:36Z","timestamp":1635800676000},"page":"191-202","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Gaze Assisted Visual Grounding"],"prefix":"10.1007","author":[{"given":"Kritika","family":"Johari","sequence":"first","affiliation":[]},{"given":"Christopher Tay Zi","family":"Tong","sequence":"additional","affiliation":[]},{"given":"Vigneshwaran","family":"Subbaraju","sequence":"additional","affiliation":[]},{"given":"Jung-Jae","family":"Kim","sequence":"additional","affiliation":[]},{"given":"U-Xuan","family":"Tan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,2]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Bhardwaj, R., Majumder, N., Poria, S., Hovy, E.: More identifiable yet equally performant transformers for text classification. arXiv preprint arXiv:2106.01269 (2021)","DOI":"10.18653\/v1\/2021.acl-long.94"},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Bloss, R.: Collaborative robots are rapidly providing major improvements in productivity, safety, programing ease, portability and cost while addressing many new applications. Ind. Robot Int. J. (2016)","DOI":"10.1108\/IR-05-2016-0148"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Chen, L., Ma, W., Xiao, J., Zhang, H., Chang, S.F.: Ref-NMS: breaking proposal bottlenecks in two-stage referring expression grounding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 1036\u20131044 (2021)","DOI":"10.1609\/aaai.v35i2.16188"},{"key":"17_CR4","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"issue":"2","key":"17_CR5","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (voc) challenge. Int. J. Comput. Vis. 88(2), 303\u2013338 (2010). https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int. J. Comput. Vis."},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Johari, K., Karumpulli, N., Tan, U.X.: Complementing speech interaction design with touch for multi-robot systems. In: TENCON 2019\u20132019 IEEE Region 10 Conference (TENCON), pp. 1400\u20131405. IEEE (2019)","DOI":"10.1109\/TENCON.2019.8929506"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Kazemzadeh, S., Ordonez, V., Matten, M., Berg, T.: Referitgame: referring to objects in photographs of natural scenes. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 787\u2013798 (2014)","DOI":"10.3115\/v1\/D14-1086"},{"issue":"4","key":"17_CR8","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/s13748-016-0094-0","volume":"5","author":"B Krawczyk","year":"2016","unstructured":"Krawczyk, B.: Learning from imbalanced data: open challenges and future directions. Prog. Artif. Intell. 5(4), 221\u2013232 (2016). https:\/\/doi.org\/10.1007\/s13748-016-0094-0","journal-title":"Prog. Artif. Intell."},{"key":"17_CR9","unstructured":"Krishna, R., et al.: Visual genome: Connecting language and vision using crowdsourced dense image annotations. arXiv preprint arXiv:1602.07332 (2016)"},{"key":"17_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part V. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"17_CR11","series-title":"Human\u2013Computer Interaction Series","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/978-1-4471-6392-3_3","volume-title":"Advances in Physiological Computing","author":"P Majaranta","year":"2014","unstructured":"Majaranta, P., Bulling, A.: Eye tracking and eye-based human\u2013computer interaction. In: Fairclough, S.H., Gilleade, K. (eds.) Advances in Physiological Computing. HIS, pp. 39\u201365. Springer, London (2014). https:\/\/doi.org\/10.1007\/978-1-4471-6392-3_3"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Mao, J., Huang, J., Toshev, A., Camburu, O., Yuille, A.L., Murphy, K.: Generation and comprehension of unambiguous object descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11\u201320 (2016)","DOI":"10.1109\/CVPR.2016.9"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Palinko, O., Rea, F., Sandini, G., Sciutti, A.: Robot reading human gaze: why eye tracking is better than head tracking for human-robot collaboration. In: 2016 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 5048\u20135054. IEEE (2016)","DOI":"10.1109\/IROS.2016.7759741"},{"key":"17_CR14","doi-asserted-by":"publisher","first-page":"55448","DOI":"10.1109\/ACCESS.2021.3071364","volume":"9","author":"KB Park","year":"2021","unstructured":"Park, K.B., Choi, S.H., Lee, J.Y., Ghasemi, Y., Mohammed, M., Jeong, H.: Hands-free human-robot interaction using multimodal gestures and deep learning in wearable mixed reality. IEEE Access 9, 55448\u201355464 (2021)","journal-title":"IEEE Access"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Wang, L., Cervantes, C.M., Caicedo, J.C., Hockenmaier, J., Lazebnik, S.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2641\u20132649 (2015)","DOI":"10.1109\/ICCV.2015.303"},{"key":"17_CR16","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: Towards real-time object detection with region proposal networks. arXiv preprint arXiv:1506.01497 (2015)"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Sadhu, A., Chen, K., Nevatia, R.: Zero-shot grounding of objects from natural language queries. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4694\u20134703 (2019)","DOI":"10.1109\/ICCV.2019.00479"},{"issue":"6","key":"17_CR18","doi-asserted-by":"publisher","first-page":"558","DOI":"10.1177\/0278364918760992","volume":"37","author":"R Scalise","year":"2018","unstructured":"Scalise, R., Li, S., Admoni, H., Rosenthal, S., Srinivasa, S.S.: Natural language instructions for human-robot collaborative manipulation. Int. J. Robot. Res. 37(6), 558\u2013565 (2018)","journal-title":"Int. J. Robot. Res."},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Sharma, V.K., Murthy, L., Saluja, K.S., Mollyn, V., Sharma, G., Biswas, P.: Eye gaze controlled robotic arm for persons with ssmi. arXiv preprint arXiv:2005.11994 (2020)","DOI":"10.3233\/TAD-200264"},{"issue":"2\u20133","key":"17_CR20","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1177\/0278364919897133","volume":"39","author":"M Shridhar","year":"2020","unstructured":"Shridhar, M., Mittal, D., Hsu, D.: Ingress: interactive visual grounding of referring expressions. Int. J. Robot. Res. 39(2\u20133), 217\u2013232 (2020)","journal-title":"Int. J. Robot. Res."},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Sidenmark, L., Mardanbegi, D., Gomez, A.R., Clarke, C., Gellersen, H.: Bimodalgaze: seamlessly refined pointing with gaze and filtered gestural head movement. In: ACM Symposium on Eye Tracking Research and Applications, pp. 1\u20139 (2020)","DOI":"10.1145\/3379155.3391312"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Stiefelhagen, R., Fugen, C., Gieselmann, R., Holzapfel, H., Nickel, K., Waibel, A.: Natural human-robot interaction using speech, head pose and gestures. In: 2004 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) (IEEE Cat. No. 04CH37566), vol. 3, pp. 2422\u20132427. IEEE (2004)","DOI":"10.1109\/IROS.2004.1389771"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Wang, M.Y., Kogkas, A.A., Darzi, A., Mylonas, G.P.: Free-view, 3D gaze-guided, assistive robotic system for activities of daily living. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 2355\u20132361. IEEE (2018)","DOI":"10.1109\/IROS.2018.8594045"},{"key":"17_CR24","unstructured":"Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2 (2019). https:\/\/github.com\/facebookresearch\/detectron2"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Yang, Z., Gong, B., Wang, L., Huang, W., Yu, D., Luo, J.: A fast and accurate one-stage approach to visual grounding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4683\u20134693 (2019)","DOI":"10.1109\/ICCV.2019.00478"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Yu, L., et al.: Mattnet: modular attention network for referring expression comprehension. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1307\u20131315 (2018)","DOI":"10.1109\/CVPR.2018.00142"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Zhou, Y., et al.: A real-time global inference network for one-stage referring expression comprehension. IEEE Trans. Neural Netw. Learn. Syst. (2021)","DOI":"10.1109\/TNNLS.2021.3090426"},{"key":"17_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1007\/978-3-319-10602-1_26","volume-title":"Computer Vision \u2013 ECCV 2014","author":"CL Zitnick","year":"2014","unstructured":"Zitnick, C.L., Doll\u00e1r, P.: Edge boxes: locating object proposals from edges. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part V. LNCS, vol. 8693, pp. 391\u2013405. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_26"}],"container-title":["Lecture Notes in Computer Science","Social Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-90525-5_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T06:34:37Z","timestamp":1726036477000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-90525-5_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030905248","9783030905255"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-90525-5_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"2 November 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICSR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Social Robotics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 November 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 November 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"socrob2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.colips.org\/conferences\/icsr2021\/wp\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OCS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"114","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"64","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"15","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"56% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.42","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0.49","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}