{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:25:30Z","timestamp":1742912730997,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":33,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819787487"},{"type":"electronic","value":"9789819787494"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-8749-4_6","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:57:22Z","timestamp":1730303842000},"page":"81-95","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Enhancing Relevance and Efficiency in Visual Question Generation Through Redundant Object Filtering"],"prefix":"10.1007","author":[{"given":"Feifei","family":"Xu","sequence":"first","affiliation":[]},{"given":"Yingchen","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Zhong","sequence":"additional","affiliation":[]},{"given":"Guangzhen","family":"Li","sequence":"additional","affiliation":[]},{"given":"Wang","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"De Vries, H., Strub, F., Chandar, S., Pietquin, O., 
Larochelle, H., Courville, A.: Guesswhat?! visual object discovery through multi-modal dialogue. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5503\u20135512 (2017)","DOI":"10.1109\/CVPR.2017.475"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Shekhar, R.,  et al.: Beyond task success: a closer look at jointly learning to see, ask, and GuessWhat. In: Proceedings of NAACL-HLT, pp. 2578\u20132587 (2019)","DOI":"10.18653\/v1\/N19-1265"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Testoni, A., Bernardi, R.: Garbage in, flowers out: Noisy training data help generative models at test time. IJCoL Ital. J. Comput. Linguist. 8(8-1) (2023)","DOI":"10.4000\/ijcol.974"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Zheng, D., Xu, Z., Meng, F., Wang, X., Wang, J., Zhou, J.: Enhancing visual dialog questioner with entity-based strategy learning and augmented guesser. In: Findings of the Association for Computational Linguistics: EMNLP 2021, pp. 1839\u20131851 (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.158"},{"key":"6_CR5","unstructured":"Zheng, D., et al.: Spot the difference: a cooperative object-referring game in non-perfectly co-observable scene. arXiv preprint arXiv:2203.08362 (2022)"},{"key":"6_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.107823","volume":"114","author":"L Zhao","year":"2021","unstructured":"Zhao, L., Lyu, X., Song, J., Gao, L.: GuessWhich? visual dialog with attentive memory network. Pattern Recognit. 114, 107823 (2021)","journal-title":"Pattern Recognit."},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Matsumori, S., Shingyouchi, K., Abe, Y., Fukuchi, Y., Sugiura, K., Imai, M.: Unified questioner transformer for descriptive question generation in goal-oriented visual dialogue. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
1898\u20131907 (2021)","DOI":"10.1109\/ICCV48922.2021.00191"},{"key":"6_CR8","doi-asserted-by":"publisher","unstructured":"Kanazawa, S., Matsumori, S., Imai, M.: Improving goal-oriented visual dialogue by\u00a0asking fewer questions. In: Mantoro, T., Lee, M., Ayu, M.A., Wong, K.W., Hidayanto, A.N. (eds.) ICONIP 2021. LNCS, vol .13109, pp. 158\u2013169. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-92270-2_14","DOI":"10.1007\/978-3-030-92270-2_14"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Das, A.,  et al.: Visual dialog. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 326\u2013335 (2017)","DOI":"10.1109\/CVPR.2017.121"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Wang, Y., Joty, S., Lyu, M., King, I., Xiong, C., Hoi, S.C.: VD-BERT: a unified vision and dialog transformer with BERT. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 3325\u20133338 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.269"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Guo, D., Wang, H., Zhang, H., Zha, Z.-J., Wang, M.: Iterative context-aware graph inference for visual dialog. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10055\u201310064 (2020)","DOI":"10.1109\/CVPR42600.2020.01007"},{"key":"6_CR12","unstructured":"Abbasnejad, E., Wu, Q., Abbasnejad, I., Shi, J.Q., van den Hengel, A.: An active information seeking model for goal-oriented vision-and-language tasks. arXiv, vol. abs\/1812.06398 (2018)"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Shukla, P., Elmadjian, C., Sharan, R., Kulkarni, V., Turk, M., Wang, W.Y.: What should i ask? Using conversationally informative rewards for goal-oriented visual dialog. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 
6442\u20136451 (2020)","DOI":"10.18653\/v1\/P19-1646"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Mazuecos, M., Luque, F.M., S\u00e1nchez, J., Maina, H., Vadora, T., Benotti, L.: Region under discussion for visual dialog. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 4745\u20134759 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.390"},{"key":"6_CR15","unstructured":"Mazuecos, M., Blackburn, P., Benotti, L.: The impact of answers in referential visual dialog. In: Proceedings of the Reasoning and Interaction Conference (ReInAct 2021), pp. 8\u201313 (2021)"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Pang, W., Wang, X.: Visual dialogue state tracking for question generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 11831\u201311838 (2021)","DOI":"10.1609\/aaai.v34i07.6856"},{"key":"6_CR17","unstructured":"Sang-Woo, L., Tong, G., Sohee, Y., Jaejun, Y., JungWoo, H.: Large-scale answerer in questioner\u2019s mind for visual dialog question generation. In: Proceedings of International Conference on Learning Representations, ICLR (2019)"},{"key":"6_CR18","doi-asserted-by":"publisher","unstructured":"Bani, G., et al.: Adding object detection skills to visual dialogue agents. In: Leal-Taix\u00e9, L., Roth, S. (eds.) ECCV 2018, pp. 180\u2013187. LNCS, vol. 11132. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-11018-5_17","DOI":"10.1007\/978-3-030-11018-5_17"},{"key":"6_CR19","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Strub, F., de Vries, H., Mary, J., Piot, B., Courville, A., Pietquin, O.: End-to-end optimization of goal-driven and visually grounded dialogue systems. 
In: International Joint Conference on Artificial Intelligence (2017)","DOI":"10.24963\/ijcai.2017\/385"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, J., Wu, Q., Shen, C., Zhang, J., Lu, J., van den Hengel, A.: Asking the difficult questions: goal-oriented visual question generation via intermediate rewards. In: European Conference on Computer Vision (2017)","DOI":"10.1007\/978-3-030-01228-1_12"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Abbasnejad, E., Wu, Q., Shi, J.Q., van den Hengel, A.: What\u2019s to know? Uncertainty as a guide to asking goal-oriented questions. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4150\u20134159 (2018)","DOI":"10.1109\/CVPR.2019.00428"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Testoni, A., Bernardi, R.: Looking for confirmations: an effective and human-like visual dialogue strategy. In: Conference on Empirical Methods in Natural Language Processing (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.736"},{"key":"6_CR24","unstructured":"Shi, Y., Tan, Y. , Feng, F., Zheng, C., Wang, X.: Category-based strategy-driven question generator for visual dialogue. In: China National Conference on Chinese Computational Linguistics (2023)"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Wang, Y., Xu, J., Sun, Y.: End-to-end transformer based model for image captioning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 2585\u20132594 (2022)","DOI":"10.1609\/aaai.v36i3.20160"},{"key":"6_CR26","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"2000","unstructured":"Gers, F.A., Schmidhuber, J., Cummins, F.: Learning to forget: continual prediction with LSTM. Neural Comput. 
12, 2451\u20132471 (2000)","journal-title":"Neural Comput."},{"key":"6_CR27","unstructured":"Xu, Z., Feng, F., Wang, X., Yang, Y., Jiang, H., Ouyang, Z.: Answer-driven visual state estimator for goal-oriented visual dialogue. In: Proceedings of the 28th ACM International Conference on Multimedia (2022)"},{"key":"6_CR28","unstructured":"Serban, I., Sordoni, A., Bengio, Y., Courville, A.C., Pineau, J.: Hierarchical neural network generative models for movie dialogues. arXiv, vol. abs\/1507.04808 (2015)"},{"key":"6_CR29","unstructured":"Zhao, R., Tresp, V.: Improving goal-oriented visual dialog agents via advanced recurrent nets with tempered policy gradient. In: LaCATODA@ IJCAI, pp. 1\u20137 (2018)"},{"key":"6_CR30","unstructured":"Lee, S.-W., Heo, Y.-J., Zhang, B.-T.: Answerer in questioner\u2019s mind: information theoretic approach to goal-oriented visual dialog. In: Neural Information Processing Systems (2018)"},{"key":"6_CR31","unstructured":"Shekhar, R., Baumg\u00e4rtner, T., Venkatesh, A., Bruni, E., Bernardi, R., Fern\u00e1ndez, R.: Ask no more: deciding when to guess in referential visual dialogue. In: Proceedings of the 27th International Conference on Computational Linguistics, pp. 1218\u20131233 (2019)"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Tu, T., Ping, Q., Thattai, G., Tur, G., Natarajan, P.: Learning better visual dialog agents with pretrained visual-linguistic representation. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5618\u20135627 (2021)","DOI":"10.1109\/CVPR46437.2021.00557"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Pang, W., Wang, X.: Guessing state tracking for visual dialogue. 
arXiv preprint arXiv:2002.10340 (2020)","DOI":"10.1007\/978-3-030-58517-4_40"}],"container-title":["Communications in Computer and Information Science","Data Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8749-4_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T16:10:56Z","timestamp":1730304656000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8749-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819787487","9789819787494"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8749-4_6","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPCSEE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference of Pioneering Computer Scientists, Engineers and Educators","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Macao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 
2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpcsee2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.icpcsee.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}