{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T05:12:54Z","timestamp":1770354774091,"version":"3.49.0"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585914","type":"print"},{"value":"9783030585921","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58592-1_38","type":"book-chapter","created":{"date-parts":[[2020,11,3]],"date-time":"2020-11-03T00:34:03Z","timestamp":1604363643000},"page":"642-657","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":38,"title":["Learning Visual Commonsense for Robust Scene Graph Generation"],"prefix":"10.1007","author":[{"given":"Alireza","family":"Zareian","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhecan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoxuan","family":"You","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shih-Fu","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,3]]},"reference":[{"issue":"1","key":"38_CR1","first-page":"3563","volume":"15","author":"G Alain","year":"2014","unstructured":"Alain, G., Bengio, Y.: What regularized auto-encoders learn from the data-generating distribution. J. Mach. Learn. Res. 15(1), 3563\u20133593 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"38_CR2","doi-asserted-by":"crossref","unstructured":"Chen, T., Yu, W., Chen, R., Lin, L.: Knowledge-embedded routing network for scene graph generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6163\u20136171 (2019)","DOI":"10.1109\/CVPR.2019.00632"},{"key":"38_CR3","doi-asserted-by":"crossref","unstructured":"Chen, X., Li, L.J., Fei-Fei, L., Gupta, A.: Iterative visual reasoning beyond convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7239\u20137248 (2018)","DOI":"10.1109\/CVPR.2018.00756"},{"key":"38_CR4","doi-asserted-by":"crossref","unstructured":"Chuang, C.Y., Li, J., Torralba, A., Fidler, S.: Learning to act properly: predicting and explaining affordances from images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 975\u2013983 (2018)","DOI":"10.1109\/CVPR.2018.00108"},{"key":"38_CR5","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"38_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"724","DOI":"10.1007\/978-3-030-01246-5_43","volume-title":"Computer Vision \u2013 ECCV 2018","author":"O Groth","year":"2018","unstructured":"Groth, O., Fuchs, F.B., Posner, I., Vedaldi, A.: ShapeStacks: learning vision-based physical intuition for generalised object stacking. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 724\u2013739. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_43"},{"key":"38_CR7","doi-asserted-by":"crossref","unstructured":"Gu, J., Zhao, H., Lin, Z., Li, S., Cai, J., Ling, M.: Scene graph generation with external knowledge and image reconstruction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1969\u20131978 (2019)","DOI":"10.1109\/CVPR.2019.00207"},{"key":"38_CR8","unstructured":"Jiang, C., Xu, H., Liang, X., Lin, L.: Hybrid knowledge routed modules for large-scale object detection. In: Advances in Neural Information Processing Systems, pp. 1552\u20131563 (2018)"},{"key":"38_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/978-3-030-01264-9_15","volume-title":"Computer Vision \u2013 ECCV 2018","author":"K Kato","year":"2018","unstructured":"Kato, K., Li, Y., Gupta, A.: Compositional learning for human object interaction. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 247\u2013264. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_15"},{"key":"38_CR10","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"38_CR11","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)"},{"issue":"1","key":"38_CR12","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vis. 123(1), 32\u201373 (2017)","journal-title":"Int. J. Comput. Vis."},{"key":"38_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"506","DOI":"10.1007\/978-3-030-01261-8_30","volume-title":"Computer Vision \u2013 ECCV 2018","author":"KK Singh","year":"2018","unstructured":"Singh, K.K., Divvala, S., Farhadi, A., Lee, Y.J.: DOCK: detecting objects by transferring common-sense knowledge. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11217, pp. 506\u2013522. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_30"},{"key":"38_CR14","doi-asserted-by":"crossref","unstructured":"Lee, C.W., Fang, W., Yeh, C.K., Frank Wang, Y.C.: Multi-label zero-shot learning with structured knowledge graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1576\u20131585 (2018)","DOI":"10.1109\/CVPR.2018.00170"},{"key":"38_CR15","unstructured":"Li, Y., Tarlow, D., Brockschmidt, M., Zemel, R.: Gated graph sequence neural networks. arXiv preprint arXiv:1511.05493 (2015)"},{"key":"38_CR16","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: ViLBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Advances in Neural Information Processing Systems, pp. 13\u201323 (2019)"},{"key":"38_CR17","unstructured":"Malinin, A., Gales, M.: Predictive uncertainty estimation via prior networks. In: Advances in Neural Information Processing Systems, pp. 7047\u20137058 (2018)"},{"key":"38_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"460","DOI":"10.1007\/978-3-030-01237-3_28","volume-title":"Computer Vision \u2013 ECCV 2018","author":"M Narasimhan","year":"2018","unstructured":"Narasimhan, M., Schwing, A.G.: Straight to the facts: learning knowledge base retrieval for factual visual question answering. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11212, pp. 460\u2013477. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01237-3_28"},{"key":"38_CR19","doi-asserted-by":"crossref","unstructured":"Qi, M., Wang, Y., Qin, J., Li, A.: KE-GAN: knowledge embedded generative adversarial networks for semi-supervised scene parsing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5237\u20135246 (2019)","DOI":"10.1109\/CVPR.2019.00538"},{"key":"38_CR20","doi-asserted-by":"crossref","unstructured":"Romaszko, L., Williams, C.K., Moreno, P., Kohli, P.: Vision-as-inverse-graphics: Obtaining a rich 3D explanation of a scene from a single image. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 851\u2013859 (2017)","DOI":"10.1109\/ICCVW.2017.115"},{"key":"38_CR21","doi-asserted-by":"crossref","unstructured":"Speer, R., Chin, J., Havasi, C.: Conceptnet 5.5: an open multilingual graph of general knowledge. In: Thirty-First AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"38_CR22","doi-asserted-by":"crossref","unstructured":"Su, Z., Zhu, C., Dong, Y., Cai, D., Chen, Y., Li, J.: Learning visual knowledge memory networks for visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7736\u20137745 (2018)","DOI":"10.1109\/CVPR.2018.00807"},{"key":"38_CR23","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"38_CR24","unstructured":"Veli\u010dkovi\u0107, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., Bengio, Y.: Graph attention networks. arXiv preprint arXiv:1710.10903 (2017)"},{"key":"38_CR25","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., Manzagol, P.A.: Extracting and composing robust features with denoising autoencoders. In: Proceedings of the 25th International Conference on Machine Learning, pp. 1096\u20131103 (2008)","DOI":"10.1145\/1390156.1390294"},{"key":"38_CR26","doi-asserted-by":"crossref","unstructured":"Wang, T., Huang, J., Zhang, H., Sun, Q.: Visual commonsense R-CNN. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10760\u201310770 (2020)","DOI":"10.1109\/CVPR42600.2020.01077"},{"key":"38_CR27","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"38_CR28","doi-asserted-by":"crossref","unstructured":"Wang, X., Ye, Y., Gupta, A.: Zero-shot recognition via semantic embeddings and knowledge graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6857\u20136866 (2018)","DOI":"10.1109\/CVPR.2018.00717"},{"key":"38_CR29","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5410\u20135419 (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"38_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"690","DOI":"10.1007\/978-3-030-01246-5_41","volume-title":"Computer Vision \u2013 ECCV 2018","author":"J Yang","year":"2018","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., Parikh, D.: Graph R-CNN for scene graph\u00a0generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 690\u2013706. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_41"},{"key":"38_CR31","doi-asserted-by":"crossref","unstructured":"Yu, R., Li, A., Morariu, V.I., Davis, L.S.: Visual relationship detection with internal and external linguistic knowledge distillation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1974\u20131982 (2017)","DOI":"10.1109\/ICCV.2017.121"},{"key":"38_CR32","doi-asserted-by":"crossref","unstructured":"Zareian, A., Karaman, S., Chang, S.F.: Weakly supervised visual semantic parsing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3736\u20133745 (2020)","DOI":"10.1109\/CVPR42600.2020.00379"},{"key":"38_CR33","doi-asserted-by":"crossref","unstructured":"Zellers, R., Bisk, Y., Farhadi, A., Choi, Y.: From recognition to cognition: visual commonsense reasoning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6720\u20136731 (2019)","DOI":"10.1109\/CVPR.2019.00688"},{"key":"38_CR34","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: scene graph parsing with global context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5831\u20135840 (2018)","DOI":"10.1109\/CVPR.2018.00611"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58592-1_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:31:52Z","timestamp":1730593912000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58592-1_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585914","9783030585921"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58592-1_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"3 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}