{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T02:04:00Z","timestamp":1768356240241,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":63,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556786","type":"print"},{"value":"9789819556793","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5679-3_28","type":"book-chapter","created":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T18:36:47Z","timestamp":1768329407000},"page":"403-417","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Rethinking the\u00a0Evaluation of\u00a0Scene Graph Generation"],"prefix":"10.1007","author":[{"given":"Jingyi","family":"Wang","sequence":"first","affiliation":[]},{"given":"Hanwei","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Zhidong","family":"Deng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,14]]},"reference":[{"key":"28_CR1","unstructured":"Bai, S., et al.: Qwen2.5-VL Technical Report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Biswas, B.A., Ji, Q.: Probabilistic debiasing of scene graphs. In: CVPR, pp. 10429\u201310438 (2023)","DOI":"10.1109\/CVPR52729.2023.01005"},{"issue":"1","key":"28_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TPAMI.2021.3137605","volume":"45","author":"X Chang","year":"2021","unstructured":"Chang, X., Ren, P., Xu, P., Li, Z., Chen, X., Hauptmann, A.: A comprehensive survey of scene graphs: generation and application. TPAMI 45(1), 1\u201326 (2021)","journal-title":"TPAMI"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Chaofan\u00a0Zheng, Xinyu\u00a0Lyu, L.G.B.D., Song, J.: Prototype-based embedding network for scene graph generation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02182"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L., Song, Y., Lin, S., Wang, C., He, G.: Kumaraswamy wavelet for heterophilic scene graph generation. In: AAAI, vol.\u00a038, pp. 1138\u20131146 (2024)","DOI":"10.1609\/aaai.v38i2.27875"},{"key":"28_CR6","doi-asserted-by":"crossref","unstructured":"Chen, L., Wang, X., Lu, J., Lin, S., Wang, C., He, G.: Clip-driven open-vocabulary 3d scene graph generation via cross-modality contrastive learning. In: CVPR, pp. 27863\u201327873 (2024)","DOI":"10.1109\/CVPR52733.2024.02632"},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Chen, M., Li, L., Wang, W., Yang, Y.: DiffVSGG: diffusion-driven online video scene graph generation. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.02715"},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Chen, T., Yu, W., Chen, R., Lin, L.: Knowledge-embedded routing network for scene graph generation. In: CVPR, pp. 6163\u20136171 (2019)","DOI":"10.1109\/CVPR.2019.00632"},{"key":"28_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wu, J., Lei, Z., Zhang, Z., Chen, C.: Expanding scene graph boundaries: fully open-vocabulary scene graph generation via visual-concept alignment and retention. In: ECCV (2024)","DOI":"10.1007\/978-3-031-72848-8_7"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Fu, J.F., Zhang, T., Chen, K., Dou, Q.: Hybrid reciprocal transformer with triplet feature alignment for scene graph generation. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.00837"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Gao, G., Liu, W., Chen, A., Geiger, A., Sch\u00f6lkopf, B.: GraphDreamer: compositional 3D scene synthesis from scene graphs. In: CVPR, pp. 21295\u201321304 (2024)","DOI":"10.1109\/CVPR52733.2024.02012"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Goel, A., Fernando, B., Keller, F., Bilen, H.: Not all relations are equal: mining informative labels for scene graph generation. In: CVPR, pp. 15596\u201315606 (2022)","DOI":"10.1109\/CVPR52688.2022.01515"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Hayder, Z., He, X.: DSGG: dense relation transformer for an end-to-end scene graph generation. In: CVPR, pp. 28317\u201328326 (2024)","DOI":"10.1109\/CVPR52733.2024.02675"},{"key":"28_CR14","unstructured":"Hildebrandt, M., Li, H., Koner, R., Tresp, V., G\u00fcnnemann, S.: Scene graph reasoning for visual question answering. CoRR abs\/2007.01072 (2020)"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Huang, W., Ji, Y., Zhu, G., Li, Y., Liu, C.: Navigating the unseen: zero-shot scene graph generation via capsule-based equivariant features. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.02742"},{"key":"28_CR16","doi-asserted-by":"crossref","unstructured":"Huang, Y., et\u00a0al.: Structure-CLIP: towards scene graph knowledge to enhance multi-modal structured representations. In: AAAI, pp. 2417\u20132425 (2024)","DOI":"10.1609\/aaai.v38i3.28017"},{"key":"28_CR17","doi-asserted-by":"crossref","unstructured":"Im, J., Nam, J., Park, N., Lee, H., Park, S.: EGTR: extracting graph from transformer for scene graph generation. In: CVPR, pp. 24229\u201324238 (2024)","DOI":"10.1109\/CVPR52733.2024.02287"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Jeon, J., Kim, K., Yoon, K., Park, C.: Semantic diversity-aware prototype-based learning for unbiased scene graph generation. In: ECCV (2024)","DOI":"10.1007\/978-3-031-73113-6_22"},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: CVPR, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"28_CR20","doi-asserted-by":"crossref","unstructured":"Jung, D., Kim, S., Kim, W.H., Cho, M.: Devil\u2019s on the edges: Selective quad attention for scene graph generation (2023)","DOI":"10.1109\/CVPR52729.2023.01790"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Khandelwal, N., Liu, X., Zhang, M.: Adaptive visual scene understanding: incremental scene graph generation. In: NeurIPS (2024)","DOI":"10.52202\/079017-4335"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Kim, K., et al.: Llm4SGG: large language models for weakly supervised scene graph generation. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.02674"},{"key":"28_CR23","doi-asserted-by":"crossref","unstructured":"Koch, S., Vaskevicius, N., Colosi, M., Hermosilla, P., Ropinski, T.: Open3DSG: open-vocabulary 3D scene graphs from point clouds with queryable objects and open-set relationships. In: CVPR, pp. 14183\u201314193 (2024)","DOI":"10.1109\/CVPR52733.2024.01345"},{"key":"28_CR24","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual Genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vision 123, 32\u201373 (2017)","journal-title":"Int. J. Comput. Vision"},{"key":"28_CR25","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, Y., Guo, X., Yang, R., Li, W.: Leveraging predicate and triplet learning for scene graph generation. In: CVPR, pp. 28369\u201328379 (2024)","DOI":"10.1109\/CVPR52733.2024.02680"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Li, L., Chen, G., Xiao, J., Yang, Y., Wang, C., Chen, L.: Compositional feature augmentation for unbiased scene graph generation. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01982"},{"key":"28_CR27","doi-asserted-by":"crossref","unstructured":"Li, L., Chen, L., Huang, Y., Zhang, Z., Zhang, S., Xiao, J.: The devil is in the labels: noisy label correction for robust scene graph generation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01830"},{"key":"28_CR28","doi-asserted-by":"crossref","unstructured":"Li, R., Zhang, S., Wan, B., He, X.: Bipartite graph network with adaptive message passing for unbiased scene graph generation. In: CVPR, pp. 11109\u201311119 (2021)","DOI":"10.1109\/CVPR46437.2021.01096"},{"key":"28_CR29","unstructured":"Li, X., Chen, L., Shao, J., Xiao, S., Zhang, S., Xiao, J.: Rethinking the evaluation of unbiased scene graph generation. In: BMVC, p.\u00a0279 (2022)"},{"key":"28_CR30","doi-asserted-by":"crossref","unstructured":"Li, Y., Li, Z., Chen, H., Xu, L.: Unbiased video scene graph generation via visual and semantic dual debiasing. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.01774"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, T., Wu, K., Wang, L., Guo, X., Wang, W.: Fine-grained scene graph generation via sample-level bias prediction. In: ECCV (2024)","DOI":"10.1007\/978-3-031-73347-5_2"},{"key":"28_CR32","doi-asserted-by":"crossref","unstructured":"Liao, X., Wei, W., Chen, D., Fu, Y.: Uniq: unified decoder with task-specific queries for efficient scene graph generation. In: ACM MM, pp. 8815\u20138824 (2024)","DOI":"10.1145\/3664647.3681542"},{"key":"28_CR33","doi-asserted-by":"crossref","unstructured":"Lin, X., Ding, C., Zhan, Y., Li, Z., Tao, D.: HL-Net: heterophily learning network for scene graph generation. In: CVPR, pp. 19476\u201319485 (2022)","DOI":"10.1109\/CVPR52688.2022.01887"},{"key":"28_CR34","doi-asserted-by":"crossref","unstructured":"Lin, X., Ding, C., Zhang, J., Zhan, Y., Tao, D.: Ru-Net: regularized unrolling network for scene graph generation. In: CVPR, pp. 19457\u201319466 (2022)","DOI":"10.1109\/CVPR52688.2022.01885"},{"key":"28_CR35","doi-asserted-by":"crossref","unstructured":"Lorenz, J., Barthel, F., Kienzle, D., Lienhart, R.: HayStack: a panoptic scene graph dataset to evaluate rare predicate classes. In: ICCV, pp. 62\u201370 (2023)","DOI":"10.1109\/ICCVW60793.2023.00013"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Lu, F., et al.: Benchmarking large vision-language models via directed scene graph for comprehensive image captioning. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.01827"},{"key":"28_CR37","doi-asserted-by":"crossref","unstructured":"Lv, C., Qi, M., Liu, L., Ma, H.: T2SG: traffic topology scene graph for topology reasoning in autonomous driving. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.01603"},{"issue":"11","key":"28_CR38","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller, G.A.: Wordnet: a lexical database for English. Commun. ACM 38(11), 39\u201341 (1995)","journal-title":"Commun. ACM"},{"key":"28_CR39","doi-asserted-by":"crossref","unstructured":"Mitra, C., Huang, B., Darrell, T., Herzig, R.: Compositional chain-of-thought prompting for large multimodal models. In: CVPR, pp. 14420\u201314431 (2024)","DOI":"10.1109\/CVPR52733.2024.01367"},{"key":"28_CR40","doi-asserted-by":"crossref","unstructured":"Nag, S., Ghosh, U., Bose, S., Ta, C.K., Li, J., Chowdhury, A.K.R.: Conformal prediction and MLLM aided uncertainty quantification in scene graph generation. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.01090"},{"key":"28_CR41","unstructured":"Neau, M., Santos, P.E., Bosser, A.G., Buche, C.: React: real-time efficiency and accuracy compromise for tradeoffs in scene graph generation (2024). https:\/\/arxiv.org\/abs\/2405.16116"},{"key":"28_CR42","doi-asserted-by":"crossref","unstructured":"Nguyen, T.T., Nguyen, P., Cothren, J., Yilmaz, A., Luu, K.: HyperGLM: hypergraph for video scene graph generation and anticipation. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.02714"},{"key":"28_CR43","doi-asserted-by":"crossref","unstructured":"Peddi, R., Saurabh, Shrivastava, A.A., Singla, P., Gogate, V.: Towards unbiased and robust spatio-temporal scene graph generation and anticipation. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.00808"},{"key":"28_CR44","doi-asserted-by":"crossref","unstructured":"Phueaksri, I., Kastner, M.A., Kawanishi, Y., Komamizu, T., Ide, I.: An approach to generate a caption for an image collection using scene graph generation. IEEE Access (2023)","DOI":"10.1109\/ACCESS.2023.3332098"},{"key":"28_CR45","doi-asserted-by":"crossref","unstructured":"Tang, K., Niu, Y., Huang, J., Shi, J., Zhang, H.: Unbiased scene graph generation from biased training. In: CVPR, pp. 3713\u20133722. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"28_CR46","doi-asserted-by":"crossref","unstructured":"Tang, K., Zhang, H., Wu, B., Luo, W., Liu, W.: Learning to compose dynamic tree structures for visual contexts. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00678"},{"key":"28_CR47","unstructured":"Team, G., et\u00a0al.: Gemini 1.5: unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)"},{"key":"28_CR48","doi-asserted-by":"crossref","unstructured":"Wang, G., Li, Z., Chen, Q., Liu, Y.: OED: towards one-stage end-to-end dynamic scene graph generation. In: CVPR, pp. 27938\u201327947 (2024)","DOI":"10.1109\/CVPR52733.2024.02639"},{"key":"28_CR49","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhang, C., Huang, J., Ren, B., Deng, Z.: Improving scene graph generation with superpixel-based interaction learning. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 1809\u20131820 (2023)","DOI":"10.1145\/3581783.3611889"},{"key":"28_CR50","doi-asserted-by":"crossref","unstructured":"Wang, L., Yuan, Z., Chen, B.: Multi-granularity sparse relationship matrix prediction network for end-to-end scene graph generation. In: ECCV (2024)","DOI":"10.1007\/978-3-031-73007-8_7"},{"key":"28_CR51","doi-asserted-by":"crossref","unstructured":"Wu, S., Fei, H., Chua, T.S.: Universal scene graph generation. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.01321"},{"key":"28_CR52","doi-asserted-by":"crossref","unstructured":"Wu, S., Fei, H., Yang, J., Li, X., Li, J., Zhang, H., Chua, T.s.: Learning 4D panoptic scene graph generation from rich 2D visual scene. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.02285"},{"key":"28_CR53","doi-asserted-by":"crossref","unstructured":"Wu, Z., Gao, J., Xu, C.: Open-vocabulary video scene graph generation via union-aware semantic alignment. In: ACM MM, pp. 8566\u20138575 (2024)","DOI":"10.1145\/3664647.3681061"},{"key":"28_CR54","doi-asserted-by":"crossref","unstructured":"Wu, Z., Li, H., Chen, G., Yu, Z., Gu, X., Wang, Y.: 3D question answering with scene graph reasoning. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 1370\u20131378 (2024)","DOI":"10.1145\/3664647.3681517"},{"key":"28_CR55","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: CVPR, pp. 3097\u20133106 (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"28_CR56","unstructured":"You, H., et al.: Ferret: refer and ground anything anywhere at any granularity. In: ICLR (2024)"},{"key":"28_CR57","doi-asserted-by":"crossref","unstructured":"Yu, Q., Li, J., Wu, Y., Tang, S., Ji, W., Zhuang, Y.: Visually-prompted language model for fine-grained scene graph generation in an open world. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01971"},{"key":"28_CR58","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural Motifs: scene graph parsing with global context. In: CVPR, pp. 5831\u20135840 (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"28_CR59","doi-asserted-by":"crossref","unstructured":"Zhang, A., et al.: Fine-grained scene graph generation with data transfer. In: ECCV, pp. 409\u2013424. Springer (2022)","DOI":"10.1007\/978-3-031-19812-0_24"},{"key":"28_CR60","doi-asserted-by":"crossref","unstructured":"Zhang, C., Stepputtis, S., Campbell, J., Sycara, K., Xie, Y.: Hiker-SGG: hierarchical knowledge enhanced robust scene graph generation. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.02667"},{"key":"28_CR61","doi-asserted-by":"crossref","unstructured":"Zhang, C., Delitzas, A., Wang, F., Zhang, R., Ji, X., Pollefeys, M., Engelmann, F.: Open-vocabulary functional 3D scene graphs for real-world indoor spaces. In: CVPR (2025)","DOI":"10.1109\/CVPR52734.2025.01807"},{"key":"28_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, R., et al.: Synergetic prototype learning network for unbiased scene graph generation. In: ACM MM, pp. 945\u2013954 (2024)","DOI":"10.1145\/3664647.3680973"},{"key":"28_CR63","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xing, Y., Wang, R., Wang, Y., Lan, X.: Calibration for long-tailed scene graph generation. In: ACM MM, pp. 3037\u20133046 (2024)","DOI":"10.1145\/3664647.3680818"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5679-3_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T18:36:55Z","timestamp":1768329415000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5679-3_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556786","9789819556793"],"references-count":63,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5679-3_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"14 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}