{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T18:22:12Z","timestamp":1771957332882,"version":"3.50.1"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730290","type":"print"},{"value":"9783031730306","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73030-6_9","type":"book-chapter","created":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T16:58:01Z","timestamp":1732553881000},"page":"148-164","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Fair Ranking and\u00a0New Model for\u00a0Panoptic Scene Graph Generation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8893-4355","authenticated-orcid":false,"given":"Julian","family":"Lorenz","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"Pest","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7829-1256","authenticated-orcid":false,"given":"Daniel","family":"Kienzle","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5721-243X","authenticated-orcid":false,"given":"Katja","family":"Ludwig","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4007-6889","authenticated-orcid":false,"given":"Rainer","family":"Lienhart","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,24]]},"reference":[{"key":"9_CR1","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision - ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.M. (eds.) Computer Vision - ECCV 2020, pp. 213\u2013229. Springer International Publishing, Cham (2020)"},{"key":"9_CR2","doi-asserted-by":"publisher","unstructured":"Chen, L., et al.: Multi-prototype space learning for commonsense-based scene graph generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.38, no.2, pp. 1129\u20131137 (Mar 2024). https:\/\/doi.org\/10.1609\/aaai.v38i2.27874, https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/27874","DOI":"10.1609\/aaai.v38i2.27874"},{"key":"9_CR3","doi-asserted-by":"publisher","unstructured":"Chen, T., Yu, W., Chen, R., Lin, L.: Knowledge-embedded routing network for scene graph generation. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6156\u20136164 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00632","DOI":"10.1109\/CVPR.2019.00632"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. arXiv preprint arxiv: 2112.01527v3 (2021)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"9_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"9_CR6","doi-asserted-by":"publisher","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 2980\u20132988 (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.322","DOI":"10.1109\/ICCV.2017.322"},{"key":"9_CR7","doi-asserted-by":"publisher","unstructured":"Jain, J., Li, J., Chiu, M., Hassani, A., Orlov, N., Shi, H.: OneFormer: one transformer to rule universal image segmentation. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2989\u20132998 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.00292","DOI":"10.1109\/CVPR52729.2023.00292"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Kim, B., Lee, J., Kang, J., Kim, E.S., Kim, H.J.: HOTR: end-to-end human-object interaction detection with transformers. In: CVPR. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.00014"},{"key":"9_CR9","doi-asserted-by":"publisher","unstructured":"Kirillov, A., He, K., Girshick, R., Rother, C., Doll\u00e1r, P.: Panoptic segmentation. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9396\u20139405 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00963","DOI":"10.1109\/CVPR.2019.00963"},{"issue":"1","key":"9_CR10","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vision 123(1), 32\u201373 (2017). https:\/\/doi.org\/10.1007\/s11263-016-0981-7","journal-title":"Int. J. Comput. Vision"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Li, F., et al.: Mask DINO: towards a unified transformer-based framework for object detection and segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3041\u20133050 (June 2023)","DOI":"10.1109\/CVPR52729.2023.00297"},{"key":"9_CR12","doi-asserted-by":"publisher","unstructured":"Li, L., et al.: Panoptic scene graph generation with semantics-prototype learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, no. 4, pp. 3145\u20133153 (Mar 2024). https:\/\/doi.org\/10.1609\/aaai.v38i4.28098, https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/28098","DOI":"10.1609\/aaai.v38i4.28098"},{"key":"9_CR13","doi-asserted-by":"publisher","unstructured":"Li, L., Qin, Y., Ji, W., Zhou, Y., Zimmermann, R.: Domain-wise invariant learning for panoptic scene graph generation. In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3165\u20133169 (2024). https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10447193","DOI":"10.1109\/ICASSP48485.2024.10447193"},{"key":"9_CR14","doi-asserted-by":"publisher","unstructured":"Liang, N., Liu, Y., Sun, W., Xia, Y., Wang, F.: CKT-RCM: clip-based knowledge transfer and relational context mining for unbiased panoptic scene graph generation. In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3570\u20133574 (2024). https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10446810","DOI":"10.1109\/ICASSP48485.2024.10446810"},{"key":"9_CR15","doi-asserted-by":"publisher","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 936\u2013944 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.106","DOI":"10.1109\/CVPR.2017.106"},{"key":"9_CR16","doi-asserted-by":"publisher","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) Computer Vision - ECCV 2014, pp. 740\u2013755. Springer International Publishing, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Lin, X., Ding, C., Zeng, J., Tao, D.: GPS-Net: graph property sensing network for scene graph generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2020)","DOI":"10.1109\/CVPR42600.2020.00380"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Lorenz, J., Barthel, F., Kienzle, D., Lienhart, R.: Haystack: a panoptic scene graph dataset to evaluate rare predicate classes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops, pp. 62\u201370 (October 2023)","DOI":"10.1109\/ICCVW60793.2023.00013"},{"key":"9_CR19","doi-asserted-by":"publisher","unstructured":"Lu, C., Krishna, R., Bernstein, M., Fei-Fei, L.: Visual relationship detection with language priors. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) Computer Vision - ECCV 2016, pp. 852\u2013869. Lecture Notes in Computer Science, Springer International Publishing (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_51","DOI":"10.1007\/978-3-319-46448-0_51"},{"key":"9_CR20","doi-asserted-by":"publisher","unstructured":"Ludwig, K., Harzig, P., Lienhart, R.: Detecting arbitrary intermediate keypoints for human pose estimation with vision transformers. In: 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision Workshops (WACVW), pp. 663\u2013671 (2022).https:\/\/doi.org\/10.1109\/WACVW54805.2022.00073","DOI":"10.1109\/WACVW54805.2022.00073"},{"issue":"6","key":"9_CR21","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"9_CR22","doi-asserted-by":"publisher","unstructured":"Shao, S., et al.: Objects365: a large-scale, high-quality dataset for object detection. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 8429\u20138438 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00852","DOI":"10.1109\/ICCV.2019.00852"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Tang, K., Zhang, H., Wu, B., Luo, W., Liu, W.: Learning to compose dynamic tree structures for visual contexts. In: Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00678"},{"key":"9_CR24","doi-asserted-by":"publisher","unstructured":"Wang, J., Wen, Z., Li, X., Guo, Z., Yang, J., Liu, Z.: Pair then relation: pair-Net for panoptic scene graph generation. https:\/\/doi.org\/10.48550\/arXiv.2307.08699, http:\/\/arxiv.org\/abs\/2307.08699","DOI":"10.48550\/arXiv.2307.08699"},{"key":"9_CR25","unstructured":"Wang, W., et\u00a0al.: The all-seeing project: towards panoptic visual recognition and understanding of the open world. arXiv preprint arXiv:2308.01907 (2023)"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C., Fei-Fei, L.: Scene graph generation by iterative message passing. In: Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Yang, J., Ang, Y.Z., Guo, Z., Zhou, K., Zhang, W., Liu, Z.: Panoptic scene graph generation. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19812-0_11"},{"key":"9_CR28","unstructured":"Yang, J., et al.: 4D panoptic scene graph generation. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a036, pp. 69692\u201369705. Curran Associates, Inc. (2023). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/dc6319dde4fb182b22fb902da9418566-Paper-Conference.pdf"},{"key":"9_CR29","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: scene graph parsing with global context. In: Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"9_CR30","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1007\/978-3-031-19812-0_24","volume-title":"Computer Vision - ECCV 2022","author":"A Zhang","year":"2022","unstructured":"Zhang, A., et al.: Fine-grained scene graph generation with data transfer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022, pp. 409\u2013424. Springer Nature Switzerland, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19812-0_24"},{"key":"9_CR31","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Shi, M., Caesar, H.: HiLo: exploiting high low frequency relations for unbiased panoptic scene graph generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 21637\u201321648 (October 2023)","DOI":"10.1109\/ICCV51070.2023.01978"},{"key":"9_CR32","unstructured":"Zhou, Z., Shi, M., Caesar, H.: VLPrompt: vision-language prompting for panoptic scene graph generation. arXiv preprint arXiv:2311.16492 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73030-6_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T17:13:07Z","timestamp":1732554787000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73030-6_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,24]]},"ISBN":["9783031730290","9783031730306"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73030-6_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,24]]},"assertion":[{"value":"24 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}