{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T18:27:14Z","timestamp":1743100034996,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":34,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819785049"},{"type":"electronic","value":"9789819785056"}],"license":[{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8505-6_16","type":"book-chapter","created":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T22:03:55Z","timestamp":1730930635000},"page":"223-238","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Relation Detection with Transformers for Panoptic Scene Graph Generation"],"prefix":"10.1007","author":[{"given":"Chang","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenchao","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shilin","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liqun","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaotao","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Y., Zhang, Y., Guo, Y.: Panet: A context based predicate association network for scene graph generation. In: 2019 IEEE International Conference on Multimedia and Expo (ICME), pp. 508\u2013513. IEEE (2019)","DOI":"10.1109\/ICME.2019.00094"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Cheng, B., Collins, M.D., Zhu, Y., Liu, T., Huang, T.S., Adam, H., Chen, L.C.: Panoptic-deeplab: A simple, strong, and fast baseline for bottom-up panoptic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12475\u201312485 (2020)","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"16_CR5","first-page":"17864","volume":"34","author":"B Cheng","year":"2021","unstructured":"Cheng, B., Schwing, A., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. Adv. Neural. Inf. Process. Syst. 34, 17864\u201317875 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR6","unstructured":"Cong, W., Wang, W., Lee, W.C.: Scene graph generation via conditional random fields. arXiv preprint arXiv:1811.08075 (2018)"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Dai, B., Zhang, Y., Lin, D.: Detecting visual relationships with deep relational networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3076\u20133086 (2017)","DOI":"10.1109\/CVPR.2017.352"},{"key":"16_CR8","unstructured":"Dinh, L., Sohl-Dickstein, J., Bengio, S.: Density estimation using real nvp. arXiv preprint arXiv:1605.08803 (2016)"},{"key":"16_CR9","unstructured":"Ghosh, S., Burachas, G., Ray, A., Ziskind, A.: Generating natural language explanations for visual question answering using scene graphs and visual attention. arXiv preprint arXiv:1902.05715 (2019)"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Gkanatsios, N., Pitsikalis, V., Koutras, P., Zlatintsi, A., Maragos, P.: Deeply supervised multimodal attentional translation embeddings for visual relationship detection. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 1840\u20131844. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803106"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"11","key":"16_CR12","doi-asserted-by":"publisher","first-page":"3820","DOI":"10.1109\/TPAMI.2020.2992222","volume":"43","author":"ZS Hung","year":"2020","unstructured":"Hung, Z.S., Mallya, A., Lazebnik, S.: Contextual translation embedding for visual relationship detection and scene graph generation. IEEE Trans. Pattern Anal. Mach. Intell. 43(11), 3820\u20133832 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Johnson, J., Krishna, R., Stark, M., Li, L.J., Shamma, D., Bernstein, M., Fei-Fei, L.: Image retrieval using scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Khademi, M., Schulte, O.: Deep generative probabilistic graph neural networks for scene graph generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 11237\u201311245 (2020)","DOI":"10.1609\/aaai.v34i07.6783"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Kirillov, A., He, K., Girshick, R., Rother, C., Doll\u00e1r, P.: Panoptic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9404\u20139413 (2019)","DOI":"10.1109\/CVPR.2019.00963"},{"key":"16_CR16","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., Zhu, Y., Groth, O., Johnson, J., Hata, K., Kravitz, J., Chen, S., Kalantidis, Y., Li, L.J., Shamma, D.A., et al.: Visual genome: Connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vision 123, 32\u201373 (2017)","journal-title":"Int. J. Comput. Vision"},{"issue":"4","key":"16_CR17","first-page":"4552","volume":"45","author":"Y Li","year":"2022","unstructured":"Li, Y., Zhao, H., Qi, X., Chen, Y., Qi, L., Wang, L., Li, Z., Sun, J., Jia, J.: Fully convolutional networks for panoptic segmentation with point-based supervision. IEEE Trans. Pattern Anal. Mach. Intell. 45(4), 4552\u20134568 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Li, Y., Ouyang, W., Wang, X., Tang, X.: Vip-cnn: Visual phrase guided convolutional neural network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1347\u20131356 (2017)","DOI":"10.1109\/CVPR.2017.766"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"16_CR20","doi-asserted-by":"crossref","unstructured":"Lin, X., Ding, C., Zeng, J., Tao, D.: Gps-net: Graph property sensing network for scene graph generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3746\u20133753 (2020)","DOI":"10.1109\/CVPR42600.2020.00380"},{"key":"16_CR21","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Tang, K., Zhang, H., Wu, B., Luo, W., Liu, W.: Learning to compose dynamic tree structures for visual contexts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6619\u20136628 (2019)","DOI":"10.1109\/CVPR.2019.00678"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Wang, J., Wen, Z., Li, X., Guo, Z., Yang, J., Liu, Z.: Pair then relation: Pair-net for panoptic scene graph generation. arXiv preprint arXiv:2307.08699 (2023)","DOI":"10.1109\/TPAMI.2024.3442301"},{"key":"16_CR24","doi-asserted-by":"crossref","unstructured":"Wang, R., Wei, Z., Li, P., Zhang, Q., Huang, X.: Storytelling from an image stream using scene graphs. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 9185\u20139192 (2020)","DOI":"10.1609\/aaai.v34i05.6455"},{"key":"16_CR25","doi-asserted-by":"crossref","unstructured":"Wang, W., Wang, R., Shan, S., Chen, X.: Sketching image gist: Human-mimetic hierarchical scene graph generation. In: European Conference on Computer Vision, pp. 222\u2013239. Springer (2020)","DOI":"10.1007\/978-3-030-58601-0_14"},{"key":"16_CR26","first-page":"20508","volume":"33","author":"Y Wu","year":"2020","unstructured":"Wu, Y., Zhang, G., Xu, H., Liang, X., Lin, L.: Auto-panoptic: Cooperative multi-component architecture search for panoptic segmentation. Adv. Neural. Inf. Process. Syst. 33, 20508\u201320519 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR27","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5410\u20135419 (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Yang, J., Ang, Y.Z., Guo, Z., Zhou, K., Zhang, W., Liu, Z.: Panoptic scene graph generation. In: European Conference on Computer Vision, pp. 178\u2013196. Springer (2022)","DOI":"10.1007\/978-3-031-19812-0_11"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Yin, G., Sheng, L., Liu, B., Yu, N., Wang, X., Shao, J., Loy, C.C.: Zoom-net: Mining deep feature interactions for visual relationship recognition. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 322\u2013338 (2018)","DOI":"10.1007\/978-3-030-01219-9_20"},{"key":"16_CR30","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: Scene graph parsing with global context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5831\u20135840 (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"16_CR31","unstructured":"Zhang, C., Chao, W.L., Xuan, D.: An empirical study on leveraging scene graphs for visual question answering. arXiv preprint arXiv:1907.12133 (2019)"},{"key":"16_CR32","first-page":"10326","volume":"34","author":"W Zhang","year":"2021","unstructured":"Zhang, W., Pang, J., Chen, K., Loy, C.C.: K-net: Towards unified image segmentation. Adv. Neural. Inf. Process. Syst. 34, 10326\u201310338 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR33","doi-asserted-by":"crossref","unstructured":"Zhong, Y., Wang, L., Chen, J., Yu, D., Li, Y.: Comprehensive image captioning via scene graph decomposition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIV 16, pp. 211\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58568-6_13"},{"key":"16_CR34","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Shi, M., Caesar, H.: Hilo: Exploiting high low frequency relations for unbiased panoptic scene graph generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 21637\u201321648 (2023)","DOI":"10.1109\/ICCV51070.2023.01978"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8505-6_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T22:05:31Z","timestamp":1730930731000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8505-6_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,7]]},"ISBN":["9789819785049","9789819785056"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8505-6_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,7]]},"assertion":[{"value":"7 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}