{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T20:18:35Z","timestamp":1743106715476,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030926588"},{"type":"electronic","value":"9783030926595"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-92659-5_13","type":"book-chapter","created":{"date-parts":[[2022,1,13]],"date-time":"2022-01-13T07:09:18Z","timestamp":1642057758000},"page":"206-221","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Video Instance Segmentation with\u00a0Recurrent Graph Neural Networks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2553-3367","authenticated-orcid":false,"given":"Joakim","family":"Johnander","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0418-9694","authenticated-orcid":false,"given":"Emil","family":"Brissman","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6144-9520","authenticated-orcid":false,"given":"Martin","family":"Danelljan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6096-3648","authenticated-orcid":false,"given":"Michael","family":"Felsberg","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,13]]},"reference":[{"key":"13_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1007\/978-3-030-58621-8_10","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Athar","year":"2020","unstructured":"Athar, A., Mahadevan, S., Os\u0306ep, A., Leal-Taix\u00e9, L., Leibe, B.: STEm-Seg: spatio-temporal embeddings for instance segmentation in videos. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12356, pp. 158\u2013177. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58621-8_10"},{"key":"13_CR2","unstructured":"Battaglia, P.W., et al.: Relational inductive biases, deep learning, and graph networks. CoRR abs\/1806.01261 (2018)"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Benenson, R., Popov, S., Ferrari, V.: Large-scale interactive object segmentation with human annotators. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01197"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Berg, A., Johnander, J., Durand de Gevigney, F., Ahlberg, J., Felsberg, M.: Semi-automatic annotation of objects in visual-thermal video. In: Proceedings of the IEEE International Conference on Computer Vision Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00277"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Berman, M., Rannen Triki, A., Blaschko, M.B.: The lov\u00e1sz-softmax loss: a tractable surrogate for the optimization of the intersection-over-union measure in neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4413\u20134421 (2018)","DOI":"10.1109\/CVPR.2018.00464"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Bertasius, G., Torresani, L.: Classifying, segmenting, and tracking object instances in video with mask propagation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9739\u20139748 (2020)","DOI":"10.1109\/CVPR42600.2020.00976"},{"key":"13_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1007\/978-3-030-01258-8_21","volume-title":"Computer Vision \u2013 ECCV 2018","author":"G Bertasius","year":"2018","unstructured":"Bertasius, G., Torresani, L., Shi, J.: Object detection in video with spatiotemporal sampling networks. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11216, pp. 342\u2013357. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01258-8_21"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Bolya, D., Zhou, C., Xiao, F., Lee, Y.J.: YOLACT: real-time instance segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 9157\u20139166 (2019)","DOI":"10.1109\/ICCV.2019.00925"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Bras\u00f3, G., Leal-Taix\u00e9, L.: Learning a neural solver for multiple object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6247\u20136257 (2020)","DOI":"10.1109\/CVPR42600.2020.00628"},{"issue":"3","key":"13_CR10","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1049\/ip-vis:20050052","volume":"153","author":"T Burghardt","year":"2006","unstructured":"Burghardt, T., \u0106ali\u0107, J.: Analysing animal behaviour in wildlife videos using face detection and tracking. IEE Proc.-Vis. Image Signal Process. 153(3), 305\u2013312 (2006)","journal-title":"IEE Proc.-Vis. Image Signal Process."},{"key":"13_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-030-58568-6_1","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Cao","year":"2020","unstructured":"Cao, J., Anwer, R.M., Cholakkal, H., Khan, F.S., Pang, Y., Shao, L.: SipMask: spatial information preservation for fast image and video instance segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12359, pp. 1\u201318. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58568-6_1"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Chen, K., et al.: Hybrid task cascade for instance segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4974\u20134983 (2019)","DOI":"10.1109\/CVPR.2019.00511"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Cho, K., van Merri\u00ebnboer, B., Bahdanau, D., Bengio, Y.: On the properties of neural machine translation: encoder-decoder approaches. Syntax, Semantics and Structure in Statistical Translation, p. 103 (2014)","DOI":"10.3115\/v1\/W14-4012"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"issue":"10","key":"13_CR15","doi-asserted-by":"publisher","first-page":"2222","DOI":"10.1109\/TNNLS.2016.2582924","volume":"28","author":"K Greff","year":"2016","unstructured":"Greff, K., Srivastava, R.K., Koutn\u00edk, J., Steunebrink, B.R., Schmidhuber, J.: LSTM: a search space odyssey. IEEE Trans. Neural Netw. Learn. Syst. 28(10), 2222\u20132232 (2016)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"13_CR16","unstructured":"Han, W., et al.: SEQ-NMS for video object detection. arXiv preprint arXiv:1602.08465 (2016)"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"13_CR18","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Izquierdo, R., Quintanar, A., Parra, I., Fern\u00e1ndez-Llorca, D., Sotelo, M.: The prevention dataset: a novel benchmark for prediction of vehicles intentions. In: 2019 IEEE Intelligent Transportation Systems Conference (ITSC), pp. 3114\u20133121. IEEE (2019)","DOI":"10.1109\/ITSC.2019.8917433"},{"key":"13_CR20","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1007\/s11263-020-01316-z","volume":"128","author":"A Kuznetsova","year":"2020","unstructured":"Kuznetsova, A., et al.: The open images dataset v4: unified image classification, object detection, and visual relationship detection at scale. IJCV 128, 1956\u20131981 (2020)","journal-title":"IJCV"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Luiten, J., Torr, P., Leibe, B.: Video instance segmentation 2019: a winning approach for combined detection, segmentation, classification and tracking. In: Proceedings of the IEEE International Conference on Computer Vision Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00088"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Luiten, J., Zulfikar, I.E., Leibe, B.: UnOVOST: unsupervised offline video object segmentation and tracking. In: 2020 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1989\u20131998. IEEE (2020)","DOI":"10.1109\/WACV45572.2020.9093285"},{"key":"13_CR23","unstructured":"Murphy, K.P.: Conjugate Bayesian analysis of the Gaussian distribution. def 1(2$$\\sigma $$2), 16 (2007)"},{"key":"13_CR24","unstructured":"Paszke, A., et al.: Automatic differentiation in PyTorch (2017)"},{"key":"13_CR25","doi-asserted-by":"crossref","unstructured":"Sarlin, P.E., DeTone, D., Malisiewicz, T., Rabinovich, A.: SuperGlue: learning feature matching with graph neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4938\u20134947 (2020)","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"T\u2019Jampens, R., Hernandez, F., Vandecasteele, F., Verstockt, S.: Automatic detection, tracking and counting of birds in marine video content. In: 2016 Sixth International Conference on Image Processing Theory, Tools and Applications (IPTA), pp. 1\u20136. IEEE (2016)","DOI":"10.1109\/IPTA.2016.7821031"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P., Chai, Y., Schroff, F., Adam, H., Leibe, B., Chen, L.C.: FEELVOS: Fast end-to-end embedding learning for video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9481\u20139490 (2019)","DOI":"10.1109\/CVPR.2019.00971"},{"key":"13_CR28","doi-asserted-by":"publisher","unstructured":"Weng, X., Wang, Y., Man, Y., Kitani, K.M.: GNN3DMOT: graph neural network for 3d multi-object tracking with 2d\u20133d multi-feature learning. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, June 13\u201319, 2020, pp. 6498\u20136507. IEEE (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00653","DOI":"10.1109\/CVPR42600.2020.00653"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Wojke, N., Bewley, A., Paulus, D.: Simple online and realtime tracking with a deep association metric. In: 2017 IEEE International Conference on Image Processing (ICIP), pp. 3645\u20133649. IEEE (2017)","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Yang, L., Fan, Y., Xu, N.: Video instance segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5188\u20135197 (2019)","DOI":"10.1109\/ICCV.2019.00529"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Yang, L., Wang, Y., Xiong, X., Yang, J., Katsaggelos, A.K.: Efficient video object segmentation via network modulation. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00680"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Yu, F., et al.: Bdd100k: A diverse driving dataset for heterogeneous multitask learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2636\u20132645 (2020)","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, X.Y., Wu, X.J., Zhou, X., Wang, X.G., Zhang, Y.Y.: Automatic detection and tracking of maneuverable birds in videos. In: 2008 International Conference on Computational Intelligence and Security, vol. 1, pp. 185\u2013189. IEEE (2008)","DOI":"10.1109\/CIS.2008.46"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-92659-5_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,5]],"date-time":"2022-05-05T14:47:11Z","timestamp":1651762031000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-92659-5_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030926588","9783030926595"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-92659-5_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"13 January 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAGM GCPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"DAGM German Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bonn","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 October 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"43","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dagm2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.dagm-gcpr.de\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"116","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"46","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.95","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}