{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T15:51:35Z","timestamp":1782402695490,"version":"3.54.5"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031546044","type":"print"},{"value":"9783031546051","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-54605-1_2","type":"book-chapter","created":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T11:43:10Z","timestamp":1709811790000},"page":"18-33","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["A Trimodal Dataset: RGB, Thermal, and\u00a0Depth for\u00a0Human Segmentation and\u00a0Temporal Action Detection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0482-902X","authenticated-orcid":false,"given":"Christian","family":"Stippel","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3129-5054","authenticated-orcid":false,"given":"Thomas","family":"Heitzinger","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5217-2854","authenticated-orcid":false,"given":"Martin","family":"Kampel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,3,8]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Brenner, M., Reyes, N.H., Susnjak, T., Barczak, A.L.: RGB-D and thermal sensor fusion: a systematic literature review. arXiv preprint arXiv:2305.11427 (2023)","DOI":"10.1109\/ACCESS.2023.3301119"},{"key":"2_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR 2005), vol. 1, pp. 886\u2013893. IEEE (2005)","DOI":"10.1109\/CVPR.2005.177"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Davis, J., Keck, M.: A two-stage approach to person detection in thermal imagery. In: Proceeding of Workshop on Applications of Computer Vision (WACV) (2005)","DOI":"10.1109\/ACVMOT.2005.14"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"2_CR7","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"2_CR8","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The PASCAL visual object classes (VOC) challenge. Int. J. Comput. Vision 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"2_CR9","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1016\/j.neucom.2016.05.094","volume":"212","author":"C Gao","year":"2016","unstructured":"Gao, C., et al.: Infar dataset: infrared action recognition at different times. Neurocomputing 212, 36\u201347 (2016)","journal-title":"Neurocomputing"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"2_CR12","unstructured":"Heitzinger, T., Kampel, M.: A foundation for 3d human behavior detection in privacy-sensitive domains. In: 32nd British Machine Vision Conference 2021, BMVC 2021, 22\u201325 November 2021, p. 305. BMVA Press (2021). https:\/\/www.bmvc2021-virtualconference.com\/assets\/papers\/1254.pdf"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Heitzinger, T., Kampel, M.: IPT: a dataset for identity preserved tracking in closed domains. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 8228\u20138234. IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9412979"},{"issue":"1","key":"2_CR14","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2012","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR15","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: YOLO by Ultralytics, January 2023. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"2_CR16","unstructured":"Kirillov, A., et al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)"},{"key":"2_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"606","DOI":"10.1007\/978-3-030-11024-6_46","volume-title":"Computer Vision \u2013 ECCV 2018 Workshops","author":"VV Kniaz","year":"2019","unstructured":"Kniaz, V.V., Knyaz, V.A., Hlad\u016fvka, J., Kropatsch, W.G., Mizginov, V.: ThermalGAN: multimodal color-to-thermal image translation for person re-identification in multispectral dataset. In: Leal-Taix\u00e9, L., Roth, S. (eds.) ECCV 2018. LNCS, vol. 11134, pp. 606\u2013624. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-11024-6_46"},{"key":"2_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Video swin transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133211 (2022)","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"2_CR21","unstructured":"Miezianko, R.: Terravic research infrared database. In: IEEE OTCBVS WS Series Bench (2005)"},{"key":"2_CR22","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/s11263-016-0901-x","volume":"118","author":"C Palmero","year":"2016","unstructured":"Palmero, C., Clap\u00e9s, A., Bahnsen, C., M\u00f8gelmose, A., Moeslund, T.B., Escalera, S.: Multi-modal RGB-depth-thermal human body segmentation. Int. J. Comput. Vision 118, 217\u2013239 (2016)","journal-title":"Int. J. Comput. Vision"},{"key":"2_CR23","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)"},{"key":"2_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"2_CR25","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: NTU RGB+ D: a large scale dataset for 3d human activity analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1010\u20131019 (2016)","DOI":"10.1109\/CVPR.2016.115"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Shivakumar, S.S., Rodrigues, N., Zhou, A., Miller, I.D., Kumar, V., Taylor, C.J.: PST900: RGB-thermal calibration, dataset and segmentation network. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 9441\u20139447. IEEE (2020)","DOI":"10.1109\/ICRA40945.2020.9196831"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Divvala, S., Farhadi, A., Gupta, A.: Asynchronous temporal fields for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 585\u2013594 (2017)","DOI":"10.1109\/CVPR.2017.599"},{"key":"2_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"510","DOI":"10.1007\/978-3-319-46448-0_31","volume-title":"Computer Vision \u2013 ECCV 2016","author":"GA Sigurdsson","year":"2016","unstructured":"Sigurdsson, G.A., Varol, G., Wang, X., Farhadi, A., Laptev, I., Gupta, A.: Hollywood in homes: crowdsourcing data collection for activity understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 510\u2013526. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_31"},{"key":"2_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"746","DOI":"10.1007\/978-3-642-33715-4_54","volume-title":"Computer Vision \u2013 ECCV 2012","author":"N Silberman","year":"2012","unstructured":"Silberman, N., Hoiem, D., Kohli, P., Fergus, R.: Indoor segmentation and support inference from RGBD images. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7576, pp. 746\u2013760. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33715-4_54"},{"key":"2_CR31","doi-asserted-by":"crossref","unstructured":"Strohmayer, J., Kampel, M.: A compact tri-modal camera unit for RGBDT vision. In: 2022 the 5th International Conference on Machine Vision and Applications (ICMVA), pp. 34\u201342 (2022)","DOI":"10.1145\/3523111.3523116"},{"key":"2_CR32","unstructured":"Tkachenko, M., Malyuk, M., Holmanyuk, A., Liubimov, N.: Label studio: data labeling software (2020\u20132022). https:\/\/github.com\/heartexlabs\/label-studio"},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Bochkovskiy, A., Liao, H.Y.M.: YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7464\u20137475 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"2_CR35","unstructured":"Zhang, H., Goodfellow, I., Metaxas, D., Odena, A.: Self-attention generative adversarial networks. In: International Conference on Machine Learning, pp. 7354\u20137363. PMLR (2019)"},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ADE20K dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 633\u2013641 (2017)","DOI":"10.1109\/CVPR.2017.544"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-54605-1_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,13]],"date-time":"2024-11-13T23:22:37Z","timestamp":1731540157000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-54605-1_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031546044","9783031546051"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-54605-1_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"8 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAGM GCPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"DAGM German Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Heidelberg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"45","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dagm2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.dagm-gcpr.de\/year\/2023","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"76","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"53% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}