{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T12:54:06Z","timestamp":1743080046677,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031490170"},{"type":"electronic","value":"9783031490187"}],"license":[{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-49018-7_40","type":"book-chapter","created":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T23:02:21Z","timestamp":1701039741000},"page":"564-574","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["YOLOMM \u2013 You Only\u00a0Look Once\u00a0for\u00a0Multi-modal Multi-tasking"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-4753-8846","authenticated-orcid":false,"given":"Filipe","family":"Campos","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3725-2707","authenticated-orcid":false,"given":"Francisco Gon\u00e7alves","family":"Cerqueira","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5189-6228","authenticated-orcid":false,"given":"Ricardo P. M.","family":"Cruz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3760-2473","authenticated-orcid":false,"given":"Jaime S.","family":"Cardoso","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,27]]},"reference":[{"issue":"8\u20139","key":"40_CR1","doi-asserted-by":"publisher","first-page":"959","DOI":"10.1177\/02783649211006735","volume":"40","author":"J Behley","year":"2021","unstructured":"Behley, J., et al.: Towards 3D LiDAR-based semantic scene understanding of 3D point cloud sequences: the SemanticKITTI dataset. Int. J. Robot. Res. 40(8\u20139), 959\u2013967 (2021). https:\/\/doi.org\/10.1177\/02783649211006735","journal-title":"Int. J. Robot. Res."},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Bolya, D., Zhou, C., Xiao, F., Lee, Y.J.: YOLACT: real-time instance segmentation (2019)","DOI":"10.1109\/ICCV.2019.00925"},{"key":"40_CR3","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"40_CR4","doi-asserted-by":"publisher","unstructured":"Chan, C.Y.: Advancements, prospects, and impacts of automated driving systems. Int. J. Transp. Sci. Technol. 6(3), 208\u2013216 (2017). https:\/\/doi.org\/10.1016\/j.ijtst.2017.07.008, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S2046043017300035. safer Road Infrastructure and Operation Management","DOI":"10.1016\/j.ijtst.2017.07.008"},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B., Xia, T.: Multi-view 3D object detection network for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1907\u20131915 (2017)","DOI":"10.1109\/CVPR.2017.691"},{"key":"40_CR6","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"40_CR7","unstructured":"Deschaud, J.E.: KITTI-CARLA: a KITTI-like dataset generated by CARLA Simulator. arXiv e-prints: arXiv:2109.00892 (2021)"},{"issue":"70","key":"40_CR8","doi-asserted-by":"publisher","first-page":"4101","DOI":"10.21105\/joss.04101","volume":"7","author":"NS Detlefsen","year":"2022","unstructured":"Detlefsen, N.S., et al.: TorchMetrics - measuring reproducibility in PyTorch. J. Open Sour. Softw. 7(70), 4101 (2022). https:\/\/doi.org\/10.21105\/joss.04101","journal-title":"J. Open Sour. Softw."},{"key":"40_CR9","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"40_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition (2015)","DOI":"10.1109\/CVPR.2016.90"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Heuer, F., Mantowsky, S., Bukhari, S.S., Schneider, G.: MultiTask-CenterNet (MCN): efficient and diverse multitask learning using an anchor free approach (2021)","DOI":"10.1109\/ICCVW54120.2021.00116"},{"issue":"6","key":"40_CR12","doi-asserted-by":"publisher","first-page":"2877","DOI":"10.3390\/app12062877","volume":"12","author":"DG Lee","year":"2022","unstructured":"Lee, D.G., Kim, Y.K.: Joint semantic understanding with a multilevel branch for driving perception. Appl. Sci. 12(6), 2877 (2022). https:\/\/doi.org\/10.3390\/app12062877","journal-title":"Appl. Sci."},{"key":"40_CR13","doi-asserted-by":"crossref","first-page":"3292","DOI":"10.1109\/TPAMI.2022.3179507","volume":"45","author":"Y Liao","year":"2022","unstructured":"Liao, Y., Xie, J., Geiger, A.: KITTI-360: a novel dataset and benchmarks for urban scene understanding in 2D and 3D. Pattern Anal. Mach. Intell. (PAMI) 45, 3292\u2013310 (2022)","journal-title":"Pattern Anal. Mach. Intell. (PAMI)"},{"key":"40_CR14","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"40_CR15","doi-asserted-by":"publisher","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 936\u2013944 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.106","DOI":"10.1109\/CVPR.2017.106"},{"key":"40_CR16","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., Jia, J.: Path aggregation network for instance segmentation (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"40_CR17","doi-asserted-by":"crossref","unstructured":"Milioto, A., Vizzo, I., Behley, J., Stachniss, C.: RangeNet++: fast and accurate LiDAR semantic segmentation. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) (2019)","DOI":"10.1109\/IROS40897.2019.8967762"},{"key":"40_CR18","doi-asserted-by":"crossref","unstructured":"Paek, D.H., Kong, S.H., Wijaya, K.T.: K-lane: lidar lane dataset and benchmark for urban roads and highways. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshop on Autonomous Driving (WAD) (2022)","DOI":"10.1109\/CVPRW56347.2022.00491"},{"key":"40_CR19","doi-asserted-by":"crossref","unstructured":"Sheeny, M., De Pellegrin, E., Mukherjee, S., Ahrabian, A., Wang, S., Wallace, A.: RADIATE: a radar dataset for automotive perception. arXiv preprint: arXiv:2010.09076 (2020)","DOI":"10.1109\/ICRA48506.2021.9562089"},{"key":"40_CR20","doi-asserted-by":"crossref","unstructured":"Sun, P., et al.: Scalability in perception for autonomous driving: Waymo open dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"40_CR21","unstructured":"Vu, D., Ngo, B., Phan, H.: HybridNets: end-to-end perception network (2022)"},{"key":"40_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11633-022-1339-y","volume":"19","author":"D Wu","year":"2022","unstructured":"Wu, D., et al.: YOLOP: you only look once for panoptic driving perception. Mach. Intell. Res. 19, 1\u201313 (2022)","journal-title":"Mach. Intell. Res."},{"key":"40_CR23","doi-asserted-by":"crossref","unstructured":"Yu, F., et al.: BDD100K: a diverse driving dataset for heterogeneous multitask learning (2020)","DOI":"10.1109\/CVPR42600.2020.00271"}],"container-title":["Lecture Notes in Computer Science","Progress in Pattern Recognition, Image Analysis, Computer Vision, and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-49018-7_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T10:59:37Z","timestamp":1730631577000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-49018-7_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,27]]},"ISBN":["9783031490170","9783031490187"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-49018-7_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,11,27]]},"assertion":[{"value":"27 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CIARP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Iberoamerican Congress on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Coimbra","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ciarp2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Conftool","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"106","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"61","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"58% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}