{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:17:27Z","timestamp":1772119047241,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T00:00:00Z","timestamp":1711670400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T00:00:00Z","timestamp":1711670400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s00138-024-01525-3","type":"journal-article","created":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T07:01:42Z","timestamp":1711695702000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["YOLOMH: you only look once for multi-task driving perception with high efficiency"],"prefix":"10.1007","volume":"35","author":[{"given":"Liu","family":"Fang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sun","family":"Bowen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2837-4727","authenticated-orcid":false,"given":"Miao","family":"Jianxi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Su","family":"Weixing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,29]]},"reference":[{"issue":"6","key":"1525_CR1","doi-asserted-by":"publisher","first-page":"550","DOI":"10.1007\/s11633-022-1339-y","volume":"19","author":"D Wu","year":"2022","unstructured":"Wu, D., Liao, M.W., Zhang, W.T., et al.: Yolop: you only look once for panoptic driving perception. Mach. Intell. Res. 19(6), 550\u2013562 (2022)","journal-title":"Mach. Intell. Res."},{"key":"1525_CR2","doi-asserted-by":"crossref","unstructured":"Wang, J., Wu, Q.M., Zhang, N.: You only look at once for real-time and generic multi-task. arxiv preprint arxiv:2310.01641 (2023)","DOI":"10.1109\/TVT.2024.3394350"},{"issue":"8","key":"1525_CR3","doi-asserted-by":"publisher","first-page":"6469","DOI":"10.1109\/JIOT.2020.3043716","volume":"8","author":"L Liu","year":"2020","unstructured":"Liu, L., Lu, S., Zhong, R., et al.: Computing systems for autonomous driving: state of the art and challenges. IEEE Internet Things J. 8(8), 6469\u20136486 (2020)","journal-title":"IEEE Internet Things J."},{"key":"1525_CR4","doi-asserted-by":"crossref","unstructured":"Zou, Z., Chen, K., Shi, Z., et al.: Object detection in 20 years: a survey. Proc. IEEE (2023)","DOI":"10.1109\/JPROC.2023.3238524"},{"key":"1525_CR5","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., et al.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"1525_CR6","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"1525_CR7","unstructured":"Ren, S., He, K., Girshick, R., et al.: Faster r-cnn: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"1525_CR8","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., et al.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1525_CR9","unstructured":"Redmon, J., Farhadi, A.: Yolov3: an incremental improvement. arxiv preprint arxiv:1804.02767 (2018)"},{"key":"1525_CR10","unstructured":"Bochkovskiy, A., Wang, C.Y., Liao, H.Y.M.: Yolov4: optimal speed and accuracy of object detection. arxiv preprint arxiv:2004.10934 (2020)"},{"key":"1525_CR11","unstructured":"Ge, Z., Liu, S., Wang, F., et al.: Yolox: exceeding yolo series in 2021. arxiv preprint arxiv:2107.08430 (2021)"},{"key":"1525_CR12","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Bochkovskiy, A., Liao, H.Y.M.: YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7464\u20137475 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"1525_CR13","unstructured":"Terven, J., Cordova-Esparza, D.: A comprehensive review of YOLO: from YOLOv1 to YOLOv8 and beyond[J]. arxiv preprint arxiv:2304.00501 (2023)"},{"key":"1525_CR14","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention-MICCAI: 18th International Conference, Munich, Germany, October 5\u20139, 2015, Proceedings, Part III 18. Springer 2015, 234\u2013241 (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"issue":"12","key":"1525_CR15","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., Cipolla, R.: Segnet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(12), 2481\u20132495 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1525_CR16","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., et al.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"issue":"2","key":"1525_CR17","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1145\/3140659.3080254","volume":"45","author":"A Parashar","year":"2017","unstructured":"Parashar, A., Rhu, M., Mukkara, A., et al.: SCNN: an accelerator for compressed-sparse convolutional neural networks. ACM SIGARCH Comput. Archit. News 45(2), 27\u201340 (2017)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"1525_CR18","doi-asserted-by":"crossref","unstructured":"Hou, Y., Ma, Z., Liu, C., et al.: Learning lightweight lane detection cnns by self attention distillation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1013\u20131021 (2019)","DOI":"10.1109\/ICCV.2019.00110"},{"key":"1525_CR19","unstructured":"Yu, F., Xian, W., Chen, Y., et al.: Bdd100k: a diverse driving video database with scalable annotation tooling, 2(5), 6. arxiv preprint arxiv:1805.04687 (2018)"},{"key":"1525_CR20","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., et al.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"1525_CR21","doi-asserted-by":"crossref","unstructured":"Yu, F., Koltun, V., Funkhouser, T.: Dilated residual networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 472\u2013480 (2017)","DOI":"10.1109\/CVPR.2017.75"},{"key":"1525_CR22","doi-asserted-by":"crossref","unstructured":"Howard, A., Sandler, M., Chu, G., et al.: Searching for mobilenetv3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"1525_CR23","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1525_CR24","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K.I., et al.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vis. 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vis."},{"key":"1525_CR25","doi-asserted-by":"crossref","unstructured":"Jampani, V., Sun, D., Liu, M.Y., et al.: Superpixel sampling networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 352\u2013368 (2018)","DOI":"10.1007\/978-3-030-01234-2_22"},{"key":"1525_CR26","doi-asserted-by":"crossref","unstructured":"Xu, H., Wang, S., Cai, X., et al.: Curvelane-nas: unifying lane-sensitive architecture search and adaptive point blending. In: Computer Vision-ECCV, 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XV 16. Springer International Publishing 2020, 689\u2013704 (2020)","DOI":"10.1007\/978-3-030-58555-6_41"},{"key":"1525_CR27","unstructured":"Han, C., Zhao, Q., Zhang, S., et al.: Yolopv2: better, faster, stronger for panoptic driving perception. arxiv preprint arxiv:2208.11434 (2022)"},{"key":"1525_CR28","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., et al.: Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"1525_CR29","unstructured":"Vu, D., Ngo, B., Phan, H.: Hybridnets: end-to-end perception network. arxiv preprint arxiv:2203.09035 (2022)"},{"key":"1525_CR30","doi-asserted-by":"crossref","unstructured":"Tan, M., Pang, R., Le, Q.V.: Efficientdet: scalable and efficient object detection. In; Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10781\u201310790 (2020)","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"1525_CR31","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., et al.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European Conference on Computer vision (ECCV), 801\u2013818 (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"1525_CR32","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. arxiv preprint arxiv:1904.07850 (2019)"},{"key":"1525_CR33","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., et al.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"issue":"1","key":"1525_CR34","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1109\/TCI.2016.2644865","volume":"3","author":"H Zhao","year":"2016","unstructured":"Zhao, H., Gallo, O., Frosio, I., et al.: Loss functions for image restoration with neural networks. IEEE Trans. Comput. Imaging 3(1), 47\u201357 (2016)","journal-title":"IEEE Trans. Comput. Imaging"},{"key":"1525_CR35","unstructured":"Huang, X., Wang, X., Lv, W., et al.: PP-YOLOv2: a practical object detector. arxiv preprint arxiv:2104.10419 (2021)"},{"key":"1525_CR36","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., et al.: Distance-IoU loss: Faster and better learning for bounding box regression. In: Proceedings of the AAAI Conference on Artificial Intelligence 34(07), 12993\u201313000 (2020)","DOI":"10.1609\/aaai.v34i07.6999"},{"issue":"1","key":"1525_CR37","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1109\/TII.2021.3085669","volume":"18","author":"J Li","year":"2021","unstructured":"Li, J., Chen, J., Sheng, B., et al.: Automatic detection and classification system of domestic waste via multimodel cascaded convolutional neural network. IEEE Trans. Ind. Inf. 18(1), 163\u2013173 (2021)","journal-title":"IEEE Trans. Ind. Inf."}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01525-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01525-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01525-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T00:19:21Z","timestamp":1715645961000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01525-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,29]]},"references-count":37,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["1525"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01525-3","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-3724985\/v1","asserted-by":"object"}]},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,29]]},"assertion":[{"value":"8 December 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 February 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 March 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"44"}}