{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T00:32:29Z","timestamp":1768005149231,"version":"3.49.0"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031704413","type":"print"},{"value":"9783031704420","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70442-0_20","type":"book-chapter","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T08:09:40Z","timestamp":1725955780000},"page":"327-343","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Full-Page Music Symbols Recognition: State-of-the-Art Deep Model Comparison for\u00a0Handwritten and\u00a0Printed Music Scores"],"prefix":"10.1007","author":[{"given":"Ali","family":"Yesilkanat","sequence":"first","affiliation":[]},{"given":"Yann","family":"Soullard","sequence":"additional","affiliation":[]},{"given":"Bertrand","family":"Co\u00fcasnon","sequence":"additional","affiliation":[]},{"given":"Nathalie","family":"Girard","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"20_CR1","unstructured":"Bai, Y., Mei, J., Yuille, A.L., Xie, C.: Are transformers more robust than CNNs? In: Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems - NeurIPS, vol.\u00a034, pp. 26831\u201326843. Curran Associates, Inc. (2021)"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: delving into high quality object detection. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6154\u20136162 (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Calvo-Zaragoza Jr., J., J.H., Pacha, A.: Understanding optical music recognition. ACM Comput. Surv. (CSUR) 53(4), 1\u201335 (2020)","DOI":"10.1145\/3397499"},{"key":"20_CR4","doi-asserted-by":"publisher","unstructured":"Calvo-Zaragoza, J., Rizo, D.: End-to-end neural optical music recognition of monophonic scores. Appl. Sci. (2018). https:\/\/doi.org\/10.3390\/app8040606","DOI":"10.3390\/app8040606"},{"key":"20_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"issue":"2","key":"20_CR6","doi-asserted-by":"publisher","first-page":"936","DOI":"10.1109\/TSMC.2020.3005231","volume":"52","author":"G Chen","year":"2022","unstructured":"Chen, G., et al.: A survey of the four pillars for small object detection: multiscale representation, contextual information, super-resolution, and region proposal. IEEE Trans. Syst. Man Cybern.: Syst. 52(2), 936\u2013953 (2022). https:\/\/doi.org\/10.1109\/TSMC.2020.3005231","journal-title":"IEEE Trans. Syst. Man Cybern.: Syst."},{"key":"20_CR7","doi-asserted-by":"publisher","unstructured":"Co\u00fcasnon, B.: DMOS: a generic document recognition method, application to an automatic generator of musical scores, mathematical formulae and table structures recognition systems. In: 6th International Conference on Document Analysis and Recognition (ICDAR 2001), pp. 215\u2013220. IEEE Computer Society (2001). https:\/\/doi.org\/10.1109\/ICDAR.2001.953786","DOI":"10.1109\/ICDAR.2001.953786"},{"issue":"2\u20133","key":"20_CR8","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1007\/s10032-005-0148-5","volume":"8","author":"B Co\u00fcasnon","year":"2006","unstructured":"Co\u00fcasnon, B.: DMOS, a generic document recognition method: application to table structure analysis in a general and in a specific way. Int. J. Doc. Anal. Recognit. 8(2\u20133), 111\u2013122 (2006). https:\/\/doi.org\/10.1007\/s10032-005-0148-5","journal-title":"Int. J. Doc. Anal. Recognit."},{"key":"20_CR9","unstructured":"Co\u00fcasnon, B., R\u00e9tif, B.: Using a grammar for a reliable full score recognition system. In: Proceedings of the 1995 International Computer Music Conference, ICMC 1995, pp. 187\u2013194 (1995)"},{"key":"20_CR10","unstructured":"Dai, J., Li, Y., He, K., Sun, J.: R-FCN: object detection via region-based fully convolutional networks. In: Proceedings of the NIPS, pp. 379\u2013387 (2016)"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: Proceedings of the CVPR, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"20_CR12","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S.M.A., Van Gool, L., Williams, C.K.I., Winn, J., Zisserman, A.: The pascal visual object classes challenge: a retrospective. IJCV 111, 98\u2013136 (2015). https:\/\/doi.org\/10.1007\/s11263-014-0733-5","journal-title":"IJCV"},{"key":"20_CR13","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/s10032-011-0168-2","volume":"15","author":"A Forn\u00e9s","year":"2012","unstructured":"Forn\u00e9s, A., Dutta, A., Gordo, A., Llad\u00f3s, J.: CVC-MUSCIMA: a ground truth of handwritten music score images for writer identification and staff removal. IJDAR 15, 243\u2013251 (2012). https:\/\/doi.org\/10.1007\/s10032-011-0168-2","journal-title":"IJDAR"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the CVPR (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"20_CR15","unstructured":"Haji\u010d\u00a0Jr., J., Pecina, P.: Detecting noteheads in handwritten scores with convnets and bounding box regression. arXiv preprint arXiv:1708.01806 (2017)"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Haji\u010d, J., Pecina, P.: The MUSCIMA++ dataset for handwritten optical music recognition. In: Proceedings of the ICDAR, pp. 39\u201346 (2017)","DOI":"10.1109\/ICDAR.2017.16"},{"key":"20_CR17","unstructured":"Haji\u010d\u00a0Jr, J., Dorfer, M., Widmer, G., Pecina, P.: Towards full-pipeline handwritten OMR with musical symbol detection by U-Nets. In: ISMIR (2018)"},{"key":"20_CR18","doi-asserted-by":"publisher","first-page":"2645","DOI":"10.3390\/app9132645","volume":"9","author":"Z Huang","year":"2019","unstructured":"Huang, Z., Jia, X., Guo, Y.: State-of-the-art model for music object recognition with deep learning. Appl. Sci. 9, 2645 (2019). https:\/\/doi.org\/10.3390\/app9132645","journal-title":"Appl. Sci."},{"key":"20_CR19","unstructured":"Ilya, L., Frank, H., et\u00a0al.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the ICCV, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"20_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"20_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot multibox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"20_CR24","unstructured":"MuseScore: Free music composition and notation software $$|$$ musescore (2023). https:\/\/musescore.org. Accessed 23 June 2023"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Pacha, A., Choi, K.Y., Co\u00fcasnon, B., Ricquebourg, Y., Zanibbi, R., Eidenberger, H.: Handwritten music object detection: open issues and baseline results. In: Proceedings of the DAS, pp. 163\u2013168 (2018)","DOI":"10.1109\/DAS.2018.51"},{"key":"20_CR26","doi-asserted-by":"publisher","first-page":"1488","DOI":"10.3390\/app8091488","volume":"8","author":"A Pacha","year":"2018","unstructured":"Pacha, A., Haji\u010d, J., Calvo-Zaragoza, J.: A baseline for general music object detection with deep learning. Appl. Sci. 8, 1488 (2018). https:\/\/doi.org\/10.3390\/app8091488","journal-title":"Appl. Sci."},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Redmon, J., Farhadi, A.: YOLO9000: better, faster, stronger. In: Proceedings of the CVPR, pp. 7263\u20137271 (2017)","DOI":"10.1109\/CVPR.2017.690"},{"key":"20_CR28","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE TPAMI 39, 1137\u20131149 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE TPAMI"},{"key":"20_CR29","doi-asserted-by":"publisher","unstructured":"Ru, Y.: Computer assisted chord detection using deep learning and YOLOv4 neural network model. JPCS (2021). https:\/\/doi.org\/10.1088\/1742-6596\/2083\/4\/042017","DOI":"10.1088\/1742-6596\/2083\/4\/042017"},{"key":"20_CR30","unstructured":"Shatri, E., Fazekas, G.: DoReMi: first glance at a universal OMR dataset. arXiv preprint arXiv:2107.07786 (2021)"},{"key":"20_CR31","unstructured":"Shatri, E., Fazekas, G.: Optical music recognition: state of the art and major challenges. In: Gottfried, R., Hajdu, G., Sello, J., Anatrini, A., MacCallum, J. (eds.) Proceedings of the International Conference on Technologies for Music Notation and Representation \u2013 TENOR 2020\/21, pp. 175\u2013184. Hamburg University for Music and Theater, Hamburg (2020)"},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., Alemi, A.A.: Inception-v4, Inception-ResNet and the impact of residual connections on learning. In: AAAI (2017)","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Tuggener, L., Elezi, I., Schmidhuber, J., Pelillo, M., Stadelmann, T.: DeepScores-a dataset for segmentation, detection and classification of tiny objects. In: Proceedings of the ICPR, pp. 3704\u20133709 (2018)","DOI":"10.1109\/ICPR.2018.8545307"},{"key":"20_CR34","unstructured":"Tuggener, L., Elezi, I., Schmidhuber, J., Stadelmann, T.: Deep watershed detector for music object recognition. In: Proceedings of the ISMIR, pp. 271\u2013278 (2018)"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Tuggener, L., Satyawan, Y.P., Pacha, A., Schmidhuber, J., Stadelmann, T.: The DeepScoresV2 dataset and benchmark for music object detection. In: Proceedings of the ICPR, pp. 9188\u20139195 (2021)","DOI":"10.1109\/ICPR48806.2021.9412290"},{"key":"20_CR36","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2021","unstructured":"Wang, J., et al.: Deep high-resolution representation learning for visual recognition. TPAMI 43, 3349\u20133364 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2020.2983686","journal-title":"TPAMI"},{"key":"20_CR37","unstructured":"Yang, J., Li, C., Dai, X., Gao, J.: Focal modulation networks. In: Proceedings of the NeurIPS, pp. 4203\u20134217 (2022)"},{"key":"20_CR38","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1007\/s00138-017-0844-4","volume":"28","author":"JC Zaragoza","year":"2017","unstructured":"Zaragoza, J.C., Pertusa, A., Oncina, J.: Staff-line detection and removal using a convolutional neural network. Mach. Vis. Appl. 28, 665\u2013674 (2017). https:\/\/doi.org\/10.1007\/s00138-017-0844-4","journal-title":"Mach. Vis. Appl."},{"key":"20_CR39","unstructured":"Zaragoza, J.C., Vigliensoni, G., Fujinaga, I.: A machine learning framework for the categorization of elements in images of musical documents. In: TENOR (2017)"},{"key":"20_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: DINO: DETR with improved denoising anchor boxes for end-to-end object detection. In: Proceedings of the ICLR, pp. 7329\u20137338 (2023)","DOI":"10.1109\/CVPR52729.2023.00708"},{"key":"20_CR41","doi-asserted-by":"publisher","first-page":"9773","DOI":"10.1007\/s00521-023-08216-6","volume":"35","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Huang, Z., Zhang, Y., Ren, K.: A detector for page-level handwritten music object recognition based on deep learning. Neural Comput. Appl. 35, 9773\u20139787 (2023). https:\/\/doi.org\/10.1007\/s00521-023-08216-6","journal-title":"Neural Comput. Appl."}],"container-title":["Lecture Notes in Computer Science","Document Analysis Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70442-0_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T23:12:41Z","timestamp":1732749161000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70442-0_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031704413","9783031704420"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70442-0_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"11 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"DAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Document Analysis Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"das2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/das2024.seecs.edu.pk\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}