{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T16:23:24Z","timestamp":1747153404825,"version":"3.40.5"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031734960"},{"type":"electronic","value":"9783031734977"}],"license":[{"start":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T00:00:00Z","timestamp":1731715200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T00:00:00Z","timestamp":1731715200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73497-7_8","type":"book-chapter","created":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T03:59:37Z","timestamp":1731643177000},"page":"91-102","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Evaluation of\u00a0Lyrics Extraction from\u00a0Folk Music Sheets Using Vision Language Models (VLMs)"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0976-2784","authenticated-orcid":false,"given":"Andr\u00e9","family":"Sales Mendes","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0493-4471","authenticated-orcid":false,"given":"\u00c1lvaro","family":"Lozano Murciego","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9981-4586","authenticated-orcid":false,"given":"Lu\u00eds Augusto","family":"Silva","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0291-7627","authenticated-orcid":false,"given":"Diego M.","family":"Jim\u00e9nez-Bravo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2344-1012","authenticated-orcid":false,"given":"Mar\u00eda","family":"Navarro-C\u00e1ceres","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3884-2687","authenticated-orcid":false,"given":"Gilberto","family":"Bernardes","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,16]]},"reference":[{"key":"8_CR1","unstructured":"ABBYY: ABBYY FineReader (2023). https:\/\/www.abbyy.com\/en-eu\/finereader. Accessed 25 May 2024"},{"key":"8_CR2","doi-asserted-by":"crossref","unstructured":"Andrea, P., Zahra, A.: Music note position recognition in optical music recognition using convolutional neural network. Int. J. Arts Technol. 13(1), 45\u201360 (2021)","DOI":"10.1504\/IJART.2021.115764"},{"key":"8_CR3","unstructured":"Audiveris: Audiveris omr (2021). https:\/\/audiveris.github.io\/audiveris. Accessed 25 May 2024"},{"key":"8_CR4","unstructured":"Biblioteca Nacional: Presentaci\u00f3n del fondo de m\u00fasica tradicional imf-csic en madrid (2024). https:\/\/www.musicatradicional.eu\/. Accessed 25 May 2024"},{"key":"8_CR5","doi-asserted-by":"publisher","unstructured":"Cai, J., Peng, L., Tang, Y., Liu, C., Li, P.: Th-GAN: generative adversarial network based transfer learning for historical Chinese character recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 178\u2013183 (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00037","DOI":"10.1109\/ICDAR.2019.00037"},{"key":"8_CR6","doi-asserted-by":"publisher","unstructured":"Calvo-Zaragoza, J., Hajic, J., Pacha, A.: Understanding optical music recognition. ACM Comput. Surv. (CSUR) 53(4) (7 2020). https:\/\/doi.org\/10.1145\/3397499, https:\/\/dl.acm.org\/doi\/10.1145\/3397499","DOI":"10.1145\/3397499"},{"key":"8_CR7","unstructured":"Castellanos, F.J., Calvo-Zaragoza, J., Inesta, J.M.: A neural approach for full-page optical music recognition of mensural documents. In: ISMIR, pp. 558\u2013565 (2020)"},{"key":"8_CR8","unstructured":"Google Cloud: Google Cloud Vision OCR (2024). https:\/\/cloud.google.com\/vision. Accessed 25 May 2024"},{"key":"8_CR9","unstructured":"Hu, S., et al.: MiniCPM: Unveiling the Potential of Small Language Models with Scalable Training Strategies (2024). https:\/\/arxiv.org\/abs\/2404.06395v3"},{"key":"8_CR10","doi-asserted-by":"publisher","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: Layoutlmv3: pre-training for document AI with unified text and image masking. In: Proceedings of the 30th ACM International Conference on Multimedia. MM \u201922, pp. 4083\u20134091. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3503161.3548112","DOI":"10.1145\/3503161.3548112"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"Lee, S., Son, S.J., Oh, J., Kwak, N.: Handwritten music symbol classification using deep convolutional neural networks. In: 2016 International Conference on Information Science and Security (ICISS), pp.\u00a01\u20135. IEEE (2016)","DOI":"10.1109\/ICISSEC.2016.7885856"},{"key":"8_CR12","unstructured":"Liu, Y., et al.: On the hidden mystery of OCR in large multimodal models (2023). https:\/\/arxiv.org\/abs\/2305.07895v5"},{"key":"8_CR13","doi-asserted-by":"publisher","unstructured":"Mendes, A.S., Lozano\u00a0Murciego, L., Silva, L.A., Jim\u00e9nez\u00a0Bravo, D.M., Navarro\u00a0C\u00e1ceres, M., Bernardes, G.: salesmendesandre\/dataset-Evaluation-of-Lyrics- Extraction-from-Folk-Music-Sheets-Using-Vision- Language-Model: Dataset Release v1 (2024). https:\/\/doi.org\/10.5281\/zenodo.12662916","DOI":"10.5281\/zenodo.12662916"},{"key":"8_CR14","unstructured":"OpenAI: OpenAI API (2024). https:\/\/platform.openai.com\/docs\/models\/gpt-4o. Accessed 03 July 2024"},{"key":"8_CR15","doi-asserted-by":"publisher","unstructured":"Saluja, R., Punjabi, M., Carman, M., Ramakrishnan, G., Chaudhuri, P.: Sub-word embeddings for OCR corrections in highly fusional Indic languages. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 160\u2013165 (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00034","DOI":"10.1109\/ICDAR.2019.00034"},{"key":"8_CR16","doi-asserted-by":"publisher","unstructured":"Smith, R.: An overview of the tesseract OCR engine. In: Ninth International Conference on Document Analysis and Recognition (ICDAR 2007), vol.\u00a02, pp. 629\u2013633 (2007). https:\/\/doi.org\/10.1109\/ICDAR.2007.4376991","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Tensmeyer, C., Davis, B., Wigington, C., Lee, I., Barrett, B.: Pagenet: page boundary extraction in historical handwritten documents. In: Proceedings of the 4th International Workshop on Historical Document Imaging and Processing, pp. 59\u201364 (2017)","DOI":"10.1145\/3151509.3151522"},{"key":"8_CR18","unstructured":"Wang, W., et al.: Cogvlm: visual expert for pretrained language models (2023)"},{"key":"8_CR19","doi-asserted-by":"publisher","unstructured":"van\u00a0der Wel, E., Ullrich, K.: Optical music recognition with convolutional sequence-to-sequence models. In: Proceedings of the 18th International Society for Music Information Retrieval Conference, pp. 731\u2013737. ISMIR (2018). https:\/\/doi.org\/10.5281\/zenodo.1415664","DOI":"10.5281\/zenodo.1415664"}],"container-title":["Lecture Notes in Computer Science","Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73497-7_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T05:07:26Z","timestamp":1731647246000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73497-7_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,16]]},"ISBN":["9783031734960","9783031734977"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73497-7_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,16]]},"assertion":[{"value":"16 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare relevant to this article\u2019s content.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"EPIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"EPIA Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Viana do Castelo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"epia2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/epia2024.pt","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}