{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:15:53Z","timestamp":1780355753043,"version":"3.54.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T00:00:00Z","timestamp":1692921600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T00:00:00Z","timestamp":1692921600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s10032-023-00449-4","type":"journal-article","created":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T05:01:45Z","timestamp":1692939705000},"page":"97-109","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["A deep learning-based solution for digitization of invoice images with automatic invoice generation and labelling"],"prefix":"10.1007","volume":"27","author":[{"given":"Halil","family":"Arslan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yunus Emre","family":"I\u015f\u0131k","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yasin","family":"G\u00f6rmez","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,8,25]]},"reference":[{"key":"449_CR1","doi-asserted-by":"crossref","unstructured":"Yu, W., Lu, N., Qi, X., Gong, P., Xiao, R.: PICK: processing key information extraction from documents using improved graph learning-convolutional networks. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 4363\u20134370. IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9412927"},{"key":"449_CR2","doi-asserted-by":"crossref","unstructured":"Rusinol, M., Benkhelfallah, T., Poulain\u00a0dAndecy, V.: Field extraction from administrative documents by incremental structural templates. In: 2013 12th International Conference on Document Analysis and Recognition, pp. 1100\u20131104. IEEE (2013)","DOI":"10.1109\/ICDAR.2013.223"},{"key":"449_CR3","doi-asserted-by":"publisher","first-page":"78398","DOI":"10.1109\/ACCESS.2022.3192828","volume":"10","author":"H Arslan","year":"2022","unstructured":"Arslan, H.: End to end invoice processing application based on key fields extraction. IEEE Access 10, 78398\u201378413 (2022)","journal-title":"IEEE Access"},{"key":"449_CR4","doi-asserted-by":"crossref","unstructured":"Singh, P., Varadarajan, S., Singh, A.N., Srivastava, M.M.: Multi-domain document layout understanding using few-shot object detection. In: International Conference on Image Analysis and Recognition, pp. 89\u201399. Springer (2020)","DOI":"10.1007\/978-3-030-50516-5_8"},{"key":"449_CR5","doi-asserted-by":"publisher","unstructured":"Rodriguez-Cruz, R.P., Avila-Garcia, M.S., Hernandez-Luquin, M.F.: Automatic generation of printed representations of ecuadorian electronic invoices through XML data binding. J. Adv. Inf. Technol. (JAIT) 7(4) (2016). https:\/\/doi.org\/10.12720\/jait.7.4.271-275","DOI":"10.12720\/jait.7.4.271-275"},{"key":"449_CR6","unstructured":"Nishanth, A.: Dynamic invoicing from HTML templates using make (2022). https:\/\/pdf4me.com\/blog\/dynamic-invoicing-from-html-templates-using-integromat\/"},{"key":"449_CR7","doi-asserted-by":"crossref","unstructured":"Castrejon, L., Kundu, K., Urtasun, R., Fidler, S.: Annotating object instances with a polygon-rnn. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5230\u20135238 (2017)","DOI":"10.1109\/CVPR.2017.477"},{"key":"449_CR8","doi-asserted-by":"crossref","unstructured":"Acuna, D., Ling, H., Kar, A., Fidler, S.: Efficient interactive annotation of segmentation datasets with polygon-rnn++. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 859\u2013868 (2018)","DOI":"10.1109\/CVPR.2018.00096"},{"key":"449_CR9","doi-asserted-by":"crossref","unstructured":"Ling, H., Gao, J., Kar, A., Chen, W., Fidler, S.: Fast interactive object annotation with curve-gcn. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5257\u20135266 (2019)","DOI":"10.1109\/CVPR.2019.00540"},{"issue":"4","key":"449_CR10","doi-asserted-by":"publisher","first-page":"0250093","DOI":"10.1371\/journal.pone.0250093","volume":"16","author":"F Englbrecht","year":"2021","unstructured":"Englbrecht, F., Ruider, I.E., Bausch, A.R.: Automatic image annotation for fluorescent cell nuclei segmentation. PLoS ONE 16(4), 0250093 (2021)","journal-title":"PLoS ONE"},{"key":"449_CR11","doi-asserted-by":"crossref","unstructured":"Adhikari, B., Peltomaki, J., Puura, J., Huttunen, H.: Faster bounding box annotation for object detection in indoor scenes. In: 2018 7th European Workshop on Visual Information Processing (EUVIP), pp. 1\u20136. IEEE (2018)","DOI":"10.1109\/EUVIP.2018.8611732"},{"key":"449_CR12","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1016\/j.neucom.2022.02.012","volume":"484","author":"X Zhang","year":"2022","unstructured":"Zhang, X., Zhao, C., Luo, H., Zhao, W., Zhong, S., Tang, L., Peng, J., Fan, J.: Automatic learning for object detection. Neurocomputing 484, 260\u2013272 (2022)","journal-title":"Neurocomputing"},{"key":"449_CR13","doi-asserted-by":"crossref","unstructured":"Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for document layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1015\u20131022. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00166"},{"issue":"7","key":"449_CR14","doi-asserted-by":"publisher","first-page":"78","DOI":"10.3390\/data6070078","volume":"6","author":"D Baviskar","year":"2021","unstructured":"Baviskar, D., Ahirrao, S., Kotecha, K.: Multi-layout invoice document dataset (MIDD): a dataset for named entity recognition. Data 6(7), 78 (2021)","journal-title":"Data"},{"key":"449_CR15","doi-asserted-by":"publisher","first-page":"101494","DOI":"10.1109\/ACCESS.2021.3096739","volume":"9","author":"D Baviskar","year":"2021","unstructured":"Baviskar, D., Ahirrao, S., Kotecha, K.: Multi-layout unstructured invoice documents dataset: a dataset for template-free invoice processing and its evaluation using AI approaches. IEEE Access 9, 101494\u2013101512 (2021)","journal-title":"IEEE Access"},{"key":"449_CR16","unstructured":"Kawaguchi, K., Kaelbling, L.P., Bengio, Y.: Generalization in deep learning. arXiv:1710.05468 (2017)"},{"issue":"2","key":"449_CR17","doi-asserted-by":"publisher","first-page":"125","DOI":"10.3390\/info11020125","volume":"11","author":"A Buslaev","year":"2020","unstructured":"Buslaev, A., Iglovikov, V.I., Khvedchenya, E., Parinov, A., Druzhinin, M., Kalinin, A.A.: Albumentations: fast and flexible image augmentations. Information 11(2), 125 (2020)","journal-title":"Information"},{"key":"449_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106622","volume":"212","author":"X He","year":"2021","unstructured":"He, X., Zhao, K., Chu, X.: AutoML: a survey of the state-of-the-art. Knowl.-Based Syst. 212, 106622 (2021)","journal-title":"Knowl.-Based Syst."},{"key":"449_CR19","doi-asserted-by":"publisher","unstructured":"Khalifa, N.E., Loey, M., Mirjalili, S.: A comprehensive survey of recent trends in deep learning for digital images augmentation. Artif. Intell. Rev. 55, 2351\u20132377 (2022). https:\/\/doi.org\/10.1007\/s10462-021-10066-4","DOI":"10.1007\/s10462-021-10066-4"},{"key":"449_CR20","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"449_CR21","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"449_CR22","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. (NIPS), 28 (2015)"},{"key":"449_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"issue":"5","key":"449_CR24","doi-asserted-by":"publisher","first-page":"1483","DOI":"10.1109\/TPAMI.2019.2956516","volume":"43","author":"Z Cai","year":"2019","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: high quality object detection and instance segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 43(5), 1483\u20131498 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"449_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2022.103514","volume":"126","author":"SSA Zaidi","year":"2022","unstructured":"Zaidi, S.S.A., Ansari, M.S., Aslam, A., Kanwal, N., Asghar, M., Lee, B.: A survey of modern deep learning based object detection models. Digit. Signal Process. 126, 103514 (2022)","journal-title":"Digit. Signal Process."},{"key":"449_CR26","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S.K., Girshick, R.B., Farhadi, A.: You only look once: unified, real-time object detection. CoRR arXiv:1506.02640 (2015)","DOI":"10.1109\/CVPR.2016.91"},{"key":"449_CR27","unstructured":"D. Thuan.: Evolution of YOLO algorithm and YOLOv5: the state-of-the-art object detection algorithm (2021)"},{"key":"449_CR28","doi-asserted-by":"publisher","first-page":"1066","DOI":"10.1016\/j.procs.2022.01.135","volume":"199","author":"P Jiang","year":"2022","unstructured":"Jiang, P., Ergu, D., Liu, F., Cai, Y., Ma, B.: A review of Yolo algorithm developments. Procedia Comput. Sci. 199, 1066\u20131073 (2022). https:\/\/doi.org\/10.1016\/j.procs.2022.01.135","journal-title":"Procedia Comput. Sci."},{"key":"449_CR29","unstructured":"Redmon, J., Farhadi, A.: Yolov3: an incremental improvement. CoRR arXiv:1804.02767 (2018)"},{"key":"449_CR30","unstructured":"Bochkovskiy, A., Wang, C., Liao, H.M.: Yolov4: optimal speed and accuracy of object detection. CoRR arXiv:2004.10934 (2020)"},{"key":"449_CR31","doi-asserted-by":"publisher","unstructured":"Jocher, G.: YOLOv5 by ultralytics. https:\/\/doi.org\/10.5281\/zenodo.3908559. https:\/\/github.com\/ultralytics\/yolov5","DOI":"10.5281\/zenodo.3908559"},{"key":"449_CR32","doi-asserted-by":"crossref","unstructured":"Lin, T., Goyal, P., Girshick, R.B., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. CoRR arXiv:1708.02002 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"449_CR33","first-page":"355","volume-title":"Computer Vision\u2014ECCV 2020","author":"K Kim","year":"2020","unstructured":"Kim, K., Lee, H.S.: Probabilistic anchor assignment with iou prediction for object detection. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) Computer Vision\u2014ECCV 2020, pp. 355\u2013371. Springer, Cham (2020)"},{"key":"449_CR34","doi-asserted-by":"crossref","unstructured":"Feng, C., Zhong, Y., Gao, Y., Scott, M.R., Huang, W.: TOOD: task-aligned one-stage object detection. CoRR arXiv:2108.07755 (2021)","DOI":"10.1109\/ICCV48922.2021.00349"},{"key":"449_CR35","unstructured":"Jung, A.B.: Imgaug. https:\/\/github.com\/aleju\/imgaug. Accessed 30 Oct 2018 (2018)"},{"key":"449_CR36","doi-asserted-by":"crossref","unstructured":"Guo, Y., Shi, H., Kumar, A., Grauman, K., Rosing, T., Feris, R.S.: Spottune: transfer learning through adaptive fine-tuning. CoRR arXiv:1811.08737 (2018)","DOI":"10.1109\/CVPR.2019.00494"}],"container-title":["International Journal on Document Analysis and Recognition (IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-023-00449-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10032-023-00449-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-023-00449-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,15]],"date-time":"2024-02-15T04:34:14Z","timestamp":1707971654000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10032-023-00449-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,25]]},"references-count":36,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["449"],"URL":"https:\/\/doi.org\/10.1007\/s10032-023-00449-4","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"value":"1433-2833","type":"print"},{"value":"1433-2825","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,25]]},"assertion":[{"value":"27 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 August 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}