{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:28:16Z","timestamp":1771950496625,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,1,15]],"date-time":"2022-01-15T00:00:00Z","timestamp":1642204800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,1,15]]},"DOI":"10.1145\/3523150.3523154","type":"proceedings-article","created":{"date-parts":[[2022,4,13]],"date-time":"2022-04-13T21:39:55Z","timestamp":1649885995000},"page":"20-25","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["A Deep Learning-Based System for Document Layout Analysis"],"prefix":"10.1145","author":[{"given":"Hong Tai","family":"Tran","sequence":"first","affiliation":[{"name":"Faculty of Computer Science &amp; Engineering, Ho Chi Minh City University of Technology (HCMUT), Vietnam"}]},{"given":"Nam Quan","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Research Department, Cinnamon AI, Vietnam"}]},{"given":"Tuan Anh","family":"Tran","sequence":"additional","affiliation":[{"name":"Faculty of Computer Science &amp; Engineering, Ho Chi Minh City University of Technology (HCMUT), Vietnam"}]},{"given":"Xuan Toan","family":"Mai","sequence":"additional","affiliation":[{"name":"Faculty of Computer Science &amp; Engineering, Ho Chi Minh City University of Technology (HCMUT), Vietnam"}]},{"given":"Quoc Thang","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Faculty of Computer Science &amp; Engineering, Ho Chi Minh City University of Technology (HCMUT), Vietnam"}]}],"member":"320","published-online":{"date-parts":[[2022,4,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Document layout analysis: A comprehensive survey ACM Computing Surveys (CSUR)","author":"Binmakhashen G. M.","unstructured":"G. M. Binmakhashen , S. A. Mahmoud . 2019. Document layout analysis: A comprehensive survey ACM Computing Surveys (CSUR) , vol. 52 , no. 6, pp. 1\u201336. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3355610 G. M. Binmakhashen, S. A. Mahmoud. 2019. Document layout analysis: A comprehensive survey ACM Computing Surveys (CSUR), vol. 52, no. 6, pp. 1\u201336. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3355610"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.1990.118223"},{"key":"e_1_3_2_1_3_1","first-page":"952","volume-title":"QC","author":"Ha J.","year":"1995","unstructured":"J. Ha , R. M. Haralick , I. T. Phillips . 1995 . Recursive X-Y cut using bounding boxes of connected components, in\u00a0Proceedings of ICDAR1995, Montreal , QC , Canada , pp. 952 - 955 . https:\/\/ieeexplore.ieee.org\/abstract\/document\/602059\/ J. Ha, R. M. Haralick, I. T. Phillips. 1995. Recursive X-Y cut using bounding boxes of connected components, in\u00a0Proceedings of ICDAR1995, Montreal, QC, Canada, pp. 952-955. https:\/\/ieeexplore.ieee.org\/abstract\/document\/602059\/"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/2.144436"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2005.185"},{"key":"e_1_3_2_1_6_1","first-page":"1015","volume-title":"Proceedings of 10 th ICDAR","author":"Mudit A.","unstructured":"A. Mudit and D. David . 2009. Voronoi++: A dynamic page segmentation approach based on Voronoi and Docstrum features , in Proceedings of 10 th ICDAR , pp. 1011\u2013 1015 , IEEE. https:\/\/ieeexplore.ieee.org\/abstract\/document\/5277532 A. Mudit and D. David. 2009. Voronoi++: A dynamic page segmentation approach based on Voronoi and Docstrum features, in Proceedings of 10 th ICDAR, pp. 1011\u2013 1015, IEEE. https:\/\/ieeexplore.ieee.org\/abstract\/document\/5277532"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1006\/cviu.1998.0684"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.584106"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815330.1815375"},{"issue":"3","key":"e_1_3_2_1_10_1","first-page":"118","article-title":"Document page segmentation using neuro-fuzzy approach","volume":"8","author":"Kise K.","year":"2006","unstructured":"K. Kise , A. Sato , M. Iwata . 2006 . Document page segmentation using neuro-fuzzy approach , Applied Soft Computing , vol. 8 , no. 3 , pp. 118 \u2013 126 . https:\/\/www.sciencedirect.com\/science\/article\/abs\/pii\/S1568494606001050 K. Kise, A. Sato, M. Iwata. 2006. Document page segmentation using neuro-fuzzy approach, Applied Soft Computing, vol. 8, no. 3, pp. 118\u2013126. https:\/\/www.sciencedirect.com\/science\/article\/abs\/pii\/S1568494606001050","journal-title":"Applied Soft Computing"},{"key":"e_1_3_2_1_11_1","first-page":"1607","volume-title":"Proceedings of TENCON 2010-2010 IEEE Region 10 Conference","author":"Yucun P.","unstructured":"P. Yucun , Z. Qunfei , and K. Seiichiro . 2010. Document layout analysis and reading order determination for a reading robot , in Proceedings of TENCON 2010-2010 IEEE Region 10 Conference , pp. 1607 - 1612 . https:\/\/ieeexplore.ieee.org\/abstract\/document\/5686038 P. Yucun, Z. Qunfei, and K. Seiichiro. 2010. Document layout analysis and reading order determination for a reading robot, in Proceedings of TENCON 2010-2010 IEEE Region 10 Conference, pp. 1607-1612. https:\/\/ieeexplore.ieee.org\/abstract\/document\/5686038"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.70837"},{"key":"e_1_3_2_1_13_1","first-page":"241","volume-title":"Proceedings of 10 th ICDAR","author":"S.","year":"2009","unstructured":"S. R. 2009 . Hybrid page layout analysis via tab-stop detection , in Proceedings of 10 th ICDAR , pp. 241 - 245 , IEEE, https:\/\/ieeexplore.ieee.org\/abstract\/document\/5277715 S. R. 2009. Hybrid page layout analysis via tab-stop detection, in Proceedings of 10 th ICDAR, pp. 241-245, IEEE, https:\/\/ieeexplore.ieee.org\/abstract\/document\/5277715"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of 12th ICDAR, 958-962","author":"C. K., Y.","year":"2013","unstructured":"C. K., Y. F., and L. C.-L. 2013 . Hybrid page segmentation with efficient whitespace rectangles extraction and grouping , in Proceedings of 12th ICDAR, 958-962 . https:\/\/ieeexplore.ieee.org\/abstract\/document\/6628759 C. K., Y. F., and L. C.-L. 2013. Hybrid page segmentation with efficient whitespace rectangles extraction and grouping, in Proceedings of 12th ICDAR, 958-962. https:\/\/ieeexplore.ieee.org\/abstract\/document\/6628759"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-016-0265-3"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2017.05.030"},{"issue":"1","key":"e_1_3_2_1_17_1","first-page":"1","article-title":"A comprehensive survey of mostly textual document segmentation algorithms since 2008","volume":"64","author":"S. E., P.","year":"2017","unstructured":"S. E., P. G.-K., and O. J.-M. 2017 . A comprehensive survey of mostly textual document segmentation algorithms since 2008 , Pattern Recognition , vol. 64 , no. 1 , pp. 1 \u2013 14 . https:\/\/www.sciencedirect.com\/science\/article\/abs\/pii\/S0031320316303399 S. E., P. G.-K., and O. J.-M. 2017. A comprehensive survey of mostly textual document segmentation algorithms since 2008, Pattern Recognition, vol. 64, no. 1, pp. 1\u201314. https:\/\/www.sciencedirect.com\/science\/article\/abs\/pii\/S0031320316303399","journal-title":"Pattern Recognition"},{"issue":"10","key":"e_1_3_2_1_18_1","article-title":"Separation of text and non-text in document layout analysis using a recursive filter","volume":"9","author":"Anh T. T.","year":"2015","unstructured":"T. T. Anh , N. In-Seop , and K. Soo-Hyung 2015 . Separation of text and non-text in document layout analysis using a recursive filter ., KSII Transactions on Internet & Information Systems , vol. 9 , no. 10 . http:\/\/itiis.org\/digital-library\/20920 T. T. Anh, N. In-Seop, and K. Soo-Hyung 2015. Separation of text and non-text in document layout analysis using a recursive filter., KSII Transactions on Internet & Information Systems, vol. 9, no. 10. http:\/\/itiis.org\/digital-library\/20920","journal-title":"KSII Transactions on Internet & Information Systems"},{"key":"e_1_3_2_1_19_1","volume-title":"Pattern recognition and image analysis","author":"Zkin A.","year":"1813","unstructured":"A. Vil\u00e2A Zkin , I. Safonov , M. Egorova . 2013. Algorithm for segmentation of documents based on texture features , Pattern recognition and image analysis , vol. 23 , 153\u2013159. https:\/\/link.springer.com\/article\/10.1134\/S105466 1813 010136 A. Vil\u00e2AZkin, I. Safonov, M. Egorova. 2013. Algorithm for segmentation of documents based on texture features, Pattern recognition and image analysis, vol. 23, 153\u2013159. https:\/\/link.springer.com\/article\/10.1134\/S1054661813010136"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-015-0753-z"},{"key":"e_1_3_2_1_21_1","unstructured":"K. Isaak P. Sergio S. Concetto P. Carmelo G. Daniela G. D and M. P. 2019. A saliency-based convolutional neural network for table and chart detection in digitized documents in Proceeding of ICIAP. https:\/\/arxiv.org\/abs\/1804.06236  K. Isaak P. Sergio S. Concetto P. Carmelo G. Daniela G. D and M. P. 2019. A saliency-based convolutional neural network for table and chart detection in digitized documents in Proceeding of ICIAP. https:\/\/arxiv.org\/abs\/1804.06236"},{"key":"e_1_3_2_1_22_1","first-page":"261","volume-title":"Proceeding of ICDAR","author":"He D.","unstructured":"D. He , S. Cohen , B. Price , D. Kifer , and C. L. Giles . 2017. Multi-scale multi-task FCN for semantic page segmentation and table detection , in Proceeding of ICDAR , pp. 254\u2013 261 . https:\/\/ieeexplore.ieee.org\/abstract\/document\/8269981\/ D. He, S. Cohen, B. Price, D. Kifer, and C. L. Giles. 2017. Multi-scale multi-task FCN for semantic page segmentation and table detection, in Proceeding of ICDAR, pp. 254\u2013261. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8269981\/"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence. https:\/\/arxiv.org\/abs\/1806","author":"Quiros L.","unstructured":"L. Quiros , Multi-task handwritten document layout analysis. 2018 . Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence. https:\/\/arxiv.org\/abs\/1806 .08852 L. Quiros, Multi-task handwritten document layout analysis. 2018. Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence. https:\/\/arxiv.org\/abs\/1806.08852"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-019-00332-1"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/147"},{"key":"e_1_3_2_1_26_1","volume-title":"Kuronet: Pre-modern Japanese kuzushiji character recognition with deep learning, arXiv preprint arXiv:1910.09433. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8978045","author":"Tarin C.","year":"2019","unstructured":"C. Tarin , L. Alex , and K. Asanobu . 2019 . Kuronet: Pre-modern Japanese kuzushiji character recognition with deep learning, arXiv preprint arXiv:1910.09433. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8978045 C. Tarin, L. Alex, and K. Asanobu. 2019. Kuronet: Pre-modern Japanese kuzushiji character recognition with deep learning, arXiv preprint arXiv:1910.09433. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8978045"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of ICDAR, IEEE. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8977963","author":"Antonio Z. X. T. J.J.-Y.","year":"2019","unstructured":"Z. X. T. J.J.-Y. Antonio . 2019 . Publaynet: largest dataset ever for document layout analysis , in Proceedings of ICDAR, IEEE. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8977963 Z. X. T. J.J.-Y. Antonio. 2019. Publaynet: largest dataset ever for document layout analysis, in Proceedings of ICDAR, IEEE. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8977963"},{"key":"e_1_3_2_1_28_1","first-page":"778","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Kaiming H.","unstructured":"H. Kaiming , Z. Xiangyu , R. Shaoqing , and S. Jian . 2016. Deep residual learning for image recognition , in Proceedings of the IEEE conference on computer vision and pattern recognition , pp. 770\u2013 778 . https:\/\/openaccess.thecvf.com\/content_cvpr_2016\/html\/He_Deep_Residual_Learning_CVPR_2016_paper.html H. Kaiming, Z. Xiangyu, R. Shaoqing, and S. Jian. 2016. Deep residual learning for image recognition, in Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778. https:\/\/openaccess.thecvf.com\/content_cvpr_2016\/html\/He_Deep_Residual_Learning_CVPR_2016_paper.html"},{"key":"e_1_3_2_1_29_1","volume-title":"A threshold selection method from gray-level histograms","author":"Nobuyuki Otsu","unstructured":"Otsu and Nobuyuki . 1979. A threshold selection method from gray-level histograms , IEEE transactions on systems, man, and cybernetics, vol. 9 , no. 1, pp. 62\u201366. https:\/\/ieeexplore.ieee.org\/document\/4310076 Otsu and Nobuyuki. 1979. A threshold selection method from gray-level histograms, IEEE transactions on systems, man, and cybernetics, vol. 9, no. 1, pp. 62\u201366. https:\/\/ieeexplore.ieee.org\/document\/4310076"},{"key":"e_1_3_2_1_30_1","unstructured":"A. Apostolos B. David P. Christos and P. Stefan. 2009. A realistic dataset for performance evaluation of document layout analysis in Proceeding of 10 th ICDAR pp. 296-300 IEEE. https:\/\/ieeexplore.ieee.org\/document\/5277696  A. Apostolos B. David P. Christos and P. Stefan. 2009. A realistic dataset for performance evaluation of document layout analysis in Proceeding of 10 th ICDAR pp. 296-300 IEEE. https:\/\/ieeexplore.ieee.org\/document\/5277696"},{"key":"e_1_3_2_1_31_1","volume-title":"ICDAR2019 competition on recognition of documents with complex layouts-RDCL2019","author":"Christian C.","unstructured":"C. Christian , A. Apostolos , and P. Stefan . 2019 . ICDAR2019 competition on recognition of documents with complex layouts-RDCL2019 , in Proceeding of ICDAR, 1521\u20131526. https:\/\/ieeexplore.ieee.org\/document\/8978185 C. Christian, A. Apostolos, and P. Stefan. 2019. ICDAR2019 competition on recognition of documents with complex layouts-RDCL2019, in Proceeding of ICDAR, 1521\u20131526. https:\/\/ieeexplore.ieee.org\/document\/8978185"},{"key":"e_1_3_2_1_32_1","first-page":"241","volume-title":"International Conference on Medical image computing and computer-assisted intervention","author":"Olaf R.","unstructured":"R. Olaf , F. Philipp , and B. Thomas . 2015. U-net: Convolutional networks for biomedical image segmentation , in International Conference on Medical image computing and computer-assisted intervention , pp. 234\u2013 241 . https:\/\/arxiv.org\/abs\/1505.04597 R. Olaf, F. Philipp, and B. Thomas. 2015. U-net: Convolutional networks for biomedical image segmentation, in International Conference on Medical image computing and computer-assisted intervention, pp. 234\u2013241. https:\/\/arxiv.org\/abs\/1505.04597"},{"key":"e_1_3_2_1_33_1","first-page":"8260","volume-title":"Proceeding of NIPS2019","author":"Hugo T.","unstructured":"T. Hugo , V. Andrea , D. Matthijs , and H. J\u00e9gou . 2019. Fixing the train-test resolution discrepancy , in Proceeding of NIPS2019 , pp. 8250\u2013 8260 . https:\/\/arxiv.org\/abs\/1906.06423 T. Hugo, V. Andrea, D. Matthijs, and H. J\u00e9gou. 2019. Fixing the train-test resolution discrepancy, in Proceeding of NIPS2019, pp. 8250\u20138260. https:\/\/arxiv.org\/abs\/1906.06423"},{"key":"e_1_3_2_1_34_1","volume-title":"Adam: A method for stochastic optimization, arXiv preprint arXiv:1412.6980. https:\/\/www.researchgate.net\/publication\/269935079_Adam_A_Method_for_Stochastic_Optimization","author":"Kingma D. P.","year":"2014","unstructured":"D. P. Kingma and J. Ba . 2014 . Adam: A method for stochastic optimization, arXiv preprint arXiv:1412.6980. https:\/\/www.researchgate.net\/publication\/269935079_Adam_A_Method_for_Stochastic_Optimization D. P. Kingma and J. Ba. 2014. Adam: A method for stochastic optimization, arXiv preprint arXiv:1412.6980. https:\/\/www.researchgate.net\/publication\/269935079_Adam_A_Method_for_Stochastic_Optimization"},{"key":"e_1_3_2_1_35_1","first-page":"1408","volume-title":"Proceedings of ICDAR2011","author":"Christian C.","year":"2011","unstructured":"C. Christian , P. Stefan , A. Apostolos . 2011 . Scenario driven in-depth performance evaluation of document layout analysis methods . In: Proceedings of ICDAR2011 , pp. 1404\u2013 1408 . https:\/\/ieeexplore.ieee.org\/document\/6065541 C. Christian, P. Stefan, A. Apostolos. 2011. Scenario driven in-depth performance evaluation of document layout analysis methods. In: Proceedings of ICDAR2011, pp. 1404\u20131408. https:\/\/ieeexplore.ieee.org\/document\/6065541"},{"key":"e_1_3_2_1_36_1","first-page":"48","volume-title":"Proceedings of ICDAR2011","author":"Christian C.","year":"2011","unstructured":"C. Christian , P. Stefan , A. Apostolos . 2011 . Aletheia \u2013 an Advanced Document Layout and Text Ground-Truthing System for Production Environments , in Proceedings of ICDAR2011 , pp. 48 - 52 . https:\/\/ieeexplore.ieee.org\/document\/6065274 C. Christian, P. Stefan, A. Apostolos. 2011. Aletheia \u2013 an Advanced Document Layout and Text Ground-Truthing System for Production Environments, in Proceedings of ICDAR2011, pp. 48-52. https:\/\/ieeexplore.ieee.org\/document\/6065274"},{"key":"e_1_3_2_1_37_1","unstructured":"VDA2020 dataset https:\/\/drive.google.com\/file\/d\/1jENyx_PMrquzbtZinKSWaLwSRUFZjYgq  VDA2020 dataset https:\/\/drive.google.com\/file\/d\/1jENyx_PMrquzbtZinKSWaLwSRUFZjYgq"},{"key":"e_1_3_2_1_38_1","first-page":"770","volume-title":"NV","author":"He K.","unstructured":"K. He , X. Zhang , S. Ren and J. Sun . 2016. Deep Residual Learning for Image Recognition,\u00a0in Proceeding of CVPR2016, Las Vegas , NV , pp. 770 - 778 . https:\/\/ieeexplore.ieee.org\/document\/7780459 K. He, X. Zhang, S. Ren and J. Sun. 2016. Deep Residual Learning for Image Recognition,\u00a0in Proceeding of CVPR2016, Las Vegas, NV, pp. 770-778. https:\/\/ieeexplore.ieee.org\/document\/7780459"},{"key":"e_1_3_2_1_39_1","volume-title":"In\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition,\u00a0pp. 770-778","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J.\u00a02016. Deep residual learning for image recognition . In\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition,\u00a0pp. 770-778 . https:\/\/ieeexplore.ieee.org\/document\/7780459 He, K., Zhang, X., Ren, S., & Sun, J.\u00a02016. Deep residual learning for image recognition. In\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition,\u00a0pp. 770-778. https:\/\/ieeexplore.ieee.org\/document\/7780459"}],"event":{"name":"ICMLSC 2022: 2022 The 6th International Conference on Machine Learning and Soft Computing","location":"Haikou China","acronym":"ICMLSC 2022"},"container-title":["2022 The 6th International Conference on Machine Learning and Soft Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3523150.3523154","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3523150.3523154","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:43Z","timestamp":1750188643000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3523150.3523154"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,15]]},"references-count":39,"alternative-id":["10.1145\/3523150.3523154","10.1145\/3523150"],"URL":"https:\/\/doi.org\/10.1145\/3523150.3523154","relation":{},"subject":[],"published":{"date-parts":[[2022,1,15]]},"assertion":[{"value":"2022-04-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}