{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T05:55:41Z","timestamp":1774936541921,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2019,7,22]],"date-time":"2019-07-22T00:00:00Z","timestamp":1563753600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,7,22]],"date-time":"2019-07-22T00:00:00Z","timestamp":1563753600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100007225","name":"Ministry of Science and Technology","doi-asserted-by":"publisher","award":["MOST105- 2628-E-008-004-MY2"],"award-info":[{"award-number":["MOST105- 2628-E-008-004-MY2"]}],"id":[{"id":"10.13039\/100007225","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2020,2]]},"DOI":"10.1007\/s10489-019-01499-0","type":"journal-article","created":{"date-parts":[[2019,7,22]],"date-time":"2019-07-22T08:03:56Z","timestamp":1563782636000},"page":"271-295","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["DCADE: divide and conquer alignment with dynamic encoding for full page data extraction"],"prefix":"10.1007","volume":"50","author":[{"given":"Oviliani Yenty","family":"Yuliana","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1101-6337","authenticated-orcid":false,"given":"Chia-Hui","family":"Chang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,7,22]]},"reference":[{"key":"1499_CR1","doi-asserted-by":"crossref","unstructured":"Arasu A, Garcia-Molina H (2003) Extracting structured data from web pages. In: Proceedings of the 2003 ACM SIGMOD international conference on Management of data, pp 337\u2013348","DOI":"10.1145\/872757.872799"},{"key":"1499_CR2","doi-asserted-by":"crossref","unstructured":"Bing L, Lam W, Wong TL (2013) Wikipedia entity expansion and attribute extraction from the web using semi-supervised learning. In: Proceedings of the sixth ACM international conference on Web search and data mining, pp 567\u2013576","DOI":"10.1145\/2433396.2433468"},{"issue":"10","key":"1499_CR3","first-page":"805","volume":"6","author":"M Bronzi","year":"2013","unstructured":"Bronzi M, Crescenzi V, Merialdo P, Papotti P (2013) Extraction and integration of partially overlapping web sources. VLDB J 6(10):805\u2013816","journal-title":"VLDB J"},{"key":"1499_CR4","doi-asserted-by":"crossref","unstructured":"Carlson A, Betteridge J, Wang RC, Hruschka R, Mitchell TM (2010) Coupled semi-supervised learning for information extraction. In: Proceedings of the third ACM international conference on Web search and data mining, pp 101\u2013110","DOI":"10.1145\/1718487.1718501"},{"issue":"10","key":"1499_CR5","doi-asserted-by":"publisher","first-page":"1411","DOI":"10.1109\/TKDE.2006.152","volume":"18","author":"CH Chang","year":"2006","unstructured":"Chang CH, Kayed M, Girgis MR, Shaalan KF (2006) A survey of web information extraction systems. IEEE Trans Knowl Data Eng 18(10):1411\u20131428","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1499_CR6","doi-asserted-by":"crossref","unstructured":"Chang CH, Chen TS, Chen MC, Ding JL (2016) Efficient page-level data extraction via schema induction and verification. In: Proceedings of the Pacific-Asia conference on knowledge discovery and data mining, pp 478\u2013490","DOI":"10.1007\/978-3-319-31750-2_38"},{"key":"1499_CR7","doi-asserted-by":"crossref","unstructured":"Chu X, He Y, Chakrabarti K, Ganjam K (2015) Tegra: table extraction by global record alignment. In: Proceedings of the 2015 ACM SIGMOD international conference on management of data, pp 1713\u20131728","DOI":"10.1145\/2723372.2723725"},{"issue":"5","key":"1499_CR8","doi-asserted-by":"publisher","first-page":"731","DOI":"10.1145\/1017460.1017462","volume":"51","author":"V Crescenzi","year":"2005","unstructured":"Crescenzi V, Mecca G (2005) Automatic information extraction from large websites. Journal of the ACM (JACM) 51(5):731\u2013779","journal-title":"Journal of the ACM (JACM)"},{"key":"1499_CR9","doi-asserted-by":"crossref","unstructured":"Crescenzi V, Merialdo P, Alfred DQ (2013) Alfred: crowd assisted data extraction. In: Proceedings of the 22nd international conference on World Wide Web, pp 297\u2013300","DOI":"10.1145\/2487788.2487927"},{"key":"1499_CR10","doi-asserted-by":"crossref","unstructured":"Dhillon PS, Sellamanickam S, Selvaraj SK (2011) Semi-supervised multi-task learning of structured prediction models for web information extraction. In: Proceedings of the 20th ACM international conference on information and knowledge management, pp 957\u2013966","DOI":"10.1145\/2063576.2063713"},{"key":"1499_CR11","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/j.knosys.2014.07.007","volume":"70","author":"E Ferrara","year":"2014","unstructured":"Ferrara E, De Meo P, Fiumara G, Baumgartner R (2014) Web data extraction, applications and techniques: a survey. Knowl-Based Syst 70:301\u2013323","journal-title":"Knowl-Based Syst"},{"issue":"1","key":"1499_CR12","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/s00778-012-0286-6","volume":"22","author":"T Furche","year":"2013","unstructured":"Furche T, Gottlob G, Grasso G, Schallhart C, Sellers A (2013) OXPAth: a language for scalable data extraction, automation, and crawling on the deep web. The International Journal on Very Large Data Bases 22(1):47\u201372","journal-title":"The International Journal on Very Large Data Bases"},{"key":"1499_CR13","doi-asserted-by":"crossref","unstructured":"Gulhane P, Madaan A, Mehta R, Ramamirtham J, Rastogi R, Satpal S, Sengamedu SH, Tengli A, Tiwari C (2011) Web-scale information extraction with vertex. In: Proceedings of the IEEE 27th international conference on data engineering, pp 1209\u20131220","DOI":"10.1109\/ICDE.2011.5767842"},{"key":"1499_CR14","doi-asserted-by":"crossref","unstructured":"Gupta R, Sarawagi S (2011) Joint training for open-domain extraction on the web: exploiting overlap when supervision is limited. In: Proceedings of the fourth ACM international conference on Web search and data mining, pp 217\u2013226","DOI":"10.1145\/1935826.1935868"},{"key":"1499_CR15","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.is.2016.05.003","volume":"66","author":"P Jim\u00e9nez","year":"2016","unstructured":"Jim\u00e9nez P, Corchuelo R (2016) On learning web information extraction rules with TANGO. Inf Syst J 66:74\u2013103","journal-title":"Inf Syst J"},{"issue":"2","key":"1499_CR16","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1109\/TKDE.2009.82","volume":"22","author":"M Kayed","year":"2010","unstructured":"Kayed M, Chang CH (2010) Fivatech: page-level web data extraction from template pages. IEEE Trans Knowl Data Eng 22(2):249\u2013263","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"3","key":"1499_CR17","doi-asserted-by":"publisher","first-page":"514","DOI":"10.1109\/TKDE.2011.175","volume":"25","author":"Y Lu","year":"2013","unstructured":"Lu Y, He H, Zhao H, Meng W, Yu C (2013) Annotating search results from web databases. IEEE Trans Knowl Data Eng 25(3):514\u2013527","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"3","key":"1499_CR18","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/0022-2836(70)90057-4","volume":"48","author":"SB Needleman","year":"1970","unstructured":"Needleman SB, Wunsch CD (1970) A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48(3):443\u2013453","journal-title":"J Mol Biol"},{"key":"1499_CR19","doi-asserted-by":"crossref","unstructured":"Omari A, Kimelfeld B, Yahav E, Shoham S (2016) Lossless separation of web pages into layout code and data. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp 1805\u2013 1814","DOI":"10.1145\/2939672.2939858"},{"key":"1499_CR20","doi-asserted-by":"crossref","unstructured":"Omari A, Shoham S, Yahav E (2017) Synthesis of forgiving data extractors. In: Proceedings of the tenth ACM international conference on web search and data mining, pp 385\u2013 394","DOI":"10.1145\/3018661.3018740"},{"key":"1499_CR21","doi-asserted-by":"crossref","unstructured":"Ortona S, Orsi G, Furche T, Buoncristiano M (2016) Joint repairs for web wrappers. In: Proceedings of IEEE 32nd international conference on data engineering, pp 1146\u20131157","DOI":"10.1109\/ICDE.2016.7498320"},{"key":"1499_CR22","doi-asserted-by":"crossref","unstructured":"Qu J, Ouyang D, Hua W, Ye Y, Zhou X (2019) Discovering correlations between sparse features in distant supervision for relation extraction. In: Proceedings of the twelfth ACM international conference on web search and data mining, pp 726\u2013734","DOI":"10.1145\/3289600.3291004"},{"key":"1499_CR23","doi-asserted-by":"crossref","unstructured":"Ratner AJ, Bach SH, Ehrenberg HR, R\u00e9 C (2017) Snorkel: fast training set generation for information extraction. In: Proceedings of the 2017 ACM international conference on management of data, pp 1683\u20131686","DOI":"10.1145\/3035918.3056442"},{"key":"1499_CR24","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1016\/j.knosys.2015.07.012","volume":"89","author":"S Shi","year":"2015","unstructured":"Shi S, Liu C, Shen Y, Yuan C, Huang Y (2015) AutoRM: an effective approach for automatic Web data record mining. Knowl-Based Syst 89:314\u2013331","journal-title":"Knowl-Based Syst"},{"key":"1499_CR25","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/j.knosys.2012.10.009","volume":"39","author":"HA Sleiman","year":"2013","unstructured":"Sleiman HA, Corchuelo R (2013) Tex: an efficient and effective unsupervised web information extractor. Knowl-Based Syst 39:109\u2013123","journal-title":"Knowl-Based Syst"},{"issue":"6","key":"1499_CR26","doi-asserted-by":"publisher","first-page":"1544","DOI":"10.1109\/TKDE.2013.161","volume":"26","author":"HA Sleiman","year":"2014","unstructured":"Sleiman HA, Corchuelo R (2014) Trinity: on using trinary trees for unsupervised web data extraction. IEEE Trans Knowl Data Eng 26(6):1544\u20131556","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1499_CR27","doi-asserted-by":"crossref","unstructured":"Song X, Liu J, Cao Y, Lin CY, Hon HW (2010) Automatic extraction of web data records containing user-generated content. In: Proceedings of the 19th ACM international conference on information and knowledge management, pp 39\u201348","DOI":"10.1145\/1871437.1871447"},{"issue":"7","key":"1499_CR28","doi-asserted-by":"publisher","first-page":"1186","DOI":"10.1109\/TKDE.2011.66","volume":"24","author":"W Su","year":"2012","unstructured":"Su W, Wang J, Lochovsky FH, Liu Y (2012) Combining tag and value similarity for data extraction and alignment. IEEE Trans Knowl Data Eng 24(7):1186\u20131200","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1499_CR29","unstructured":"Tim F, Georg G, Giovanni G, Xiaonan G, Giorgio O, Christian S, Cheng W (2014) DIADEM: thousands of websites to a single database. In: Proceedings of the VLDB, vol 7, pp 1845\u2013 1856"},{"key":"1499_CR30","doi-asserted-by":"crossref","unstructured":"Xie X, Fang Y, Zhang Z, Li L (2012) Extracting data records from web using suffix tree. In: Proceedings of the ACM SIGKDD workshop on mining data semantics, p 12","DOI":"10.1145\/2350190.2350202"},{"issue":"11","key":"1499_CR31","doi-asserted-by":"publisher","first-page":"4355","DOI":"10.1007\/s10489-018-1208-0","volume":"48","author":"OY Yuliana","year":"2018","unstructured":"Yuliana OY, Chang CH (2018) A novel alignment algorithm for effective web data extraction from singleton-item pages. Appl Intell 48(11):4355\u20134370","journal-title":"Appl Intell"},{"issue":"12","key":"1499_CR32","doi-asserted-by":"publisher","first-page":"1614","DOI":"10.1109\/TKDE.2006.197","volume":"18","author":"Y Zhai","year":"2006","unstructured":"Zhai Y, Liu B (2006) Structured data extraction from the web based on partial tree alignment. IEEE Trans Knowl Data Eng 18(12):1614\u20131628","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1499_CR33","doi-asserted-by":"crossref","unstructured":"Zhao C, Zhang R, Qi J (2018) Web page template and data separation for better maintainability. In: Proceedings of international conference on web information systems engineering, pp 439\u2013449","DOI":"10.1007\/978-3-030-02922-7_30"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-019-01499-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-019-01499-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-019-01499-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,7,20]],"date-time":"2020-07-20T23:35:15Z","timestamp":1595288115000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-019-01499-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7,22]]},"references-count":33,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2020,2]]}},"alternative-id":["1499"],"URL":"https:\/\/doi.org\/10.1007\/s10489-019-01499-0","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,7,22]]},"assertion":[{"value":"22 July 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}