{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T04:55:49Z","timestamp":1764996949089,"version":"3.40.3"},"reference-count":22,"publisher":"Elsevier BV","issue":"2","license":[{"start":{"date-parts":[[2013,4,1]],"date-time":"2013-04-01T00:00:00Z","timestamp":1364774400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Information Systems"],"published-print":{"date-parts":[[2013,4]]},"DOI":"10.1016\/j.is.2012.07.006","type":"journal-article","created":{"date-parts":[[2012,8,2]],"date-time":"2012-08-02T05:00:34Z","timestamp":1343883634000},"page":"183-197","source":"Crossref","is-referenced-by-count":8,"title":["Web-based closed-domain data extraction on online advertisements"],"prefix":"10.1016","volume":"38","author":[{"given":"Maria S.","family":"Pera","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rani","family":"Qumsiyeh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiu-Kai","family":"Ng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.is.2012.07.006_bib1","doi-asserted-by":"crossref","unstructured":"B. Allison, An improved hierarchical Bayesian model of language for document classification, in: Proceedings of COLING, 2008, pp. 25\u201332.","DOI":"10.3115\/1599081.1599085"},{"key":"10.1016\/j.is.2012.07.006_bib2","doi-asserted-by":"crossref","unstructured":"H. Chieu, H. Ng, Named entity recognition with a maximum entropy approach, in: Proceedings of Conference on Natural Language Learning, 2003, pp. 160\u2013163.","DOI":"10.3115\/1119176.1119199"},{"key":"10.1016\/j.is.2012.07.006_bib3","doi-asserted-by":"crossref","unstructured":"W. Cohen, Fast and effective rule induction, in: Proceedings of ICML, 1995, pp. 115\u2013123.","DOI":"10.1016\/B978-1-55860-377-6.50023-2"},{"key":"10.1016\/j.is.2012.07.006_bib4","doi-asserted-by":"crossref","unstructured":"E. Cortez, A. da Silva, M. Goncalves, E. de Moura, ONDUX: on-demand unsupervised learning for information extraction, in: Proceedings of SIGMOD, 2010, pp. 807\u2013818.","DOI":"10.1145\/1807167.1807254"},{"issue":"4","key":"10.1016\/j.is.2012.07.006_bib5","doi-asserted-by":"crossref","first-page":"219","DOI":"10.14778\/1938545.1938547","article-title":"Automatic wrappers for large scale web extraction","volume":"4","author":"Dalvi","year":"2011","journal-title":"VLDB Endowment"},{"key":"10.1016\/j.is.2012.07.006_bib6","unstructured":"M. Hall, E. Frank, Combining Naive Bayes and decision tables, in: Proceedings of Florida Artificial Intelligence Research Society Conference, 2008."},{"key":"10.1016\/j.is.2012.07.006_bib7","doi-asserted-by":"crossref","unstructured":"R. Khare, Y. An, An empirical study on using hidden Markov model for search interface segmentation, in: Proceedings of ACM CIKM, 2009, pp. 17\u201326.","DOI":"10.1145\/1645953.1645959"},{"key":"10.1016\/j.is.2012.07.006_bib8","doi-asserted-by":"crossref","unstructured":"Y. Liu, Y. Zheng, One-against-all multi-class SVM classification using reliability measures, in: Proceedings of IJCNN, 2005, pp. 849\u2013854.","DOI":"10.1109\/IJCNN.2005.1555963"},{"year":"2008","series-title":"Introduction to Information Retrieval","author":"Manning","key":"10.1016\/j.is.2012.07.006_bib9"},{"year":"2003","series-title":"Foundations of Statistical Natural Language Processing","author":"Manning","key":"10.1016\/j.is.2012.07.006_bib10"},{"key":"10.1016\/j.is.2012.07.006_bib11","doi-asserted-by":"crossref","unstructured":"G. Miao, J. Tatemura, W. Hsiung, A. Sawires, L. Moser, Extracting data records from the web using tag path clustering, in: Proceedings of WWW, 2009, pp. 981\u2013990.","DOI":"10.1145\/1526709.1526841"},{"issue":"1","key":"10.1016\/j.is.2012.07.006_bib12","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1613\/jair.2409","article-title":"Creating relational data from unstructured and ungrammatical data sources","volume":"31","author":"Michelson","year":"2008","journal-title":"Journal of Artificial Intelligence Research"},{"year":"1997","series-title":"Machine Learning","author":"Mitchell","key":"10.1016\/j.is.2012.07.006_bib13"},{"key":"10.1016\/j.is.2012.07.006_bib14","doi-asserted-by":"crossref","unstructured":"H. Nguyen, E. Kang, J. Freire, Automatically extracting form labels, in: Proceedings of IEEE ICDE, 2008, pp. 1498\u20131500.","DOI":"10.1109\/ICDE.2008.4497602"},{"issue":"2\u20133","key":"10.1016\/j.is.2012.07.006_bib15","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1007\/s10994-008-5049-7","article-title":"Learning (k, l)-contextual tree languages for information extraction from web pages","volume":"71","author":"Raeymaekers","year":"2008","journal-title":"Machine Learning"},{"key":"10.1016\/j.is.2012.07.006_bib16","first-page":"325","article-title":"Use of Bayesian network in information extraction from unstructured data sources, World Academy of Science","volume":"52","author":"Rajput","year":"2009","journal-title":"Engineering and Technology (WASET)"},{"key":"10.1016\/j.is.2012.07.006_bib17","doi-asserted-by":"crossref","unstructured":"D. Sculley, G. Wachman, Relaxed SVMs online for spam filtering, in: Proceedings of ACM SIGIR, 2007, pp. 415\u2013422.","DOI":"10.1145\/1277741.1277813"},{"key":"10.1016\/j.is.2012.07.006_bib18","doi-asserted-by":"crossref","unstructured":"X. Song, J. Liu, Y. Cao, C.-Y. Lin, H.-W. Hon, Automatic extraction of web data records containing user-generated content, in: Proceedings of ACM CIKM, 2010, pp. 39\u201348.","DOI":"10.1145\/1871437.1871447"},{"key":"10.1016\/j.is.2012.07.006_bib19","doi-asserted-by":"crossref","unstructured":"B. Tang, D. Mazzoni, Multiclass reduced-set support vector machines, in: Proceedings of ICML, 2006, pp. 921\u2013928.","DOI":"10.1145\/1143844.1143960"},{"key":"10.1016\/j.is.2012.07.006_bib20","doi-asserted-by":"crossref","unstructured":"W. Xu, X. Liu, Y. Gong, Document clustering based on non-negative matrix factorization, in: Proceedings of ACM SIGIR, 2003, pp. 267\u2013273.","DOI":"10.1145\/860435.860485"},{"key":"10.1016\/j.is.2012.07.006_bib21","unstructured":"Y. Yang, J. Pedersen, A comparative study on feature selection in text categorization, in: Proceedings of ICML, 1997, pp. 412\u2013420."},{"key":"10.1016\/j.is.2012.07.006_bib22","first-page":"1583","article-title":"Dynamic hierarchical Markov random fields for integrated web data extraction","volume":"9","author":"Zhu","year":"2008","journal-title":"Machine Learning Research"}],"container-title":["Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306437912001032?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306437912001032?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T12:42:36Z","timestamp":1743943356000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0306437912001032"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,4]]},"references-count":22,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2013,4]]}},"alternative-id":["S0306437912001032"],"URL":"https:\/\/doi.org\/10.1016\/j.is.2012.07.006","relation":{},"ISSN":["0306-4379"],"issn-type":[{"type":"print","value":"0306-4379"}],"subject":[],"published":{"date-parts":[[2013,4]]}}}