{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T13:31:19Z","timestamp":1760707879333},"reference-count":25,"publisher":"Springer Science and Business Media LLC","issue":"3-4","license":[{"start":{"date-parts":[[2007,10,16]],"date-time":"2007-10-16T00:00:00Z","timestamp":1192492800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2007,12]]},"DOI":"10.1007\/s10032-007-0052-2","type":"journal-article","created":{"date-parts":[[2007,10,15]],"date-time":"2007-10-15T10:50:37Z","timestamp":1192445437000},"page":"211-226","source":"Crossref","is-referenced-by-count":18,"title":["Unsupervised information extraction from unstructured, ungrammatical data sources on the World Wide Web"],"prefix":"10.1007","volume":"10","author":[{"given":"Matthew","family":"Michelson","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Craig A.","family":"Knoblock","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2007,10,16]]},"reference":[{"key":"52_CR1","doi-asserted-by":"crossref","unstructured":"Agichtein, E., Ganti, V.: Mining reference tables for automatic text segmentation. In: Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 20-29. ACM, Baltimore (2004)","DOI":"10.1145\/1014052.1014058"},{"key":"52_CR2","doi-asserted-by":"crossref","unstructured":"Bilenko, M., Mooney, R.J.: Adaptive duplicate detection using learnable string similarity measures. In: Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 39\u201348. 
ACM, Baltimore (2003)","DOI":"10.1145\/956750.956759"},{"key":"52_CR3","doi-asserted-by":"crossref","unstructured":"Cafarella, M.J., Downey, D., Soderland, S., Etzioni, O.: KnowItNow: Fast, scalable information extraction from the web. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, pp. 563-570. Association for Computational Linguistics, East Stroudsburg (2005)","DOI":"10.3115\/1220575.1220646"},{"key":"52_CR4","unstructured":"Carman, M.J., Knoblock, C.A.: Learning semantic descriptions of web information sources. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 2695\u20132700 (2007)"},{"key":"52_CR5","unstructured":"Ciravegna, F.: Adaptive information extraction from text by rule induction and generalisation. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 1251\u20131256 (2001)"},{"key":"52_CR6","unstructured":"Cohen, W., Ravikumar, P., Feinberg, S.: A comparison of string metrics for matching names and records. In: Proceedings of the ACM SIGKDD Workshop on Data Cleaning, Record Linkage, and Object Consolidation, pp. 13\u201318 (2003)"},{"key":"52_CR7","doi-asserted-by":"crossref","unstructured":"Cohen, W., Sarawagi, S.: Exploiting dictionaries in named entity extraction: combining semi-markov extraction processes and data integration methods. In: Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 89\u201398. ACM, Baltimore (2004)","DOI":"10.1145\/1014052.1014065"},{"key":"52_CR8","doi-asserted-by":"crossref","unstructured":"Craswell, N., Bailey, P., Hawking, D.: Server selection on the world wide web. In: Proceedings of the Conference on Digital Libraries, pp. 37\u201346. 
ACM, Baltimore (2000)","DOI":"10.1145\/336597.336628"},{"key":"52_CR9","doi-asserted-by":"crossref","unstructured":"Dill, S., Gibson, N., Gruhl, D., Guha, R., Jhingran, A., Kanungo, T., Rajagopalan, S., Tomkins, A., Tomlin, J.A., Zien, J.Y.: Semtag and seeker: Bootstrapping the semantic web via automated semantic annotation. In: Proceedings of the International World Wide Web Conference, pp. 178\u2013186. ACM, Baltimore (2003)","DOI":"10.1145\/775152.775178"},{"key":"52_CR10","doi-asserted-by":"crossref","unstructured":"Hassan, H., Hassan, A., Emam, O.: Unsupervised information extraction approach using graph mutual reinforcement. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, pp. 501\u2013508. Association for Computational Linguistics, East Stroudsburg (2006)","DOI":"10.3115\/1610075.1610144"},{"key":"52_CR11","unstructured":"Kushmerick, N., Weld, D.S., Doorenbos, R.: Wrapper induction for information extraction. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 729\u2013737 (1997)"},{"key":"52_CR12","unstructured":"Lerman, K., Plangrasopchok, A., Knoblock, C.A.: Automatically labeling the inputs and outputs of web services. In: Proceedings of the National Conference on Artificial Intelligence, pp. 1363\u20131368. AAAI, Charlotte (2006)"},{"key":"52_CR13","doi-asserted-by":"crossref","unstructured":"Levy, A.: Logic-based techniques in data integration. In: J. Minker (ed.) Logic Based Artificial Intelligence, pp. 575\u2013595. Kluwer, Dordrecht (2000)","DOI":"10.1007\/978-1-4615-1567-8_24"},{"key":"52_CR14","unstructured":"Levy, A.Y., Rajaraman, A., Ordille, J.J.: Querying heterogeneous information sources using source descriptions. In: Proceedings of the International Conference on Very Large Data Bases, pp. 251\u2013262. Morgan Kaufmann, San Francisco (1996)"},{"issue":"1","key":"52_CR15","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1109\/18.61115","volume":"37","author":"J. 
Lin","year":"1991","unstructured":"Lin J. (1991). Divergence measures based on the shannon entropy. IEEE Trans. Inf. Theory 37(1): 145\u2013151","journal-title":"IEEE Trans. Inf. Theory"},{"key":"52_CR16","unstructured":"McCallum, A.: Mallet: A machine learning for language toolkit http:\/\/mallet.cs.umass.edu (2002)"},{"key":"52_CR17","unstructured":"Michelson, M., Knoblock, C.A.: Semantic annotation of unstructured and ungrammatical text. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 1091\u20131098 (2005)"},{"key":"52_CR18","unstructured":"Michelson, M., Knoblock, C.A.: An automatic approach to semantic annotation of unstructured, ungrammatical sources: A first look. In: Proceedings of the IJCAI Workshop on Analytics for Noisy Unstructured Text Data, pp. 123\u2013130 (2007)"},{"key":"52_CR19","unstructured":"Michelson, M., Knoblock, C.A.: Mining heterogeneous transformations for record linkage. In: Proceedings of the International Workshop on Information Integration on the Web, pp. 68\u201373. AAAI, Charlotte (2007)"},{"key":"52_CR20","doi-asserted-by":"crossref","unstructured":"Minton, S.N., Nanjo, C., Knoblock, C.A., Michalowski, M., Michelson, M.: A heterogeneous field matching method for record linkage. In: Proceedings of the IEEE International Conference on Data Mining, pp. 314\u2013321. IEEE Computer Society, Washington DC (2005)","DOI":"10.1109\/ICDM.2005.7"},{"key":"52_CR21","doi-asserted-by":"crossref","unstructured":"Pa\u015fca, M., Lin, D., Bigham, J., Lifchits, A., Jain, A.: Organizing and searching the world wide web of facts - step one: the one-million fact extraction challenge. In: Proceedings of the National Conference on Artificial Intelligence, pp. 1400\u20131405. AAAI, Charlotte (2006)","DOI":"10.1145\/1242572.1242587"},{"key":"52_CR22","doi-asserted-by":"crossref","unstructured":"Reeve, L., Han, H.: Survey of semantic annotation platforms. In: Proceedings of ACM Symposium on Applied Computing, pp. 
1634\u20131638. ACM, Baltimore (2005)","DOI":"10.1145\/1066677.1067049"},{"key":"52_CR23","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1016\/0022-2836(81)90087-5","volume":"147","author":"T.F. Smith","year":"1981","unstructured":"Smith T.F., Waterman M.S. (1981). Identification of common molecular subsequences. J. Mol. Biol. 147: 195\u2013197","journal-title":"J. Mol. Biol."},{"issue":"3","key":"52_CR24","first-page":"330","volume":"14","author":"S. Thakkar","year":"2005","unstructured":"Thakkar S., Ambite J.L., Knoblock C.A. (2005). Composing, optimizing, and executing plans for bioinformatics web services. Int. J. Very Large Databases, Spec. Issue Data Manage. Anal. Mining Life Sci 14(3): 330\u2013353","journal-title":"Int. J. Very Large Databases, Spec. Issue Data Manage. Anal. Mining Life Sci"},{"key":"52_CR25","unstructured":"Winkler, W.E.: The state of record linkage and current research problems. Technical Report U.S. Census Bureau (1999)"}],"container-title":["International Journal of Document Analysis and Recognition 
(IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-007-0052-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10032-007-0052-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-007-0052-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,14]],"date-time":"2023-05-14T12:04:41Z","timestamp":1684065881000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10032-007-0052-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,10,16]]},"references-count":25,"journal-issue":{"issue":"3-4","published-print":{"date-parts":[[2007,12]]}},"alternative-id":["52"],"URL":"https:\/\/doi.org\/10.1007\/s10032-007-0052-2","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"value":"1433-2833","type":"print"},{"value":"1433-2825","type":"electronic"}],"subject":[],"published":{"date-parts":[[2007,10,16]]}}}