% Conference paper from ICADL 2022, published in a Springer LNCS proceedings
% volume. The citation key is the original (apparently auto-generated) ID and
% is kept as-is so existing \cite commands keep resolving.
% - address holds the publisher's city (Cham, seat of Springer Nature
%   Switzerland AG as named in the note), not a country or the conference venue.
% - The underscore in the repository URL inside the abstract is escaped because
%   the abstract is LaTeX-encoded text.
% - TODO(review): the LNCS volume number is not recorded; add a volume field
%   once confirmed against the publisher's page.
% - NOTE(review): most styles print the note field verbatim; trim it if the
%   copyright line should not appear in the bibliography.
@inproceedings{71771c7b55c345cc89d03671b486154b,
  author    = {Ding, Liangping and Huang, {Tian Yuan} and Liu, Huan and Wang, Yufei and Zhang, Zhixiong},
  title     = {Distantly Supervised Named Entity Recognition with Category-Oriented Confidence Calibration},
  editor    = {Tseng, Yuen-Hsien and Katsurai, Marie and Nguyen, {Hoa N.}},
  booktitle = {From Born-Physical to Born-Virtual},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Nature},
  address   = {Cham},
  year      = {2022},
  pages     = {46--55},
  doi       = {10.1007/978-3-031-21756-2_4},
  isbn      = {9783031217555},
  language  = {English},
  keywords  = {Digital library, Distant supervision, Named entity recognition, Pretrained language model, Self-training},
  note      = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 24th International Conference on Asia-Pacific Digital Libraries, ICADL 2022 ; Conference date: 30-11-2022 Through 02-12-2022},
  abstract  = {Named entity recognition plays an important role in extracting valuable information from digital libraries, which can help stakeholders to take full advantage of large quantities of documents to boost the development of scholarly knowledge discovery. Nevertheless, there aren{\textquoteright}t many annotated NER datasets aiming at scientific literature except medical domain, restricting to utilize abundant of advanced deep learning models. As an alternative solution, distant supervision provides a feasible way to eliminate the need of human annotations by automatically generating annotated datasets based on external resources such as knowledge base, while introducing noise inevitably. In this work, we study the noisy-labeled named entity recognition under distant supervision setting. Considering that most NER systems based on confidence estimation deal with noisy labels ignoring the fact that model has different levels of confidence towards different categories, we propose a Category-oriented confidence calibration (Coca) strategy with an automatically confidence threshold calculation module. We integrate our method into a teacher-student self-training framework to improve the model performance. Our proposed approach achieves promising performance among advanced baseline models and can be easily integrated into other confidence based model frameworks (Our code is publicly available at: https://github.com/possible1402/BOND\_Coca ).},
}