Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KBQA extension #1598

Merged
merged 39 commits into from
May 13, 2023
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
1a59449
Release 1.0.0
IgnatovFedor Nov 8, 2022
f031581
full version of russian kbqa
dmitrijeuseew Nov 8, 2022
06392df
update
dmitrijeuseew Nov 17, 2022
d8d511a
update english kbqa version
dmitrijeuseew Nov 24, 2022
eff34f7
update
dmitrijeuseew Dec 1, 2022
1bd600a
update docs and configs
dmitrijeuseew Jan 5, 2023
00e0dda
update docs
dmitrijeuseew Jan 5, 2023
babc29b
refactor
dmitrijeuseew Jan 5, 2023
23179ea
Merge branch 'dev' into feat/kbqa_full
dmitrijeuseew Jan 7, 2023
6f6e66e
fix docs
dmitrijeuseew Jan 7, 2023
1d7de40
fix requirements
dmitrijeuseew Jan 7, 2023
ce02ab7
refactor kbqa_cq_en and kbqa_cq_ru
dmitrijeuseew Feb 7, 2023
4b9f52c
remove specific configs for datasets
dmitrijeuseew Feb 7, 2023
cd7cf14
Merge branch 'dev' into feat/kbqa_full
dmitrijeuseew Feb 18, 2023
1b44275
refactor
dmitrijeuseew Mar 21, 2023
ae76866
refactor
dmitrijeuseew Mar 21, 2023
94dad22
specify error type in tree_to_sparql
dmitrijeuseew Mar 21, 2023
e33c47c
refactor
dmitrijeuseew Mar 21, 2023
bf57297
refactor
dmitrijeuseew Mar 21, 2023
ee49d9a
refactor
dmitrijeuseew Mar 21, 2023
d8c63c8
change download paths in configs
dmitrijeuseew Mar 22, 2023
ab62448
remove unnecessary parameter
dmitrijeuseew Mar 22, 2023
ca6bfb1
remove unnecessary parameter
dmitrijeuseew Mar 22, 2023
b4499c8
update tests
dmitrijeuseew Mar 23, 2023
d69f006
Merge branch 'dev' into feat/kbqa_full
dmitrijeuseew Mar 23, 2023
d07ee3e
add labels parameter
dmitrijeuseew Mar 30, 2023
f96c3c9
fix docs
dmitrijeuseew Mar 30, 2023
5176f3d
refactor: minor changes
IgnatovFedor Apr 3, 2023
0cdbf5e
remove lang parameter from tree_to_sparql
dmitrijeuseew Apr 3, 2023
4abd098
remove: en_core_web_sm from slovnet_syntax_parser tree_to_sparql requ…
IgnatovFedor Apr 5, 2023
8abbf02
fix: docs build by adding razdel to mock imports list
IgnatovFedor Apr 5, 2023
a63b169
docs: removed deeppavlov.configs usage from entity extraction examples
IgnatovFedor Apr 5, 2023
afbdbf3
refactor: minor changes
IgnatovFedor Apr 5, 2023
c089fa4
refactor
dmitrijeuseew Apr 9, 2023
3eede30
refactor
dmitrijeuseew Apr 9, 2023
6d354a4
fixes
dmitrijeuseew Apr 11, 2023
5ef47eb
minor changes
zucchini-nlp Apr 18, 2023
a2fbfc4
fix: tests
IgnatovFedor May 11, 2023
93ff27d
from pymorphy tag to spacy, fix paths in configs
dmitrijeuseew May 12, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@
{
"class_name": "ner_chunker",
"batch_size": 16,
"max_chunk_len" : 180,
"max_seq_len" : 300,
"vocab_file": "{TRANSFORMER}",
"in": ["x"],
"out": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"]
},
{
"thres_proba": 0.05,
"thres_proba": 0.6,
"o_tag": "O",
"tags_file": "{NER_PATH}/tag.dict",
"return_entities_with_tags": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
{
"class_name": "ner_chunker",
"batch_size": 16,
"max_chunk_len" : 180,
"max_seq_len" : 300,
"vocab_file": "{TRANSFORMER}",
"in": ["x"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_en.json",
"in": ["entity_substr", "tags", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages"]
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
}
],
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages"]
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_ru.json",
"in": ["entity_substr", "tags", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages"]
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
}
],
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages"]
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
Expand Down
18 changes: 7 additions & 11 deletions deeppavlov/configs/entity_extraction/entity_linking_en.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"chainer": {
"in": ["entity_substr", "tags", "sentences", "entity_offsets", "sentences_offsets"],
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"pipe": [
{
"class_name": "torch_transformers_entity_ranker_infer",
Expand All @@ -14,10 +14,10 @@
},
{
"class_name": "entity_linker",
"in": ["entity_substr", "tags", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages"],
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"],
"load_path": "{DOWNLOADS_PATH}/entity_linking_eng",
"entities_database_filename": "el_eng.db",
"entities_database_filename": "el_eng_v2.db",
"entity_ranker": "#entity_descr_ranking",
"rank_in_runtime": true,
"num_entities_for_bert_ranking": 20,
Expand All @@ -31,10 +31,10 @@
"use_tags": true,
"full_paragraph": true,
"return_confidences": true,
"lang": "ru"
"lang": "en"
}
],
"out": ["entity_ids", "entity_conf", "entity_pages"]
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
Expand All @@ -45,16 +45,12 @@
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/entity_linking/el_db_eng.tar.gz",
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_db_eng_v2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/entity_linking_eng"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/entity_linking/el_ranker_eng.tar.gz",
"subdir": "{MODELS_PATH}/entity_linking_eng"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_lite.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
}
]
}
Expand Down
25 changes: 17 additions & 8 deletions deeppavlov/configs/entity_extraction/entity_linking_ru.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"chainer": {
"in": ["entity_substr", "tags", "sentences", "entity_offsets", "sentences_offsets"],
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"pipe": [
{
"class_name": "torch_transformers_entity_ranker_infer",
Expand All @@ -14,27 +14,36 @@
},
{
"class_name": "entity_linker",
"in": ["entity_substr", "tags", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages"],
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"],
"load_path": "{DOWNLOADS_PATH}/entity_linking_rus",
"entities_database_filename": "el_rus.db",
"entities_database_filename": "el_rus_v2.db",
"words_dict_filename": "{DOWNLOADS_PATH}/entity_linking_rus/words_dict.pickle",
"ngrams_matrix_filename": "{DOWNLOADS_PATH}/entity_linking_rus/ngrams_matrix.npz",
"entity_ranker": "#entity_descr_ranking",
"rank_in_runtime": true,
"num_entities_for_bert_ranking": 20,
"num_entities_for_bert_ranking": 30,
"use_gpu": false,
"include_mention": false,
"num_entities_to_return": 3,
"lemmatize": true,
"use_descriptions": true,
"use_connections": true,
"use_tags": true,
"wikidata_file": "{DOWNLOADS_PATH}/wikidata/wikidata_lite.hdt",
"kb_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_lite.hdt",
"prefixes": {"entity": ["http://we"],
"rels": {"direct": "http://wpd",
"no_type": "http://wp",
"statement": "http://wps",
"qualifier": "http://wpq"
}
},
"full_paragraph": true,
"return_confidences": true,
"lang": "ru"
}
],
"out": ["entity_ids", "entity_conf", "entity_pages"]
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
Expand All @@ -45,7 +54,7 @@
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/entity_linking/el_db_rus.tar.gz",
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_files_rus_v2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/entity_linking_rus"
},
{
Expand Down
136 changes: 118 additions & 18 deletions deeppavlov/configs/kbqa/kbqa_cq_en.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,56 @@
{
"dataset_reader": {
"class_name": "lcquad_reader",
"question_types": ["statement_property", "right-subgraph", "simple question left",
"simple question right", "left-subgraph", "rank"],
"num_samples": 100,
"data_path": "{DOWNLOADS_PATH}/lcquad/lcquad2.json"
},
"dataset_iterator": {
"class_name": "data_learning_iterator"
},
"chainer": {
"in": ["x"],
"in_y": ["y"],
"in_y": ["gold_answer_ids", "gold_answer_labels", "gold_query"],
"pipe": [
{
"class_name": "question_sign_checker",
"in": ["x"],
"out": ["x_punct"]
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_detection_en.json",
"config_path": "{CONFIGS_PATH}/classifiers/query_pr.json",
"in": ["x_punct"],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
"out": ["template_type"]
},
{
"class_name": "query_formatter",
"query_info": {"unk_var": "?answer", "mid_var": "?ent"},
"in": ["gold_query"],
"out": ["f_gold_query"]
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_detection_en.json",
"overwrite": {
"chainer.pipe.1.make_tags_from_probas": true,
"chainer.pipe.2.ner": {
"config_path": "{CONFIGS_PATH}/ner/ner_ontonotes_bert.json",
"overwrite": {
"chainer.out": ["x_tokens", "tokens_offsets", "y_pred", "probas"],
"chainer.pipe.2.use_crf": false,
"metadata.variables.TRANSFORMER": "distilbert-base-cased",
"metadata.variables.MODEL_PATH": "{MODELS_PATH}/entity_type_detection_distilbert_lcquad2.0"
}
},
"metadata.variables.NER_PATH": "{MODELS_PATH}/entity_type_detection_distilbert_lcquad2.0"
},
"in": ["x_punct", "template_type"],
"out": ["entity_type_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
},
{
"class_name": "entity_type_split",
"in": ["entity_type_substr", "tags"],
"out": ["entity_substr", "entity_tags", "type_substr"]
},
{
"class_name": "answer_types_extractor",
Expand All @@ -22,13 +61,31 @@
"out": ["answer_types", "f_entity_substr", "f_tags"]
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_en.json",
"class_name": "entity_linker",
"load_path": "{DOWNLOADS_PATH}/entity_linking_eng",
"entities_database_filename": "el_db_lcquad2.db",
"num_entities_to_return": 7,
"lemmatize": true,
"use_descriptions": false,
"use_connections": false,
"use_tags": true,
"alias_coef": 1.0,
"wikidata_file": "{DOWNLOADS_PATH}/wikidata/wikidata_full.hdt",
"prefixes": {"entity": ["http://we"],
"rels": {"direct": "http://wpd",
"no_type": "http://wp",
"statement": "http://wps",
"qualifier": "http://wpq"
}
},
"return_confidences": true,
"lang": "en",
"id": "entity_linker"
},
{
"class_name": "wiki_parser",
"id": "wiki_p",
"wiki_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_lite.hdt",
"wiki_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_full.hdt",
"lang": "@en"
},
{
Expand All @@ -38,19 +95,14 @@
"load_path": "{DOWNLOADS_PATH}/wikidata_eng",
"templates_filename": "templates_eng.json"
},
{
"config_path": "{CONFIGS_PATH}/classifiers/query_pr.json",
"in": ["x_punct"],
"out": ["template_type"]
},
{
"class_name": "rel_ranking_infer",
"id": "rel_r_inf",
"ranker": {"config_path": "{CONFIGS_PATH}/ranking/rel_ranking_bert_en.json"},
"ranker": {"config_path": "{CONFIGS_PATH}/ranking/rel_ranking_roberta_en.json",
"overwrite": {"chainer.out": ["y_pred_probas"]}
},
"wiki_parser": "#wiki_p",
"batch_size": 32,
"return_all_possible_answers": true,
"return_answer_ids": false,
"rank_answers": true,
"load_path": "{DOWNLOADS_PATH}/wikidata_eng",
"rel_q2name_filename": "wiki_dict_properties_eng.pickle"
Expand All @@ -65,24 +117,72 @@
"load_path": "{DOWNLOADS_PATH}/wikidata",
"rank_rels_filename_1": "rels_0.txt",
"rank_rels_filename_2": "rels_1.txt",
"sparql_queries_filename": "{DOWNLOADS_PATH}/wikidata/sparql_queries.json",
"sparql_queries_filename": "{DOWNLOADS_PATH}/wikidata/sparql_queries_eng.json",
"entities_to_leave": 5,
"rels_to_leave": 10,
"in": ["x_punct", "x_punct", "template_type", "f_entity_substr", "f_tags", "answer_types"],
"out": ["answers"]
"return_answers": false,
"map_query_str_to_kb": [["P0", "http://wd"], ["P00", "http://wl"], ["wd:", "http://we/"], ["wdt:", "http://wpd/"],
[" p:", " http://wp/"], ["ps:", "http://wps/"], ["pq:", "http://wpq/"]],
"kb_prefixes": {"entity": "wd:E", "rel": "wdt:R", "type": "wd:T", "type_rel": "wdt:P", "type_rels": ["P31", "P279"]},
"gold_query_info": {"unk_var": "?answer", "mid_var": "?ent"},
"in": ["x_punct", "x_punct", "template_type", "entity_substr", "type_substr", "entity_tags", "probas", "answer_types"],
"out": ["cand_answers", "template_answers"]
},
{
"class_name": "rel_ranking_infer",
"ranker": {"config_path": "{CONFIGS_PATH}/ranking/path_ranking_nll_roberta_en.json",
"overwrite": {"chainer.pipe.1.return_probas": true}
},
"wiki_parser": "#wiki_p",
"batch_size": 32,
"nll_path_ranking": true,
"return_elements": ["answer_ids", "queries"],
"rank_answers": true,
"load_path": "{DOWNLOADS_PATH}/wikidata_eng",
"rel_q2name_filename": "wiki_dict_properties_eng.pickle",
"in": ["x_punct", "template_type", "cand_answers", "entity_substr", "template_answers"],
"out": ["answers", "answer_ids", "query"]
}
],
"out": ["answers", "answer_ids", "query"]
},
"train": {
"evaluation_targets": ["test"],
"batch_size": 1,
"metrics": [
{
"name": "kbqa_accuracy",
"inputs": ["x", "answers", "answer_ids", "query", "gold_answer_labels", "gold_answer_ids", "f_gold_query"]
}
],
"out": ["answers"]
"class_name": "nn_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
},
"download": [
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/queries_and_rels.tar.gz",
"url": "http://files.deeppavlov.ai/kbqa/datasets/lcquad2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/lcquad"
},
{
"url": "http://files.deeppavlov.ai/kbqa/models/entity_type_detection_distilbert_lcquad2.0.tar.gz",
"subdir": "{MODELS_PATH}/entity_type_detection_distilbert_lcquad2.0"
},
{
"url": "http://files.deeppavlov.ai/kbqa/downloads/queries_and_rels_lcquad2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
},
{
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_db_lcquad2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/entity_linking_eng"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_full.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
},
{
Expand Down
Loading