Spaces:
Runtime error
Runtime error
Thomas De Decker
committed on
Commit
·
f6df2a0
1
Parent(s):
cf55b94
Fix truncation bug + Update description
Browse files
README.md
CHANGED
|
@@ -8,15 +8,6 @@ sdk_version: 1.2.0
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
-
models:
|
| 12 |
-
- DeDeckerThomas/keyphrase-extraction-kbir-inspec
|
| 13 |
-
- DeDeckerThomas/keyphrase-extraction-distilbert-openkp
|
| 14 |
-
- DeDeckerThomas/keyphrase-extraction-distilbert-kptimes
|
| 15 |
-
- DeDeckerThomas/keyphrase-extraction-distilbert-inspec
|
| 16 |
-
- DeDeckerThomas/keyphrase-extraction-kbir-kpcrowd
|
| 17 |
-
- DeDeckerThomas/keyphrase-generation-keybart-inspec
|
| 18 |
-
- DeDeckerThomas/keyphrase-generation-t5-small-inspec
|
| 19 |
-
- DeDeckerThomas/keyphrase-generation-t5-small-openkp
|
| 20 |
---
|
| 21 |
|
| 22 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
app.py
CHANGED
|
@@ -35,7 +35,6 @@ def get_annotated_text(text, keyphrases, color="#d294ff"):
|
|
| 35 |
rf"$K:{keyphrases.index(keyphrase)}\2",
|
| 36 |
text,
|
| 37 |
flags=re.I,
|
| 38 |
-
count=1,
|
| 39 |
)
|
| 40 |
|
| 41 |
result = []
|
|
@@ -131,8 +130,7 @@ from a text. Since this is a time-consuming process, Artificial Intelligence is
|
|
| 131 |
Currently, classical machine learning methods, that use statistics and linguistics, are widely used
|
| 132 |
for the extraction process. The fact that these methods have been widely used in the community has
|
| 133 |
the advantage that there are many easy-to-use libraries. Now with the recent innovations in
|
| 134 |
-
|
| 135 |
-
keyphrase extraction can be improved. These new methods also focus on the semantics and
|
| 136 |
context of a document, which is quite an improvement.
|
| 137 |
|
| 138 |
This space gives you the ability to test around with some keyphrase extraction and generation models.
|
|
|
|
| 35 |
rf"$K:{keyphrases.index(keyphrase)}\2",
|
| 36 |
text,
|
| 37 |
flags=re.I,
|
|
|
|
| 38 |
)
|
| 39 |
|
| 40 |
result = []
|
|
|
|
| 130 |
Currently, classical machine learning methods, that use statistics and linguistics, are widely used
|
| 131 |
for the extraction process. The fact that these methods have been widely used in the community has
|
| 132 |
the advantage that there are many easy-to-use libraries. Now with the recent innovations in
|
| 133 |
+
NLP, transformers can be used to improve keyphrase extraction. Transformers also focus on the semantics and
|
|
|
|
| 134 |
context of a document, which is quite an improvement.
|
| 135 |
|
| 136 |
This space gives you the ability to test around with some keyphrase extraction and generation models.
|
pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc
CHANGED
|
Binary files a/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc differ
|
|
|
pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc
CHANGED
|
Binary files a/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc differ
|
|
|
pipelines/keyphrase_extraction_pipeline.py
CHANGED
|
@@ -11,7 +11,9 @@ class KeyphraseExtractionPipeline(TokenClassificationPipeline):
|
|
| 11 |
def __init__(self, model, *args, **kwargs):
|
| 12 |
super().__init__(
|
| 13 |
model=AutoModelForTokenClassification.from_pretrained(model),
|
| 14 |
-
tokenizer=AutoTokenizer.from_pretrained(
|
|
|
|
|
|
|
| 15 |
*args,
|
| 16 |
**kwargs
|
| 17 |
)
|
|
|
|
| 11 |
def __init__(self, model, *args, **kwargs):
|
| 12 |
super().__init__(
|
| 13 |
model=AutoModelForTokenClassification.from_pretrained(model),
|
| 14 |
+
tokenizer=AutoTokenizer.from_pretrained(
|
| 15 |
+
model, truncate=True
|
| 16 |
+
),
|
| 17 |
*args,
|
| 18 |
**kwargs
|
| 19 |
)
|
pipelines/keyphrase_generation_pipeline.py
CHANGED
|
@@ -8,7 +8,7 @@ class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
|
|
| 8 |
def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
|
| 9 |
super().__init__(
|
| 10 |
model=AutoModelForSeq2SeqLM.from_pretrained(model),
|
| 11 |
-
tokenizer=AutoTokenizer.from_pretrained(model),
|
| 12 |
*args,
|
| 13 |
**kwargs
|
| 14 |
)
|
|
|
|
| 8 |
def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
|
| 9 |
super().__init__(
|
| 10 |
model=AutoModelForSeq2SeqLM.from_pretrained(model),
|
| 11 |
+
tokenizer=AutoTokenizer.from_pretrained(model, truncate=True),
|
| 12 |
*args,
|
| 13 |
**kwargs
|
| 14 |
)
|