-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_predict.py
67 lines (58 loc) · 1.86 KB
/
test_predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pytest
from juritools.predict import JuriTagger, load_ner_model
from juritools.type import NamedEntity
from jurispacy_tokenizer import JuriSpacyTokenizer
import os
# Windows fix for the PosixPath issue: when running on Windows
# (os.name == "nt"), rebind pathlib.PosixPath to pathlib.WindowsPath.
# NOTE(review): presumably needed so that objects referencing PosixPath
# (e.g. a model saved on a POSIX system) can be loaded here -- confirm.
if os.name == "nt":
    import pathlib

    # `temp` keeps a handle on the original PosixPath class; nothing in the
    # visible code restores it afterwards.
    temp, pathlib.PosixPath = pathlib.PosixPath, pathlib.WindowsPath
# Directory containing the test model: the "model" folder located next to
# this file (the file path is resolved with os.path.realpath first).
FIXTURE_DIR = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "model",
)

# Both objects are created once, at import time, and are handed to the
# `juritagger` fixture below.
model = load_ner_model(
    os.path.join(FIXTURE_DIR, "new_categories_model.pt"),
)
tokenizer = JuriSpacyTokenizer()
@pytest.fixture(scope="session")
def juritagger():
    """Provide a JuriTagger built from the module-level tokenizer and model.

    The fixture is session-scoped, so a single instance is shared by every
    test that requests it.
    """
    tagger = JuriTagger(tokenizer, model)
    return tagger
def test_simple_text(juritagger):
    """Check sentence splitting and entity extraction on a two-sentence text.

    The tagger must produce two flair sentences and return "Pierre" and
    "Dupont" as ``personnePhysique`` entities at offsets 0 and 7.
    """
    juritagger.predict("Pierre Dupont est ingénieur.\n Il est content.")
    assert len(juritagger.flair_sentences) == 2

    predicted = juritagger.get_entity_json_from_flair_sentences()
    # Overwrite each score with a fixed value before comparing.
    # NOTE(review): presumably this matches NamedEntity's default score so the
    # model confidence does not affect equality -- confirm.
    for item in predicted:
        item.score = 1.0

    expected = [
        NamedEntity(
            text=word,
            start=offset,
            label="personnePhysique",
            source="NER model",
        )
        for word, offset in (("Pierre", 0), ("Dupont", 7))
    ]
    assert predicted == expected
@pytest.mark.skip(reason="output is not a list of dict anymore")
def test_simple_text_old(juritagger):
    """Legacy version of the entity check, expecting plain dicts.

    Skipped: per the skip reason, the tagger output is no longer a list of
    dicts. Kept as a record of the former output format.
    """
    juritagger.predict("Pierre Dupont est ingénieur.\n Il est content.")
    assert len(juritagger.flair_sentences) == 2

    actual = juritagger.get_entity_json_from_flair_sentences()
    expected = [
        {
            "text": word,
            "start": begin,
            "end": finish,
            "label": "personnePhysique",
            "source": "NER model",
        }
        for word, begin, finish in (("Pierre", 0, 6), ("Dupont", 7, 13))
    ]
    assert actual == expected