Spaces:
Running
Running
JJ Tsao
committed on
Commit
·
b68e1c5
1
Parent(s):
cb0afeb
Initial commit
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app/.DS_Store +0 -0
- app/__pycache__/__init__.cpython-313.pyc +0 -0
- app/__pycache__/api_routes.cpython-313.pyc +0 -0
- app/__pycache__/bootstrap.cpython-313.pyc +0 -0
- app/__pycache__/chatbot.cpython-313.pyc +0 -0
- app/__pycache__/config.cpython-313.pyc +0 -0
- app/__pycache__/llm_services.cpython-313.pyc +0 -0
- app/__pycache__/media_retriever.cpython-313.pyc +0 -0
- app/__pycache__/retriever.cpython-313.pyc +0 -0
- app/__pycache__/schemas.cpython-313.pyc +0 -0
- app/__pycache__/vectorstore.cpython-313.pyc +0 -0
- app/api_routes.py +23 -0
- app/bootstrap.py +85 -0
- app/chatbot.py +78 -0
- app/config.py +26 -0
- app/llm_services.py +77 -0
- app/media_retriever.py +260 -0
- app/retriever.py +18 -0
- app/schemas.py +35 -0
- app/vectorstore.py +14 -0
- data/.DS_Store +0 -0
- data/bm25_files/.DS_Store +0 -0
- data/bm25_files/movie_bm25_model.joblib +3 -0
- data/bm25_files/movie_bm25_vocab.joblib +3 -0
- data/bm25_files/tv_bm25_model.joblib +3 -0
- data/bm25_files/tv_bm25_vocab.joblib +3 -0
- data/nltk_data/.DS_Store +0 -0
- data/nltk_data/corpora/.DS_Store +0 -0
- data/nltk_data/corpora/stopwords.zip +3 -0
- data/nltk_data/corpora/stopwords/README +32 -0
- data/nltk_data/corpora/stopwords/albanian +237 -0
- data/nltk_data/corpora/stopwords/arabic +754 -0
- data/nltk_data/corpora/stopwords/azerbaijani +165 -0
- data/nltk_data/corpora/stopwords/basque +326 -0
- data/nltk_data/corpora/stopwords/belarusian +224 -0
- data/nltk_data/corpora/stopwords/bengali +398 -0
- data/nltk_data/corpora/stopwords/catalan +278 -0
- data/nltk_data/corpora/stopwords/chinese +841 -0
- data/nltk_data/corpora/stopwords/danish +94 -0
- data/nltk_data/corpora/stopwords/dutch +101 -0
- data/nltk_data/corpora/stopwords/english +198 -0
- data/nltk_data/corpora/stopwords/finnish +235 -0
- data/nltk_data/corpora/stopwords/french +157 -0
- data/nltk_data/corpora/stopwords/german +232 -0
- data/nltk_data/corpora/stopwords/greek +265 -0
- data/nltk_data/corpora/stopwords/hebrew +221 -0
- data/nltk_data/corpora/stopwords/hinglish +1036 -0
- data/nltk_data/corpora/stopwords/hungarian +199 -0
- data/nltk_data/corpora/stopwords/indonesian +758 -0
- data/nltk_data/corpora/stopwords/italian +279 -0
app/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
app/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (190 Bytes). View file
|
|
app/__pycache__/api_routes.cpython-313.pyc
ADDED
Binary file (1.27 kB). View file
|
|
app/__pycache__/bootstrap.cpython-313.pyc
ADDED
Binary file (3.5 kB). View file
|
|
app/__pycache__/chatbot.cpython-313.pyc
ADDED
Binary file (4.04 kB). View file
|
|
app/__pycache__/config.cpython-313.pyc
ADDED
Binary file (1.51 kB). View file
|
|
app/__pycache__/llm_services.cpython-313.pyc
ADDED
Binary file (3.75 kB). View file
|
|
app/__pycache__/media_retriever.cpython-313.pyc
ADDED
Binary file (11.8 kB). View file
|
|
app/__pycache__/retriever.cpython-313.pyc
ADDED
Binary file (527 Bytes). View file
|
|
app/__pycache__/schemas.cpython-313.pyc
ADDED
Binary file (2.16 kB). View file
|
|
app/__pycache__/vectorstore.cpython-313.pyc
ADDED
Binary file (741 Bytes). View file
|
|
app/api_routes.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from app.bootstrap import chat_fn
|
2 |
+
from app.schemas import ChatRequest
|
3 |
+
from fastapi import APIRouter
|
4 |
+
from fastapi.responses import StreamingResponse
|
5 |
+
|
6 |
+
router = APIRouter()
|
7 |
+
|
8 |
+
|
9 |
+
@router.post("/chat")
async def chat_endpoint(req: ChatRequest):
    """Stream the chatbot's reply to ``req`` as a plain-text response.

    The request fields are forwarded to ``chat_fn`` and every chunk it
    yields is relayed to the client through a ``StreamingResponse``.
    """

    def response_stream():
        # This is a generator function, so chat_fn is only called once the
        # response body starts being iterated.
        yield from chat_fn(
            question=req.question,
            history=req.history,
            media_type=req.media_type,
            genres=req.genres,
            providers=req.providers,
            year_range=tuple(req.year_range),
        )

    return StreamingResponse(response_stream(), media_type="text/plain")
|
app/bootstrap.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
import joblib
|
6 |
+
import nltk
|
7 |
+
from app.chatbot import build_chat_fn
|
8 |
+
from app.config import (
|
9 |
+
BM25_PATH,
|
10 |
+
INTENT_MODEL,
|
11 |
+
NLTK_PATH,
|
12 |
+
QDRANT_API_KEY,
|
13 |
+
QDRANT_ENDPOINT,
|
14 |
+
QDRANT_MOVIE_COLLECTION_NAME,
|
15 |
+
QDRANT_TV_COLLECTION_NAME,
|
16 |
+
)
|
17 |
+
from app.llm_services import load_sentence_model
|
18 |
+
from app.retriever import get_media_retriever
|
19 |
+
from app.vectorstore import connect_qdrant
|
20 |
+
from rank_bm25 import BM25Okapi
|
21 |
+
from transformers import pipeline
|
22 |
+
|
23 |
+
# Reference timestamp for the total-startup-time report printed at the end of
# this module.
start = time.time()
# NOTE(review): presumably set to silence the Hugging Face tokenizers
# fork/parallelism warning -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
25 |
+
|
26 |
+
|
27 |
+
def load_bm25_files() -> tuple[dict[str, BM25Okapi], dict[str, dict[str, int]]]:
    """Load the serialized BM25 models and vocabularies for movies and TV.

    Files are read from ``BM25_PATH`` and are named
    ``<kind>_bm25_model.joblib`` / ``<kind>_bm25_vocab.joblib`` for the kinds
    ``"movie"`` and ``"tv"``.

    Returns:
        A ``(bm25_models, bm25_vocabs)`` pair, each a dict keyed by
        ``"movie"`` and ``"tv"``. Each vocabulary is presumably a
        term -> sparse-index mapping (that is how ``embed_sparse`` in
        media_retriever.py uses it) -- confirm against the export script.

    Raises:
        FileNotFoundError: if any of the four files is missing. The original
            error is chained as the cause.
    """
    bm25_dir = Path(BM25_PATH)
    media_kinds = ("movie", "tv")
    # NOTE: joblib.load unpickles its input; only load files shipped with the
    # application, never user-supplied ones.
    try:
        bm25_models = {
            kind: joblib.load(bm25_dir / f"{kind}_bm25_model.joblib")
            for kind in media_kinds
        }
        bm25_vocabs = {
            kind: joblib.load(bm25_dir / f"{kind}_bm25_vocab.joblib")
            for kind in media_kinds
        }
    except FileNotFoundError as e:
        raise FileNotFoundError(f"Missing BM25 files: {e}") from e
    return bm25_models, bm25_vocabs
|
41 |
+
|
42 |
+
|
43 |
+
def setup_retriever():
    """Build the media retriever from its runtime dependencies.

    Loads the sentence-embedding model, creates the Qdrant client, registers
    the bundled NLTK data directory, and loads the BM25 files, then hands
    everything to ``get_media_retriever``.

    Returns:
        Whatever ``get_media_retriever`` returns for these components.
    """
    sentence_model = load_sentence_model()
    client = connect_qdrant(endpoint=QDRANT_ENDPOINT, api_key=QDRANT_API_KEY)

    # Add the bundled data directory to NLTK's resource search path.
    nltk.data.path.append(str(NLTK_PATH))
    print("✅ NLTK resources loaded")

    models_by_media, vocabs_by_media = load_bm25_files()
    print("✅ BM25 files loaded")

    return get_media_retriever(
        embed_model=sentence_model,
        qdrant_client=client,
        bm25_models=models_by_media,
        bm25_vocabs=vocabs_by_media,
        movie_collection_name=QDRANT_MOVIE_COLLECTION_NAME,
        tv_collection_name=QDRANT_TV_COLLECTION_NAME,
    )
|
60 |
+
|
61 |
+
|
62 |
+
def setup_intent_classifier():
    """Load the intent-classification pipeline and run warm-up queries.

    Returns:
        The ``transformers`` text-classification pipeline built from
        ``INTENT_MODEL``.
    """
    print(f"🔧 Loading intent classifier from {INTENT_MODEL}")
    intent_pipeline = pipeline("text-classification", model=INTENT_MODEL)

    print("🔥 Warming up intent classifier...")
    # Throwaway inferences; results are discarded. Presumably this keeps
    # one-time initialization cost out of the first real request.
    for sample_query in (
        "Can you recommend a feel-good movie?",
        "Who directed The Godfather?",
        "Do you like action films?",
    ):
        intent_pipeline(sample_query)

    print("🤖 Classifier ready")
    return intent_pipeline
|
77 |
+
|
78 |
+
|
79 |
+
# Module-level singletons: built exactly once, when this module is first
# imported, and shared by every request afterwards.
retriever = setup_retriever()
intent_classifier = setup_intent_classifier()
chat_fn = build_chat_fn(retriever, intent_classifier)

# Report how long the whole import-time initialization took.
print(f"🔧 Total startup time: {time.time() - start:.2f}s")
|
85 |
+
|
app/chatbot.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import time
|
3 |
+
from concurrent.futures import ThreadPoolExecutor
|
4 |
+
|
5 |
+
from app.llm_services import call_chat_model_openai
|
6 |
+
|
7 |
+
|
8 |
+
def sanitize_markdown(md_text: str) -> str:
    """Return ``md_text`` with inline Markdown image tags (``![alt](url)``) removed."""
    return re.sub(r'!\[.*?\]\(.*?\)', '', md_text)


def strip_images_from_stream(chunks):
    """Yield the text of ``chunks`` with Markdown image tags removed.

    Sanitizing each streamed chunk on its own misses any image tag that is
    split across chunk boundaries. This generator therefore holds back only
    the tail of the text that could still grow into an image tag and emits
    everything else immediately, so the concatenated output equals
    ``sanitize_markdown`` applied to the concatenated input.

    Args:
        chunks: iterable of text fragments.

    Yields:
        Non-empty text fragments with image tags stripped.
    """
    pending = ""
    for chunk in chunks:
        pending = sanitize_markdown(pending + chunk)
        # "." in the pattern never matches a newline, so only the current
        # (last) line can still turn into an image tag.
        line_start = pending.rfind("\n") + 1
        hold_from = pending.find("![", line_start)
        if hold_from == -1:
            # A trailing "!" may become "![" once the next chunk arrives.
            hold_from = len(pending) - 1 if pending.endswith("!") else len(pending)
        ready, pending = pending[:hold_from], pending[hold_from:]
        if ready:
            yield ready
    # Whatever is still held at the end never completed into an image tag.
    if pending:
        yield pending


def build_chat_fn(retriever, intent_classifier):
    """Create the streaming chat function bound to the given components.

    Args:
        retriever: object providing ``embed_dense``, ``embed_sparse``,
            ``retrieve_and_rerank`` and ``format_context``.
        intent_classifier: callable returning a list whose first item has a
            ``"label"`` key; the label ``"recommendation"`` selects the
            retrieval pipeline.

    Returns:
        A generator function ``chat(question, history, media_type, genres,
        providers, year_range)`` that yields the reply as text chunks.
    """

    def chat(
        question,
        history,
        media_type="movies",
        genres=None,
        providers=None,
        year_range=None,
    ):
        full_t0 = time.time()

        with ThreadPoolExecutor() as executor:
            # Classify user intent to determine if it is a recommendation ask
            t0 = time.time()
            intent_future = executor.submit(
                lambda q: intent_classifier(q)[0]["label"] == "recommendation", question
            )
            print(f"\n🧠 executor.submit(classify_intent) took {time.time() - t0:.3f}s")

            # Embed the query as a dense vector concurrently with the intent
            # classification, before the intent is known.
            t0 = time.time()
            query_vector_future = executor.submit(retriever.embed_dense, question)
            print(f"🧵 executor.submit(embed_text) took {time.time() - t0:.3f}s")

            # Wait for results
            t0 = time.time()
            is_rec_intent = intent_future.result()
            print(f"✅ classify_intent() result received in {time.time() - t0:.3f}s")

            t0 = time.time()
            dense_vector = query_vector_future.result()
            print(f"📈 embed_text() result received in {time.time() - t0:.3f}s")

        if is_rec_intent:
            # The sparse vector is only needed for hybrid retrieval, so it is
            # computed on the recommendation path only.
            t0 = time.time()
            sparse_vector = retriever.embed_sparse(question, media_type)
            print(f"📈 embed_sparse() result received in {time.time() - t0:.3f}s")

            # Run the RAG pipeline: retrieve, rerank, and build the context.
            t0 = time.time()
            retrieved_movies = retriever.retrieve_and_rerank(
                dense_vector,
                sparse_vector,
                media_type.lower(),
                genres,
                providers,
                year_range,
            )
            print(f"\n📚 retrieve_and_rerank() took {time.time() - t0:.3f}s")

            context = retriever.format_context(retrieved_movies)
            user_message = f"{question}\n\nContext:\nBased on the following retrieved {media_type.lower()}, suggest the best recommendations.\n\n{context}"
        else:
            # General conversation: send the question as-is.
            user_message = question

        print(f"✨ Total chat() prep time before streaming: {time.time() - full_t0:.3f}s")
        reply_chunks = call_chat_model_openai(history, user_message)
        if is_rec_intent:
            yield from reply_chunks
        else:
            # Image tags are stripped from general-conversation replies only.
            yield from strip_images_from_stream(reply_chunks)

    return chat
|
app/config.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
# Load environment variables via python-dotenv before reading them below.
load_dotenv()

# --- API credentials ---
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")

# --- Qdrant endpoint and collection names ---
QDRANT_ENDPOINT = os.environ.get("QDRANT_ENDPOINT")
QDRANT_MOVIE_COLLECTION_NAME = os.environ.get("QDRANT_MOVIE_COLLECTION_NAME_BGE")
QDRANT_TV_COLLECTION_NAME = os.environ.get("QDRANT_TV_COLLECTION_NAME_BGE")

# --- Data directories, resolved relative to the directory above this package ---
_DATA_DIR = Path(__file__).resolve().parent.parent / "data"
NLTK_PATH = _DATA_DIR / "nltk_data"
BM25_PATH = _DATA_DIR / "bm25_files"

# --- Model identifiers ---
INTENT_MODEL = "JJTsao/intent-classifier-distilbert-moviebot"  # Fine-tuned model for query intent classification
EMBEDDING_MODEL = "JJTsao/fine-tuned_movie_retriever-bge-base-en-v1.5"  # Fine-tuned sentence transformer model for dense query embeddings
OPENAI_MODEL = "gpt-4o-mini"  # LLM for chat completions


# Fail fast at import time when required configuration is absent or empty.
if not (OPENAI_API_KEY and QDRANT_API_KEY):
    raise ValueError("Missing API key(s).")
if not all((QDRANT_ENDPOINT, QDRANT_MOVIE_COLLECTION_NAME, QDRANT_TV_COLLECTION_NAME)):
    raise ValueError("Missing QDrant URL or collection name.")
|
app/llm_services.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from openai import OpenAI
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
from app.config import EMBEDDING_MODEL, OPENAI_MODEL, OPENAI_API_KEY
|
7 |
+
|
8 |
+
# === LLM Config ===
|
9 |
+
_sentence_model = None # Not loaded at import time
|
10 |
+
|
11 |
+
# === Clients ===
|
12 |
+
openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
13 |
+
|
14 |
+
# === System Prompt ===
|
15 |
+
SYSTEM_PROMPT = """
|
16 |
+
You are a professional film curator and critic. Your role is to analyze the user's preferences and recommend high-quality films or TV shows using the provided context. Do not seek film or tv show options outside of the list provided to you.
|
17 |
+
Focus on:
|
18 |
+
|
19 |
+
- Artistic merit and storytelling
|
20 |
+
- Genres, themes, and tone
|
21 |
+
- Popularity, IMDB ratings, and Rotten Tomatoes ratings
|
22 |
+
|
23 |
+
Provide a brief explanation of why the user might enjoy each movie or tv series. Include IMDB rating, Rotten Tomatoe ratings, and a poster. Answer with authority and care. Respond in markdown.
|
24 |
+
"""
|
25 |
+
|
26 |
+
|
27 |
+
def load_sentence_model():
    """Return the shared embedding model, loading and warming it on first call.

    The model is cached in the module-level ``_sentence_model``; later calls
    return the cached instance without reloading or re-running the warm-up.
    """
    global _sentence_model
    if _sentence_model is not None:
        return _sentence_model

    print("⏳ Loading embedding model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    _sentence_model = SentenceTransformer(EMBEDDING_MODEL, device=device)

    print(f"🔥 Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")

    # Multi-sentence warm-up batch, encoded twice with a short pause between
    # passes; the outputs are discarded.
    warmup_batch = [
        "A suspenseful thriller with deep character development and moral ambiguity.",
        "Coming-of-age story with emotional storytelling and strong ensemble performances.",
        "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
        "Recommend me some comedies.",
    ]
    _sentence_model.encode(warmup_batch, show_progress_bar=False)
    time.sleep(0.5)
    _sentence_model.encode(warmup_batch, show_progress_bar=False)
    print("🚀 Embedding model fully warmed up.")

    return _sentence_model
|
50 |
+
|
51 |
+
|
52 |
+
def embed_text(text: str) -> list[float]:
    """Encode ``text`` with the shared embedding model and return it as a list."""
    embedding = load_sentence_model().encode(text)
    return embedding.tolist()
|
55 |
+
|
56 |
+
|
57 |
+
def build_chat_history(history: list, max_turns: int = 5) -> list:
    """Convert the most recent chat messages into role/content dicts.

    Args:
        history: message objects exposing ``role`` and ``content`` attributes,
            oldest first.
        max_turns: number of turns to keep; two messages are kept per turn.
            Zero or a negative value keeps nothing.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts for the last
        ``max_turns * 2`` messages, in their original order.
    """
    # Guard explicitly: history[-0:] would return the WHOLE history, and a
    # negative count would slice from the front instead of the back.
    if max_turns <= 0:
        return []
    return [
        {"role": msg.role, "content": msg.content}
        for msg in history[-max_turns * 2:]
    ]
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
def call_chat_model_openai(history, user_message: str):
    """Stream the chat model's reply to ``user_message`` as text fragments.

    The request consists of the system prompt, the recent history (as built
    by ``build_chat_history``), and the new user message.

    Args:
        history: prior chat messages, or ``None``/empty for a fresh chat.
        user_message: the content of the new user turn.

    Yields:
        Each non-empty content delta from the streamed completion.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages += build_chat_history(history or [])
    messages.append({"role": "user", "content": user_message})

    response = openai_client.chat.completions.create(
        model=OPENAI_MODEL, messages=messages, temperature=0.7, stream=True
    )

    for chunk in response:
        # A streamed chunk can arrive with an empty ``choices`` list; skip it
        # rather than fail with IndexError in the middle of a response.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta
|
app/media_retriever.py
ADDED
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from collections import Counter
|
2 |
+
from typing import Dict, List
|
3 |
+
|
4 |
+
from nltk.corpus import stopwords
|
5 |
+
from nltk.stem import PorterStemmer
|
6 |
+
from nltk.tokenize import word_tokenize
|
7 |
+
from qdrant_client import QdrantClient
|
8 |
+
from qdrant_client.models import FieldCondition, Filter, MatchValue, Range, models
|
9 |
+
from sentence_transformers import SentenceTransformer
|
10 |
+
|
11 |
+
|
12 |
+
class MediaRetriever:
    """Hybrid retriever over Qdrant movie and TV collections.

    A query is embedded as a dense vector and as a BM25-weighted sparse
    vector. Both are searched in Qdrant, the two result sets are fused per
    point id, and the fused candidates are reranked by a weighted sum of
    dense score, sparse score, rating and popularity.
    """

    def __init__(
        self,
        embed_model: "SentenceTransformer",
        qdrant_client: "QdrantClient",
        bm25_models: Dict,
        bm25_vocabs: Dict,
        movie_collection_name: str,
        tv_collection_name: str,
        dense_weight: float = 0.4,  # Weight of semantic match score for reranking
        sparse_weight: float = 0.1,  # Weight of BM25 match score for reranking
        rating_weight: float = 0.3,  # Weight of rating score for reranking
        popularity_weight: float = 0.2,  # Weight of popularity score for reranking
        semantic_retrieval_limit: int = 300,  # Number of dense-search candidates to retrieve for reranking
        bm25_retrieval_limit: int = 20,  # Number of sparse-search candidates to retrieve
        top_k: int = 20,  # Number of post-reranking results to send to the LLM
    ):
        """Store the clients, BM25 artifacts and reranking parameters.

        ``bm25_models`` and ``bm25_vocabs`` are dicts keyed by ``"movie"``
        and ``"tv"``.
        """
        self.client = qdrant_client
        self.movie_collection_name = movie_collection_name
        self.tv_collection_name = tv_collection_name
        self.embed_model = embed_model
        self.bm25_models = bm25_models
        self.bm25_vocabs = bm25_vocabs
        self.dense_weight = dense_weight
        self.sparse_weight = sparse_weight
        self.rating_weight = rating_weight
        self.popularity_weight = popularity_weight
        self.semantic_retrieval_limit = semantic_retrieval_limit
        self.bm25_retrieval_limit = bm25_retrieval_limit
        self.top_k = top_k

    @staticmethod
    def _is_movie(media_type: str) -> bool:
        """Return True when ``media_type`` selects movies (case-insensitive)."""
        return media_type.lower() == "movies"

    def _collection_for(self, media_type: str) -> str:
        """Return the Qdrant collection name for ``media_type``.

        Anything other than ``"movies"`` selects the TV collection.
        """
        if self._is_movie(media_type):
            return self.movie_collection_name
        return self.tv_collection_name

    def embed_dense(self, query: str) -> List[float]:
        """Encode ``query`` with the embedding model and return it as a list."""
        return self.embed_model.encode(query).tolist()

    @staticmethod
    def tokenize_and_preprocess(text: str) -> List[str]:
        """Lowercase and tokenize ``text``, drop stop words, and stem.

        Tokens that are English stop words or are not purely alphanumeric are
        removed; the remaining tokens are Porter-stemmed.
        """
        stop_words = set(stopwords.words("english"))
        stemmer = PorterStemmer()

        tokens = word_tokenize(text.lower())
        return [
            stemmer.stem(w)
            for w in tokens
            if w not in stop_words and w.isalnum()
        ]

    def embed_sparse(self, query: str, media_type: str) -> Dict:
        """Build a BM25-weighted sparse vector for ``query``.

        Only query terms present in the BM25 vocabulary contribute. Each
        term's weight is the BM25 term score, using the query's token count
        as the document length.

        Returns:
            ``{"indices": [...], "values": [...]}`` with parallel lists; both
            are empty when no query term is in the vocabulary.
        """
        kind = "movie" if self._is_movie(media_type) else "tv"
        bm25_model = self.bm25_models[kind]
        bm25_vocab = self.bm25_vocabs[kind]

        tokens = self.tokenize_and_preprocess(query)

        term_counts = Counter(tokens)
        indices, values = [], []

        avg_doc_length = bm25_model.avgdl
        k1, b = bm25_model.k1, bm25_model.b

        for term, tf in term_counts.items():
            if term not in bm25_vocab:
                continue
            idx = bm25_vocab[term]
            idf = bm25_model.idf.get(term, 0)
            numerator = idf * tf * (k1 + 1)
            denominator = tf + k1 * (1 - b + b * len(tokens) / avg_doc_length)
            if denominator != 0:
                indices.append(idx)
                values.append(float(numerator / denominator))
        return {"indices": indices, "values": values}

    def retrieve_and_rerank(
        self,
        dense_vector: List[float],
        sparse_vector: Dict,
        media_type: str = "movies",
        genres=None,
        providers=None,
        year_range=None,
    ) -> List:
        """Run dense and sparse searches, fuse them, and return the top results.

        Args:
            dense_vector: dense query embedding.
            sparse_vector: dict with ``"indices"`` and ``"values"`` lists.
            media_type: ``"movies"`` selects the movie collection; any other
                value selects the TV collection.
            genres: optional genre values; a point must match at least one.
            providers: optional provider values; a point must match at least
                one.
            year_range: optional ``(start, end)`` inclusive release-year bounds.

        Returns:
            Up to ``top_k`` Qdrant points, best first; empty when neither
            search returned anything.
        """
        # Construct Qdrant filter based on user input
        qdrant_filter = self._build_filter(genres, providers, year_range)

        # Query Qdrant for semantic search with dense vector
        dense_results = self._query_dense(
            vector=dense_vector,
            media_type=media_type,
            qdrant_filter=qdrant_filter,
        )

        # Query Qdrant for BM25 search with sparse vector
        sparse_results = self._query_sparse(
            vector=sparse_vector,
            media_type=media_type,
            qdrant_filter=qdrant_filter,
        )

        # Fuse dense and sparse results and rerank. The emptiness check is on
        # the fused candidates: the response objects themselves are not a
        # reliable signal of whether any points came back.
        fused = self.fuse_dense_sparse(dense_results, sparse_results)
        if not fused:
            return []
        reranked = self.rerank_fused_results(fused)
        top_points = reranked[: self.top_k]

        # Debug listing of exactly the points that are returned.
        print(f"\nReranked Top-{self.top_k}:")
        for rank, p in enumerate(top_points, start=1):
            f = fused[p.id]
            print(
                f"#{rank} {p.payload.get('title', '')} | Score: {p.score} Dense: {f['dense_score']:.3f}, Sparse: {f['sparse_score']:.3f}, Pop: {p.payload.get('popularity', 0)}, Rating: {p.payload.get('vote_average', 0)}"
            )

        return top_points

    def _build_filter(
        self, genres=None, providers=None, year_range=None
    ) -> "Filter | None":
        """Translate the user's selections into a Qdrant filter.

        Each non-empty selection adds one mandatory clause: genres and
        providers match if ANY listed value matches; ``year_range`` bounds
        ``release_year`` inclusively. Returns ``None`` when nothing is
        selected.
        """
        must_clauses = []

        if genres:
            must_clauses.append(
                Filter(
                    should=[
                        FieldCondition(key="genres", match=MatchValue(value=genre))
                        for genre in genres
                    ]
                )
            )

        if providers:
            must_clauses.append(
                Filter(
                    should=[
                        FieldCondition(
                            key="watch_providers", match=MatchValue(value=provider)
                        )
                        for provider in providers
                    ]
                )
            )

        if year_range:
            must_clauses.append(
                FieldCondition(
                    key="release_year",
                    range=Range(gte=year_range[0], lte=year_range[1]),
                )
            )

        return Filter(must=must_clauses) if must_clauses else None

    def _query_dense(self, vector, media_type, qdrant_filter):
        """Search the collection's ``dense_vector`` field with ``vector``."""
        return self.client.query_points(
            collection_name=self._collection_for(media_type),
            query=vector,
            using="dense_vector",
            query_filter=qdrant_filter,
            limit=self.semantic_retrieval_limit,
            with_payload=["llm_context", "title", "popularity", "vote_average"],
            with_vectors=False,
        )

    def _query_sparse(self, vector, media_type, qdrant_filter):
        """Search the collection's ``sparse_vector`` field with ``vector``."""
        return self.client.query_points(
            collection_name=self._collection_for(media_type),
            query=models.SparseVector(**vector),
            using="sparse_vector",
            query_filter=qdrant_filter,
            limit=self.bm25_retrieval_limit,
            with_payload=["llm_context", "title", "popularity", "vote_average"],
            with_vectors=False,
        )

    def fuse_dense_sparse(
        self,
        dense_results,
        sparse_results,
    ) -> Dict:
        """Merge dense and sparse search results per point id.

        Args:
            dense_results: response object whose ``points`` carry ``id`` and
                ``score``.
            sparse_results: response object of the same shape.

        Returns:
            Dict keyed by point id with ``"point"``, ``"dense_score"`` and
            ``"sparse_score"`` entries. Sparse scores are divided by the
            largest sparse score and capped at 0.8; a point missing from one
            search gets 0.0 for that score.
        """
        fused = {
            point.id: {
                "point": point,
                "dense_score": point.score or 0.0,
                "sparse_score": 0.0,
            }
            for point in dense_results.points
        }

        max_sparse_score = max(
            (pt.score or 0.0 for pt in sparse_results.points), default=0.0
        )

        for point in sparse_results.points:
            # Without a positive maximum there is nothing to normalize
            # against; use 0.0 instead of dividing by zero.
            if max_sparse_score > 0:
                sparse_score = min((point.score or 0.0) / max_sparse_score, 0.8)
            else:
                sparse_score = 0.0

            entry = fused.get(point.id)
            if entry is None:
                fused[point.id] = {
                    "point": point,
                    "dense_score": 0.0,
                    "sparse_score": sparse_score,
                }
            else:
                entry["sparse_score"] = sparse_score

        return fused

    def rerank_fused_results(
        self,
        fused: Dict,
    ) -> List:
        """Order fused candidates by the weighted relevance/quality score.

        The score is ``dense_weight * dense + sparse_weight * sparse +
        rating_weight * (vote_average / 10) + popularity_weight *
        (popularity / max popularity among the candidates)``.

        Returns:
            The candidates' points, highest score first.
        """

        def payload_number(point, key) -> float:
            # Missing or null payload values count as 0.
            return float((point.payload or {}).get(key, 0) or 0)

        max_popularity = max(
            (payload_number(f["point"], "popularity") for f in fused.values()),
            default=1.0,
        )
        # If every candidate has zero (or missing) popularity, normalize by
        # 1.0 so the popularity term is 0 instead of a division by zero.
        if max_popularity <= 0:
            max_popularity = 1.0

        def compute_score(f):
            point = f["point"]
            popularity = payload_number(point, "popularity") / max_popularity
            vote_average = payload_number(point, "vote_average") / 10.0

            return (
                self.dense_weight * f["dense_score"]
                + self.sparse_weight * f["sparse_score"]
                + self.rating_weight * vote_average
                + self.popularity_weight * popularity
            )

        reranked = sorted(fused.values(), key=compute_score, reverse=True)

        return [f["point"] for f in reranked]

    def format_context(self, movies: List) -> str:
        """Join the ``llm_context`` payload of each point into one LLM context string.

        Entries are separated by a blank line; a point without
        ``llm_context`` contributes an empty entry.
        """
        # Format the retrieved documents as context for the LLM
        return "\n\n".join(
            [f" {movie.payload.get('llm_context', '')}" for movie in movies]
        )
|
259 |
+
|
260 |
+
|
app/retriever.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from app.media_retriever import MediaRetriever
|
2 |
+
|
3 |
+
def get_media_retriever(
    embed_model,
    qdrant_client,
    bm25_models,
    bm25_vocabs,
    movie_collection_name,
    tv_collection_name,
):
    """Construct a ``MediaRetriever`` from the given components.

    Every argument is passed through unchanged under the same keyword name,
    so the retriever's remaining parameters keep their defaults.
    """
    retriever = MediaRetriever(
        embed_model=embed_model,
        qdrant_client=qdrant_client,
        bm25_models=bm25_models,
        bm25_vocabs=bm25_vocabs,
        movie_collection_name=movie_collection_name,
        tv_collection_name=tv_collection_name,
    )
    return retriever
|
app/schemas.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum
|
2 |
+
from typing import List
|
3 |
+
|
4 |
+
from pydantic import BaseModel, field_validator, model_validator
|
5 |
+
|
6 |
+
|
7 |
+
class ChatMessage(BaseModel):
    """One message of the conversation history."""

    # Speaker of the message; forwarded verbatim as the chat "role".
    role: str
    # Text of the message.
    content: str
|
10 |
+
|
11 |
+
|
12 |
+
class MediaType(str, Enum):
    """Kind of media a chat request targets.

    Members are also plain strings, so they compare equal to their values
    and support ``str`` methods such as ``lower()``.
    """

    MOVIE = "movies"
    TV = "tvs"
|
15 |
+
|
16 |
+
|
17 |
+
class ChatRequest(BaseModel):
    """Request body for the chat endpoint.

    Raises (during validation):
        ValueError: if ``question`` is blank, if ``year_range`` does not hold
            exactly two integers, or if its start is greater than its end.
    """

    # The user's message; must contain non-whitespace characters.
    question: str
    # Earlier messages of the conversation, oldest first.
    history: List[ChatMessage] = []
    media_type: MediaType = MediaType.MOVIE
    # Optional filters; an empty list means "no restriction".
    genres: List[str] = []
    providers: List[str] = []
    # Inclusive [start, end] release-year bounds.
    year_range: List[int] = [1920, 2025]

    @field_validator("question")
    @classmethod
    def validate_question(cls, v):
        """Reject empty or whitespace-only questions; return the value unchanged."""
        if not v.strip():
            raise ValueError("Question cannot be empty")
        return v

    @model_validator(mode="after")
    def validate_year_range(self) -> "ChatRequest":
        """Require ``year_range`` to be an ordered ``[start, end]`` pair."""
        if len(self.year_range) != 2:
            raise ValueError("year_range must be a list of exactly two integers: [start, end]")
        start, end = self.year_range
        # An inverted range can never match anything; reject it up front.
        if start > end:
            raise ValueError("year_range start must not be greater than end")
        return self
|
app/vectorstore.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from qdrant_client import QdrantClient
|
2 |
+
|
3 |
+
|
4 |
+
def connect_qdrant(endpoint: str, api_key: str) -> QdrantClient:
    """Create a Qdrant client for ``endpoint`` and report the outcome.

    Args:
        endpoint: URL of the Qdrant instance.
        api_key: API key passed to the client.

    Returns:
        The constructed ``QdrantClient``.

    Raises:
        Exception: any error raised while creating the client is printed and
            then re-raised unchanged.
    """
    try:
        qdrant = QdrantClient(url=endpoint, api_key=api_key)
        print("✅ Connected to Qdrant.")
        return qdrant
    except Exception as err:
        print(f"❌ Error connecting to Qdrant: {err}")
        raise
|
data/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
data/bm25_files/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
data/bm25_files/movie_bm25_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8aaf32789542f07efa41b2f3a6023110e0f9df84f354ce50fc67173ed57eb9e0
|
3 |
+
size 6617501
|
data/bm25_files/movie_bm25_vocab.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:559fe41c00d8e94d2e516f8278b5950f2e02b4938151cdc0e23986ae6936513b
|
3 |
+
size 454110
|
data/bm25_files/tv_bm25_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bf700b0207c36e600f176b8151c195f2b3d6f9950dc70b10c922c06706224fc
|
3 |
+
size 5680147
|
data/bm25_files/tv_bm25_vocab.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eab388573cc6b4b7dc5b70cd13bdb7dcaaf0a7f261a9cf980c3251a33425d8a0
|
3 |
+
size 489786
|
data/nltk_data/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
data/nltk_data/corpora/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
data/nltk_data/corpora/stopwords.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:430c3b6ee2608783816e8e3a824b28f415cb28fca907e2a5c8c9816819200ba3
|
3 |
+
size 36779
|
data/nltk_data/corpora/stopwords/README
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Stopwords Corpus
|
2 |
+
|
3 |
+
This corpus contains lists of stop words for several languages. These
|
4 |
+
are high-frequency grammatical words which are usually ignored in text
|
5 |
+
retrieval applications.
|
6 |
+
|
7 |
+
They were obtained from:
|
8 |
+
http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/snowball/stopwords/
|
9 |
+
|
10 |
+
The stop words for the Romanian language were obtained from:
|
11 |
+
http://arlc.ro/resources/
|
12 |
+
|
13 |
+
The English list has been augmented
|
14 |
+
https://github.com/nltk/nltk_data/issues/22
|
15 |
+
|
16 |
+
The German list has been corrected
|
17 |
+
https://github.com/nltk/nltk_data/pull/49
|
18 |
+
|
19 |
+
A Kazakh list has been added
|
20 |
+
https://github.com/nltk/nltk_data/pull/52
|
21 |
+
|
22 |
+
A Nepali list has been added
|
23 |
+
https://github.com/nltk/nltk_data/pull/83
|
24 |
+
|
25 |
+
An Azerbaijani list has been added
|
26 |
+
https://github.com/nltk/nltk_data/pull/100
|
27 |
+
|
28 |
+
A Greek list has been added
|
29 |
+
https://github.com/nltk/nltk_data/pull/103
|
30 |
+
|
31 |
+
An Indonesian list has been added
|
32 |
+
https://github.com/nltk/nltk_data/pull/112
|
data/nltk_data/corpora/stopwords/albanian
ADDED
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
tyre
|
2 |
+
rreth
|
3 |
+
le
|
4 |
+
atyre
|
5 |
+
këta
|
6 |
+
megjithëse
|
7 |
+
kemi
|
8 |
+
per
|
9 |
+
ndonëse
|
10 |
+
dytë
|
11 |
+
pse
|
12 |
+
tha
|
13 |
+
aty
|
14 |
+
ndaj
|
15 |
+
ke
|
16 |
+
këtë
|
17 |
+
duhet
|
18 |
+
pa
|
19 |
+
perket
|
20 |
+
veç
|
21 |
+
ndonje
|
22 |
+
një
|
23 |
+
keshtu
|
24 |
+
s
|
25 |
+
janë
|
26 |
+
jane
|
27 |
+
ti
|
28 |
+
ia
|
29 |
+
megjithese
|
30 |
+
prej
|
31 |
+
ishte
|
32 |
+
tjerë
|
33 |
+
ai
|
34 |
+
se
|
35 |
+
tillë
|
36 |
+
do
|
37 |
+
si
|
38 |
+
ja
|
39 |
+
tonë
|
40 |
+
keta
|
41 |
+
pastaj
|
42 |
+
ndersa
|
43 |
+
siç
|
44 |
+
unë
|
45 |
+
gjate
|
46 |
+
di
|
47 |
+
kësaj
|
48 |
+
cilin
|
49 |
+
kjo
|
50 |
+
dhënë
|
51 |
+
da
|
52 |
+
teper
|
53 |
+
ketij
|
54 |
+
ama
|
55 |
+
pasi
|
56 |
+
fjalë
|
57 |
+
kanë
|
58 |
+
vetem
|
59 |
+
za
|
60 |
+
d.m.th.
|
61 |
+
ose
|
62 |
+
pas
|
63 |
+
ndonjë
|
64 |
+
cila
|
65 |
+
ndodhur
|
66 |
+
dyte
|
67 |
+
ardhur
|
68 |
+
kësi
|
69 |
+
nga
|
70 |
+
vete
|
71 |
+
atij
|
72 |
+
ta
|
73 |
+
jenë
|
74 |
+
rendit
|
75 |
+
tane
|
76 |
+
keso
|
77 |
+
deri
|
78 |
+
tone
|
79 |
+
të
|
80 |
+
prandaj
|
81 |
+
bëjë
|
82 |
+
domethënë
|
83 |
+
dhe
|
84 |
+
qi
|
85 |
+
mirepo
|
86 |
+
tona
|
87 |
+
që
|
88 |
+
u
|
89 |
+
këtu
|
90 |
+
cilet
|
91 |
+
jene
|
92 |
+
tjere
|
93 |
+
gjë
|
94 |
+
së
|
95 |
+
gjatë
|
96 |
+
duhej
|
97 |
+
t
|
98 |
+
dhene
|
99 |
+
thuhet
|
100 |
+
po
|
101 |
+
une
|
102 |
+
dy
|
103 |
+
cfare
|
104 |
+
ndërsa
|
105 |
+
sepse
|
106 |
+
edhe
|
107 |
+
cilen
|
108 |
+
to
|
109 |
+
meqenese
|
110 |
+
meje
|
111 |
+
tij
|
112 |
+
qene
|
113 |
+
jeni
|
114 |
+
them
|
115 |
+
përket
|
116 |
+
keto
|
117 |
+
ni
|
118 |
+
këso
|
119 |
+
asaj
|
120 |
+
ajo
|
121 |
+
sic
|
122 |
+
vetëm
|
123 |
+
ketyre
|
124 |
+
andaj
|
125 |
+
na
|
126 |
+
sa
|
127 |
+
kesaj
|
128 |
+
cili
|
129 |
+
këtyre
|
130 |
+
domethene
|
131 |
+
mirëpo
|
132 |
+
cilën
|
133 |
+
mos
|
134 |
+
madh
|
135 |
+
qenë
|
136 |
+
cilët
|
137 |
+
thënë
|
138 |
+
jemi
|
139 |
+
fjale
|
140 |
+
soje
|
141 |
+
neve
|
142 |
+
gjitha
|
143 |
+
kështu
|
144 |
+
vet
|
145 |
+
kur
|
146 |
+
ty
|
147 |
+
meqë
|
148 |
+
meqenëse
|
149 |
+
jush
|
150 |
+
ketë
|
151 |
+
para
|
152 |
+
kush
|
153 |
+
i
|
154 |
+
mua
|
155 |
+
dite
|
156 |
+
ate
|
157 |
+
për
|
158 |
+
tepër
|
159 |
+
nesh
|
160 |
+
meqe
|
161 |
+
ketu
|
162 |
+
ku
|
163 |
+
disa
|
164 |
+
ato
|
165 |
+
mbi
|
166 |
+
gje
|
167 |
+
ne
|
168 |
+
është
|
169 |
+
tille
|
170 |
+
teje
|
171 |
+
megjithate
|
172 |
+
ju
|
173 |
+
nese
|
174 |
+
saj
|
175 |
+
ashtu
|
176 |
+
më
|
177 |
+
mbasi
|
178 |
+
te
|
179 |
+
thene
|
180 |
+
jo
|
181 |
+
ditë
|
182 |
+
nuk
|
183 |
+
gjithe
|
184 |
+
shume
|
185 |
+
nje
|
186 |
+
tanë
|
187 |
+
mund
|
188 |
+
aqsa
|
189 |
+
sot
|
190 |
+
këto
|
191 |
+
tjera
|
192 |
+
tjetër
|
193 |
+
tjeter
|
194 |
+
atë
|
195 |
+
kisha
|
196 |
+
megjithatë
|
197 |
+
këtij
|
198 |
+
nëse
|
199 |
+
dimë
|
200 |
+
eshte
|
201 |
+
vazhdojmë
|
202 |
+
ka
|
203 |
+
kam
|
204 |
+
kesi
|
205 |
+
je
|
206 |
+
vazhdojme
|
207 |
+
duke
|
208 |
+
dime
|
209 |
+
kinse
|
210 |
+
por
|
211 |
+
kane
|
212 |
+
pika
|
213 |
+
keni
|
214 |
+
beje
|
215 |
+
ky
|
216 |
+
parasysh
|
217 |
+
apo
|
218 |
+
gjithë
|
219 |
+
me
|
220 |
+
ata
|
221 |
+
çfarë
|
222 |
+
jam
|
223 |
+
juve
|
224 |
+
kete
|
225 |
+
a
|
226 |
+
pra
|
227 |
+
qe
|
228 |
+
tash
|
229 |
+
në
|
230 |
+
vetë
|
231 |
+
vec
|
232 |
+
as
|
233 |
+
ndonese
|
234 |
+
tani
|
235 |
+
pak
|
236 |
+
e
|
237 |
+
shumë
|
data/nltk_data/corpora/stopwords/arabic
ADDED
@@ -0,0 +1,754 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
إذ
|
2 |
+
إذا
|
3 |
+
إذما
|
4 |
+
إذن
|
5 |
+
أف
|
6 |
+
أقل
|
7 |
+
أكثر
|
8 |
+
ألا
|
9 |
+
إلا
|
10 |
+
التي
|
11 |
+
الذي
|
12 |
+
الذين
|
13 |
+
اللاتي
|
14 |
+
اللائي
|
15 |
+
اللتان
|
16 |
+
اللتيا
|
17 |
+
اللتين
|
18 |
+
اللذان
|
19 |
+
اللذين
|
20 |
+
اللواتي
|
21 |
+
إلى
|
22 |
+
إليك
|
23 |
+
إليكم
|
24 |
+
إليكما
|
25 |
+
إليكن
|
26 |
+
أم
|
27 |
+
أما
|
28 |
+
أما
|
29 |
+
إما
|
30 |
+
أن
|
31 |
+
إن
|
32 |
+
إنا
|
33 |
+
أنا
|
34 |
+
أنت
|
35 |
+
أنتم
|
36 |
+
أنتما
|
37 |
+
أنتن
|
38 |
+
إنما
|
39 |
+
إنه
|
40 |
+
أنى
|
41 |
+
أنى
|
42 |
+
آه
|
43 |
+
آها
|
44 |
+
أو
|
45 |
+
أولاء
|
46 |
+
أولئك
|
47 |
+
أوه
|
48 |
+
آي
|
49 |
+
أي
|
50 |
+
أيها
|
51 |
+
إي
|
52 |
+
أين
|
53 |
+
أين
|
54 |
+
أينما
|
55 |
+
إيه
|
56 |
+
بخ
|
57 |
+
بس
|
58 |
+
بعد
|
59 |
+
بعض
|
60 |
+
بك
|
61 |
+
بكم
|
62 |
+
بكم
|
63 |
+
بكما
|
64 |
+
بكن
|
65 |
+
بل
|
66 |
+
بلى
|
67 |
+
بما
|
68 |
+
بماذا
|
69 |
+
بمن
|
70 |
+
بنا
|
71 |
+
به
|
72 |
+
بها
|
73 |
+
بهم
|
74 |
+
بهما
|
75 |
+
بهن
|
76 |
+
بي
|
77 |
+
بين
|
78 |
+
بيد
|
79 |
+
تلك
|
80 |
+
تلكم
|
81 |
+
تلكما
|
82 |
+
ته
|
83 |
+
تي
|
84 |
+
تين
|
85 |
+
تينك
|
86 |
+
ثم
|
87 |
+
ثمة
|
88 |
+
حاشا
|
89 |
+
حبذا
|
90 |
+
حتى
|
91 |
+
حيث
|
92 |
+
حيثما
|
93 |
+
حين
|
94 |
+
خلا
|
95 |
+
دون
|
96 |
+
ذا
|
97 |
+
ذات
|
98 |
+
ذاك
|
99 |
+
ذان
|
100 |
+
ذانك
|
101 |
+
ذلك
|
102 |
+
ذلكم
|
103 |
+
ذلكما
|
104 |
+
ذلكن
|
105 |
+
ذه
|
106 |
+
ذو
|
107 |
+
ذوا
|
108 |
+
ذواتا
|
109 |
+
ذواتي
|
110 |
+
ذي
|
111 |
+
ذين
|
112 |
+
ذينك
|
113 |
+
ريث
|
114 |
+
سوف
|
115 |
+
سوى
|
116 |
+
شتان
|
117 |
+
عدا
|
118 |
+
عسى
|
119 |
+
عل
|
120 |
+
على
|
121 |
+
عليك
|
122 |
+
عليه
|
123 |
+
عما
|
124 |
+
عن
|
125 |
+
عند
|
126 |
+
غير
|
127 |
+
فإذا
|
128 |
+
فإن
|
129 |
+
فلا
|
130 |
+
فمن
|
131 |
+
في
|
132 |
+
فيم
|
133 |
+
فيما
|
134 |
+
فيه
|
135 |
+
فيها
|
136 |
+
قد
|
137 |
+
كأن
|
138 |
+
كأنما
|
139 |
+
كأي
|
140 |
+
كأين
|
141 |
+
كذا
|
142 |
+
كذلك
|
143 |
+
كل
|
144 |
+
كلا
|
145 |
+
كلاهما
|
146 |
+
كلتا
|
147 |
+
كلما
|
148 |
+
كليكما
|
149 |
+
كليهما
|
150 |
+
كم
|
151 |
+
كم
|
152 |
+
كما
|
153 |
+
كي
|
154 |
+
كيت
|
155 |
+
كيف
|
156 |
+
كيفما
|
157 |
+
لا
|
158 |
+
لاسيما
|
159 |
+
لدى
|
160 |
+
لست
|
161 |
+
لستم
|
162 |
+
لستما
|
163 |
+
لستن
|
164 |
+
لسن
|
165 |
+
لسنا
|
166 |
+
لعل
|
167 |
+
لك
|
168 |
+
لكم
|
169 |
+
لكما
|
170 |
+
لكن
|
171 |
+
لكنما
|
172 |
+
لكي
|
173 |
+
لكيلا
|
174 |
+
لم
|
175 |
+
لما
|
176 |
+
لن
|
177 |
+
لنا
|
178 |
+
له
|
179 |
+
لها
|
180 |
+
لهم
|
181 |
+
لهما
|
182 |
+
لهن
|
183 |
+
لو
|
184 |
+
لولا
|
185 |
+
لوما
|
186 |
+
لي
|
187 |
+
لئن
|
188 |
+
ليت
|
189 |
+
ليس
|
190 |
+
ليسا
|
191 |
+
ليست
|
192 |
+
ليستا
|
193 |
+
ليسوا
|
194 |
+
ما
|
195 |
+
ماذا
|
196 |
+
متى
|
197 |
+
مذ
|
198 |
+
مع
|
199 |
+
مما
|
200 |
+
ممن
|
201 |
+
من
|
202 |
+
منه
|
203 |
+
منها
|
204 |
+
منذ
|
205 |
+
مه
|
206 |
+
مهما
|
207 |
+
نحن
|
208 |
+
نحو
|
209 |
+
نعم
|
210 |
+
ها
|
211 |
+
هاتان
|
212 |
+
هاته
|
213 |
+
هاتي
|
214 |
+
هاتين
|
215 |
+
هاك
|
216 |
+
هاهنا
|
217 |
+
هذا
|
218 |
+
هذان
|
219 |
+
هذه
|
220 |
+
هذي
|
221 |
+
هذين
|
222 |
+
هكذا
|
223 |
+
هل
|
224 |
+
هلا
|
225 |
+
هم
|
226 |
+
هما
|
227 |
+
هن
|
228 |
+
هنا
|
229 |
+
هناك
|
230 |
+
هنالك
|
231 |
+
هو
|
232 |
+
هؤلاء
|
233 |
+
هي
|
234 |
+
هيا
|
235 |
+
هيت
|
236 |
+
هيهات
|
237 |
+
والذي
|
238 |
+
والذين
|
239 |
+
وإذ
|
240 |
+
وإذا
|
241 |
+
وإن
|
242 |
+
ولا
|
243 |
+
ولكن
|
244 |
+
ولو
|
245 |
+
وما
|
246 |
+
ومن
|
247 |
+
وهو
|
248 |
+
يا
|
249 |
+
أبٌ
|
250 |
+
أخٌ
|
251 |
+
حمٌ
|
252 |
+
فو
|
253 |
+
أنتِ
|
254 |
+
يناير
|
255 |
+
فبراير
|
256 |
+
مارس
|
257 |
+
أبريل
|
258 |
+
مايو
|
259 |
+
يونيو
|
260 |
+
يوليو
|
261 |
+
أغسطس
|
262 |
+
سبتمبر
|
263 |
+
أكتوبر
|
264 |
+
نوفمبر
|
265 |
+
ديسمبر
|
266 |
+
جانفي
|
267 |
+
فيفري
|
268 |
+
مارس
|
269 |
+
أفريل
|
270 |
+
ماي
|
271 |
+
جوان
|
272 |
+
جويلية
|
273 |
+
أوت
|
274 |
+
كانون
|
275 |
+
شباط
|
276 |
+
آذار
|
277 |
+
نيسان
|
278 |
+
أيار
|
279 |
+
حزيران
|
280 |
+
تموز
|
281 |
+
آب
|
282 |
+
أيلول
|
283 |
+
تشرين
|
284 |
+
دولار
|
285 |
+
دينار
|
286 |
+
ريال
|
287 |
+
درهم
|
288 |
+
ليرة
|
289 |
+
جنيه
|
290 |
+
قرش
|
291 |
+
مليم
|
292 |
+
فلس
|
293 |
+
هللة
|
294 |
+
سنتيم
|
295 |
+
يورو
|
296 |
+
ين
|
297 |
+
يوان
|
298 |
+
شيكل
|
299 |
+
واحد
|
300 |
+
اثنان
|
301 |
+
ثلاثة
|
302 |
+
أربعة
|
303 |
+
خمسة
|
304 |
+
ستة
|
305 |
+
سبعة
|
306 |
+
ثمانية
|
307 |
+
تسعة
|
308 |
+
عشرة
|
309 |
+
أحد
|
310 |
+
اثنا
|
311 |
+
اثني
|
312 |
+
إحدى
|
313 |
+
ثلاث
|
314 |
+
أربع
|
315 |
+
خمس
|
316 |
+
ست
|
317 |
+
سبع
|
318 |
+
ثماني
|
319 |
+
تسع
|
320 |
+
عشر
|
321 |
+
ثمان
|
322 |
+
سبت
|
323 |
+
أحد
|
324 |
+
اثنين
|
325 |
+
ثلاثاء
|
326 |
+
أربعاء
|
327 |
+
خميس
|
328 |
+
جمعة
|
329 |
+
أول
|
330 |
+
ثان
|
331 |
+
ثاني
|
332 |
+
ثالث
|
333 |
+
رابع
|
334 |
+
خامس
|
335 |
+
سادس
|
336 |
+
سابع
|
337 |
+
ثامن
|
338 |
+
تاسع
|
339 |
+
عاشر
|
340 |
+
حادي
|
341 |
+
أ
|
342 |
+
ب
|
343 |
+
ت
|
344 |
+
ث
|
345 |
+
ج
|
346 |
+
ح
|
347 |
+
خ
|
348 |
+
د
|
349 |
+
ذ
|
350 |
+
ر
|
351 |
+
ز
|
352 |
+
س
|
353 |
+
ش
|
354 |
+
ص
|
355 |
+
ض
|
356 |
+
ط
|
357 |
+
ظ
|
358 |
+
ع
|
359 |
+
غ
|
360 |
+
ف
|
361 |
+
ق
|
362 |
+
ك
|
363 |
+
ل
|
364 |
+
م
|
365 |
+
ن
|
366 |
+
ه
|
367 |
+
و
|
368 |
+
ي
|
369 |
+
ء
|
370 |
+
ى
|
371 |
+
آ
|
372 |
+
ؤ
|
373 |
+
ئ
|
374 |
+
أ
|
375 |
+
ة
|
376 |
+
ألف
|
377 |
+
باء
|
378 |
+
تاء
|
379 |
+
ثاء
|
380 |
+
جيم
|
381 |
+
حاء
|
382 |
+
خاء
|
383 |
+
دال
|
384 |
+
ذال
|
385 |
+
راء
|
386 |
+
زاي
|
387 |
+
سين
|
388 |
+
شين
|
389 |
+
صاد
|
390 |
+
ضاد
|
391 |
+
طاء
|
392 |
+
ظاء
|
393 |
+
عين
|
394 |
+
غين
|
395 |
+
فاء
|
396 |
+
قاف
|
397 |
+
كاف
|
398 |
+
لام
|
399 |
+
ميم
|
400 |
+
نون
|
401 |
+
هاء
|
402 |
+
واو
|
403 |
+
ياء
|
404 |
+
همزة
|
405 |
+
ي
|
406 |
+
نا
|
407 |
+
ك
|
408 |
+
كن
|
409 |
+
ه
|
410 |
+
إياه
|
411 |
+
إياها
|
412 |
+
إياهما
|
413 |
+
إياهم
|
414 |
+
إياهن
|
415 |
+
إياك
|
416 |
+
إياكما
|
417 |
+
إياكم
|
418 |
+
إياك
|
419 |
+
إياكن
|
420 |
+
إياي
|
421 |
+
إيانا
|
422 |
+
أولالك
|
423 |
+
تانِ
|
424 |
+
تانِك
|
425 |
+
تِه
|
426 |
+
تِي
|
427 |
+
تَيْنِ
|
428 |
+
ثمّ
|
429 |
+
ثمّة
|
430 |
+
ذانِ
|
431 |
+
ذِه
|
432 |
+
ذِي
|
433 |
+
ذَيْنِ
|
434 |
+
هَؤلاء
|
435 |
+
هَاتانِ
|
436 |
+
هَاتِه
|
437 |
+
هَاتِي
|
438 |
+
هَاتَيْنِ
|
439 |
+
هَذا
|
440 |
+
هَذانِ
|
441 |
+
هَذِه
|
442 |
+
هَذِي
|
443 |
+
هَذَيْنِ
|
444 |
+
الألى
|
445 |
+
الألاء
|
446 |
+
أل
|
447 |
+
أنّى
|
448 |
+
أيّ
|
449 |
+
ّأيّان
|
450 |
+
أنّى
|
451 |
+
أيّ
|
452 |
+
ّأيّان
|
453 |
+
ذيت
|
454 |
+
كأيّ
|
455 |
+
كأيّن
|
456 |
+
بضع
|
457 |
+
فلان
|
458 |
+
وا
|
459 |
+
آمينَ
|
460 |
+
آهِ
|
461 |
+
آهٍ
|
462 |
+
آهاً
|
463 |
+
أُفٍّ
|
464 |
+
أُفٍّ
|
465 |
+
أفٍّ
|
466 |
+
أمامك
|
467 |
+
أمامكَ
|
468 |
+
أوّهْ
|
469 |
+
إلَيْكَ
|
470 |
+
إلَيْكَ
|
471 |
+
إليكَ
|
472 |
+
إليكنّ
|
473 |
+
إيهٍ
|
474 |
+
بخٍ
|
475 |
+
بسّ
|
476 |
+
بَسْ
|
477 |
+
بطآن
|
478 |
+
بَلْهَ
|
479 |
+
حاي
|
480 |
+
حَذارِ
|
481 |
+
حيَّ
|
482 |
+
حيَّ
|
483 |
+
دونك
|
484 |
+
رويدك
|
485 |
+
سرعان
|
486 |
+
شتانَ
|
487 |
+
شَتَّانَ
|
488 |
+
صهْ
|
489 |
+
صهٍ
|
490 |
+
طاق
|
491 |
+
طَق
|
492 |
+
عَدَسْ
|
493 |
+
كِخ
|
494 |
+
مكانَك
|
495 |
+
مكانَك
|
496 |
+
مكانَك
|
497 |
+
مكانكم
|
498 |
+
مكانكما
|
499 |
+
مكانكنّ
|
500 |
+
نَخْ
|
501 |
+
هاكَ
|
502 |
+
هَجْ
|
503 |
+
هلم
|
504 |
+
هيّا
|
505 |
+
هَيْهات
|
506 |
+
وا
|
507 |
+
واهاً
|
508 |
+
وراءَك
|
509 |
+
وُشْكَانَ
|
510 |
+
وَيْ
|
511 |
+
يفعلان
|
512 |
+
تفعلان
|
513 |
+
يفعلون
|
514 |
+
تفعلون
|
515 |
+
تفعلين
|
516 |
+
اتخذ
|
517 |
+
ألفى
|
518 |
+
تخذ
|
519 |
+
ترك
|
520 |
+
تعلَّم
|
521 |
+
جعل
|
522 |
+
حجا
|
523 |
+
حبيب
|
524 |
+
خال
|
525 |
+
حسب
|
526 |
+
خال
|
527 |
+
درى
|
528 |
+
رأى
|
529 |
+
زعم
|
530 |
+
صبر
|
531 |
+
ظنَّ
|
532 |
+
عدَّ
|
533 |
+
علم
|
534 |
+
غادر
|
535 |
+
ذهب
|
536 |
+
وجد
|
537 |
+
ورد
|
538 |
+
وهب
|
539 |
+
أسكن
|
540 |
+
أطعم
|
541 |
+
أعطى
|
542 |
+
رزق
|
543 |
+
زود
|
544 |
+
سقى
|
545 |
+
كسا
|
546 |
+
أخبر
|
547 |
+
أرى
|
548 |
+
أعلم
|
549 |
+
أنبأ
|
550 |
+
حدَث
|
551 |
+
خبَّر
|
552 |
+
نبَّا
|
553 |
+
أفعل به
|
554 |
+
ما أفعله
|
555 |
+
بئس
|
556 |
+
ساء
|
557 |
+
طالما
|
558 |
+
قلما
|
559 |
+
لات
|
560 |
+
لكنَّ
|
561 |
+
ءَ
|
562 |
+
أجل
|
563 |
+
إذاً
|
564 |
+
أمّا
|
565 |
+
إمّا
|
566 |
+
إنَّ
|
567 |
+
أنًّ
|
568 |
+
أى
|
569 |
+
إى
|
570 |
+
أيا
|
571 |
+
ب
|
572 |
+
ثمَّ
|
573 |
+
جلل
|
574 |
+
جير
|
575 |
+
رُبَّ
|
576 |
+
س
|
577 |
+
علًّ
|
578 |
+
ف
|
579 |
+
كأنّ
|
580 |
+
كلَّا
|
581 |
+
كى
|
582 |
+
ل
|
583 |
+
لات
|
584 |
+
لعلَّ
|
585 |
+
لكنَّ
|
586 |
+
لكنَّ
|
587 |
+
م
|
588 |
+
نَّ
|
589 |
+
هلّا
|
590 |
+
وا
|
591 |
+
أل
|
592 |
+
إلّا
|
593 |
+
ت
|
594 |
+
ك
|
595 |
+
لمّا
|
596 |
+
ن
|
597 |
+
ه
|
598 |
+
و
|
599 |
+
ا
|
600 |
+
ي
|
601 |
+
تجاه
|
602 |
+
تلقاء
|
603 |
+
جميع
|
604 |
+
حسب
|
605 |
+
سبحان
|
606 |
+
شبه
|
607 |
+
لعمر
|
608 |
+
مثل
|
609 |
+
معاذ
|
610 |
+
أبو
|
611 |
+
أخو
|
612 |
+
حمو
|
613 |
+
فو
|
614 |
+
مئة
|
615 |
+
مئتان
|
616 |
+
ثلاثمئة
|
617 |
+
أربعمئة
|
618 |
+
خمسمئة
|
619 |
+
ستمئة
|
620 |
+
سبعمئة
|
621 |
+
ثمنمئة
|
622 |
+
تسعمئة
|
623 |
+
مائة
|
624 |
+
ثلاثمائة
|
625 |
+
أربعمائة
|
626 |
+
خمسمائة
|
627 |
+
ستمائة
|
628 |
+
سبعمائة
|
629 |
+
ثمانمئة
|
630 |
+
تسعمائة
|
631 |
+
عشرون
|
632 |
+
ثلاثون
|
633 |
+
اربعون
|
634 |
+
خمسون
|
635 |
+
ستون
|
636 |
+
سبعون
|
637 |
+
ثمانون
|
638 |
+
تسعون
|
639 |
+
عشرين
|
640 |
+
ثلاثين
|
641 |
+
اربعين
|
642 |
+
خمسين
|
643 |
+
ستين
|
644 |
+
سبعين
|
645 |
+
ثمانين
|
646 |
+
تسعين
|
647 |
+
بضع
|
648 |
+
نيف
|
649 |
+
أجمع
|
650 |
+
جميع
|
651 |
+
عامة
|
652 |
+
عين
|
653 |
+
نفس
|
654 |
+
لا سيما
|
655 |
+
أصلا
|
656 |
+
أهلا
|
657 |
+
أيضا
|
658 |
+
بؤسا
|
659 |
+
بعدا
|
660 |
+
بغتة
|
661 |
+
تعسا
|
662 |
+
حقا
|
663 |
+
حمدا
|
664 |
+
خلافا
|
665 |
+
خاصة
|
666 |
+
دواليك
|
667 |
+
سحقا
|
668 |
+
سرا
|
669 |
+
سمعا
|
670 |
+
صبرا
|
671 |
+
صدقا
|
672 |
+
صراحة
|
673 |
+
طرا
|
674 |
+
عجبا
|
675 |
+
عيانا
|
676 |
+
غالبا
|
677 |
+
فرادى
|
678 |
+
فضلا
|
679 |
+
قاطبة
|
680 |
+
كثيرا
|
681 |
+
لبيك
|
682 |
+
معاذ
|
683 |
+
أبدا
|
684 |
+
إزاء
|
685 |
+
أصلا
|
686 |
+
الآن
|
687 |
+
أمد
|
688 |
+
أمس
|
689 |
+
آنفا
|
690 |
+
آناء
|
691 |
+
أنّى
|
692 |
+
أول
|
693 |
+
أيّان
|
694 |
+
تارة
|
695 |
+
ثمّ
|
696 |
+
ثمّة
|
697 |
+
حقا
|
698 |
+
صباح
|
699 |
+
مساء
|
700 |
+
ضحوة
|
701 |
+
عوض
|
702 |
+
غدا
|
703 |
+
غداة
|
704 |
+
قطّ
|
705 |
+
كلّما
|
706 |
+
لدن
|
707 |
+
لمّا
|
708 |
+
مرّة
|
709 |
+
قبل
|
710 |
+
خلف
|
711 |
+
أمام
|
712 |
+
فوق
|
713 |
+
تحت
|
714 |
+
يمين
|
715 |
+
شمال
|
716 |
+
ارتدّ
|
717 |
+
استحال
|
718 |
+
أصبح
|
719 |
+
أضحى
|
720 |
+
آض
|
721 |
+
أمسى
|
722 |
+
انقلب
|
723 |
+
بات
|
724 |
+
تبدّل
|
725 |
+
تحوّل
|
726 |
+
حار
|
727 |
+
رجع
|
728 |
+
راح
|
729 |
+
صار
|
730 |
+
ظلّ
|
731 |
+
عاد
|
732 |
+
غدا
|
733 |
+
كان
|
734 |
+
ما انفك
|
735 |
+
ما برح
|
736 |
+
مادام
|
737 |
+
مازال
|
738 |
+
مافتئ
|
739 |
+
ابتدأ
|
740 |
+
أخذ
|
741 |
+
اخلولق
|
742 |
+
أقبل
|
743 |
+
انبرى
|
744 |
+
أنشأ
|
745 |
+
أوشك
|
746 |
+
جعل
|
747 |
+
حرى
|
748 |
+
شرع
|
749 |
+
طفق
|
750 |
+
علق
|
751 |
+
قام
|
752 |
+
كرب
|
753 |
+
كاد
|
754 |
+
هبّ
|
data/nltk_data/corpora/stopwords/azerbaijani
ADDED
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
ad
|
3 |
+
altı
|
4 |
+
altmış
|
5 |
+
amma
|
6 |
+
arasında
|
7 |
+
artıq
|
8 |
+
ay
|
9 |
+
az
|
10 |
+
bax
|
11 |
+
belə
|
12 |
+
bəli
|
13 |
+
bəlkə
|
14 |
+
beş
|
15 |
+
bəy
|
16 |
+
bəzən
|
17 |
+
bəzi
|
18 |
+
bilər
|
19 |
+
bir
|
20 |
+
biraz
|
21 |
+
biri
|
22 |
+
birşey
|
23 |
+
biz
|
24 |
+
bizim
|
25 |
+
bizlər
|
26 |
+
bu
|
27 |
+
buna
|
28 |
+
bundan
|
29 |
+
bunların
|
30 |
+
bunu
|
31 |
+
bunun
|
32 |
+
buradan
|
33 |
+
bütün
|
34 |
+
ci
|
35 |
+
cı
|
36 |
+
çox
|
37 |
+
cu
|
38 |
+
cü
|
39 |
+
çünki
|
40 |
+
da
|
41 |
+
daha
|
42 |
+
də
|
43 |
+
dedi
|
44 |
+
dək
|
45 |
+
dən
|
46 |
+
dəqiqə
|
47 |
+
deyil
|
48 |
+
dir
|
49 |
+
doqquz
|
50 |
+
doqsan
|
51 |
+
dörd
|
52 |
+
düz
|
53 |
+
ə
|
54 |
+
edən
|
55 |
+
edir
|
56 |
+
əgər
|
57 |
+
əlbəttə
|
58 |
+
elə
|
59 |
+
əlli
|
60 |
+
ən
|
61 |
+
əslində
|
62 |
+
et
|
63 |
+
etdi
|
64 |
+
etmə
|
65 |
+
etmək
|
66 |
+
faiz
|
67 |
+
gilə
|
68 |
+
görə
|
69 |
+
ha
|
70 |
+
haqqında
|
71 |
+
harada
|
72 |
+
hə
|
73 |
+
heç
|
74 |
+
həm
|
75 |
+
həmin
|
76 |
+
həmişə
|
77 |
+
hər
|
78 |
+
ı
|
79 |
+
idi
|
80 |
+
iki
|
81 |
+
il
|
82 |
+
ildə
|
83 |
+
ilə
|
84 |
+
ilk
|
85 |
+
in
|
86 |
+
indi
|
87 |
+
isə
|
88 |
+
istifadə
|
89 |
+
iyirmi
|
90 |
+
ki
|
91 |
+
kim
|
92 |
+
kimə
|
93 |
+
kimi
|
94 |
+
lakin
|
95 |
+
lap
|
96 |
+
məhz
|
97 |
+
mən
|
98 |
+
mənə
|
99 |
+
mirşey
|
100 |
+
nə
|
101 |
+
nəhayət
|
102 |
+
niyə
|
103 |
+
o
|
104 |
+
obirisi
|
105 |
+
of
|
106 |
+
olan
|
107 |
+
olar
|
108 |
+
olaraq
|
109 |
+
oldu
|
110 |
+
olduğu
|
111 |
+
olmadı
|
112 |
+
olmaz
|
113 |
+
olmuşdur
|
114 |
+
olsun
|
115 |
+
olur
|
116 |
+
on
|
117 |
+
ona
|
118 |
+
ondan
|
119 |
+
onlar
|
120 |
+
onlardan
|
121 |
+
onların
|
122 |
+
onsuzda
|
123 |
+
onu
|
124 |
+
onun
|
125 |
+
oradan
|
126 |
+
otuz
|
127 |
+
öz
|
128 |
+
özü
|
129 |
+
qarşı
|
130 |
+
qədər
|
131 |
+
qırx
|
132 |
+
saat
|
133 |
+
sadəcə
|
134 |
+
saniyə
|
135 |
+
səhv
|
136 |
+
səkkiz
|
137 |
+
səksən
|
138 |
+
sən
|
139 |
+
sənə
|
140 |
+
sənin
|
141 |
+
siz
|
142 |
+
sizin
|
143 |
+
sizlər
|
144 |
+
sonra
|
145 |
+
təəssüf
|
146 |
+
ü
|
147 |
+
üç
|
148 |
+
üçün
|
149 |
+
var
|
150 |
+
və
|
151 |
+
xan
|
152 |
+
xanım
|
153 |
+
xeyr
|
154 |
+
ya
|
155 |
+
yalnız
|
156 |
+
yaxşı
|
157 |
+
yeddi
|
158 |
+
yenə
|
159 |
+
yəni
|
160 |
+
yetmiş
|
161 |
+
yox
|
162 |
+
yoxdur
|
163 |
+
yoxsa
|
164 |
+
yüz
|
165 |
+
zaman
|
data/nltk_data/corpora/stopwords/basque
ADDED
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ahala
|
2 |
+
aitzitik
|
3 |
+
al
|
4 |
+
ala
|
5 |
+
alabadere
|
6 |
+
alabaina
|
7 |
+
alabaina
|
8 |
+
aldiz
|
9 |
+
alta
|
10 |
+
amaitu
|
11 |
+
amaitzeko
|
12 |
+
anitz
|
13 |
+
antzina
|
14 |
+
arabera
|
15 |
+
arabera
|
16 |
+
arabera
|
17 |
+
argi
|
18 |
+
arratsaldero
|
19 |
+
arte
|
20 |
+
artean
|
21 |
+
asko
|
22 |
+
aspaldiko
|
23 |
+
aurrera
|
24 |
+
aurrera
|
25 |
+
azkenez
|
26 |
+
azkenik
|
27 |
+
azkenik
|
28 |
+
ba
|
29 |
+
bada
|
30 |
+
bada
|
31 |
+
bada
|
32 |
+
bada
|
33 |
+
badarik
|
34 |
+
badarik
|
35 |
+
badarik
|
36 |
+
badere
|
37 |
+
bai
|
38 |
+
baina
|
39 |
+
baina
|
40 |
+
baina
|
41 |
+
baino
|
42 |
+
baino
|
43 |
+
baino
|
44 |
+
baino
|
45 |
+
baita
|
46 |
+
baizik
|
47 |
+
baldin
|
48 |
+
baldin
|
49 |
+
barren
|
50 |
+
bat
|
51 |
+
batean
|
52 |
+
batean
|
53 |
+
batean
|
54 |
+
batean
|
55 |
+
batek
|
56 |
+
baten
|
57 |
+
batera
|
58 |
+
batez
|
59 |
+
bati
|
60 |
+
batzuei
|
61 |
+
batzuek
|
62 |
+
batzuetan
|
63 |
+
batzuk
|
64 |
+
bazen
|
65 |
+
bederen
|
66 |
+
bederik
|
67 |
+
beharrez
|
68 |
+
behiala
|
69 |
+
behin
|
70 |
+
behin
|
71 |
+
behin
|
72 |
+
behin
|
73 |
+
behinik
|
74 |
+
behinola
|
75 |
+
behintzat
|
76 |
+
bera
|
77 |
+
beraiek
|
78 |
+
beranduago
|
79 |
+
berau
|
80 |
+
berauek
|
81 |
+
beraz
|
82 |
+
beraz
|
83 |
+
bere
|
84 |
+
berean
|
85 |
+
berebat
|
86 |
+
berehala
|
87 |
+
berori
|
88 |
+
beroriek
|
89 |
+
berriro
|
90 |
+
berriz
|
91 |
+
bertzalde
|
92 |
+
bertzenaz
|
93 |
+
bestalde
|
94 |
+
beste
|
95 |
+
bestela
|
96 |
+
besterik
|
97 |
+
bezain
|
98 |
+
bezala
|
99 |
+
bide
|
100 |
+
bien
|
101 |
+
bigarrenez
|
102 |
+
bigarrenik
|
103 |
+
bitartean
|
104 |
+
bitartean
|
105 |
+
bizkitartean
|
106 |
+
bukaeran
|
107 |
+
bukatzeko
|
108 |
+
da
|
109 |
+
dago
|
110 |
+
dago
|
111 |
+
dela
|
112 |
+
dela
|
113 |
+
dela
|
114 |
+
delarik
|
115 |
+
den
|
116 |
+
dena
|
117 |
+
dena
|
118 |
+
dezadan
|
119 |
+
dira
|
120 |
+
ditu
|
121 |
+
du
|
122 |
+
dute
|
123 |
+
edo
|
124 |
+
edo
|
125 |
+
edota
|
126 |
+
egin
|
127 |
+
egin
|
128 |
+
egun
|
129 |
+
egun
|
130 |
+
egunean
|
131 |
+
emateko
|
132 |
+
era
|
133 |
+
erdi
|
134 |
+
ere
|
135 |
+
ere
|
136 |
+
ere
|
137 |
+
ere
|
138 |
+
ere
|
139 |
+
esan
|
140 |
+
esan
|
141 |
+
esanak
|
142 |
+
esandakoaren
|
143 |
+
eta
|
144 |
+
eta
|
145 |
+
eta
|
146 |
+
eta
|
147 |
+
eta
|
148 |
+
eta
|
149 |
+
eurak
|
150 |
+
ez
|
151 |
+
ez
|
152 |
+
ez
|
153 |
+
eze
|
154 |
+
ezen
|
155 |
+
ezer
|
156 |
+
ezezik
|
157 |
+
ezik
|
158 |
+
ezpabere
|
159 |
+
ezpada
|
160 |
+
ezpere
|
161 |
+
ezperen
|
162 |
+
ezta
|
163 |
+
funtsean
|
164 |
+
gabe
|
165 |
+
gain
|
166 |
+
gainera
|
167 |
+
gainera
|
168 |
+
gainerontzean
|
169 |
+
gaur
|
170 |
+
gero
|
171 |
+
gero
|
172 |
+
gero
|
173 |
+
geroago
|
174 |
+
gisa
|
175 |
+
gu
|
176 |
+
gutxi
|
177 |
+
guzti
|
178 |
+
guztia
|
179 |
+
guztiz
|
180 |
+
haatik
|
181 |
+
haiei
|
182 |
+
haiek
|
183 |
+
haietan
|
184 |
+
hain
|
185 |
+
hainbeste
|
186 |
+
hainbestez
|
187 |
+
hala
|
188 |
+
hala
|
189 |
+
hala
|
190 |
+
halaber
|
191 |
+
halako
|
192 |
+
halatan
|
193 |
+
han
|
194 |
+
handik
|
195 |
+
hango
|
196 |
+
hara
|
197 |
+
hargatik
|
198 |
+
hari
|
199 |
+
hark
|
200 |
+
hartan
|
201 |
+
hartan
|
202 |
+
hasi
|
203 |
+
hasi
|
204 |
+
hasiera
|
205 |
+
hasieran
|
206 |
+
hasteaz
|
207 |
+
hasteko
|
208 |
+
hasteko
|
209 |
+
hau
|
210 |
+
hau
|
211 |
+
hau
|
212 |
+
hau
|
213 |
+
hau
|
214 |
+
hau
|
215 |
+
hauei
|
216 |
+
hauek
|
217 |
+
hauetan
|
218 |
+
hemen
|
219 |
+
hemendik
|
220 |
+
hemengo
|
221 |
+
hi
|
222 |
+
hona
|
223 |
+
honebestez
|
224 |
+
honek
|
225 |
+
honela
|
226 |
+
honela
|
227 |
+
honela
|
228 |
+
honen
|
229 |
+
honen
|
230 |
+
honetan
|
231 |
+
honetaz
|
232 |
+
honi
|
233 |
+
hor
|
234 |
+
hori
|
235 |
+
hori
|
236 |
+
hori
|
237 |
+
horiei
|
238 |
+
horiek
|
239 |
+
horietan
|
240 |
+
horko
|
241 |
+
horra
|
242 |
+
horratik
|
243 |
+
horregatik
|
244 |
+
horregatik
|
245 |
+
horrek
|
246 |
+
horrela
|
247 |
+
horrela
|
248 |
+
horrela
|
249 |
+
horren
|
250 |
+
horrenbestez
|
251 |
+
horretan
|
252 |
+
horri
|
253 |
+
hortaz
|
254 |
+
hortaz
|
255 |
+
hortik
|
256 |
+
hura
|
257 |
+
ikusi
|
258 |
+
ikusi
|
259 |
+
izan
|
260 |
+
izan
|
261 |
+
izan
|
262 |
+
jarraituz
|
263 |
+
kariaz
|
264 |
+
kasuaz
|
265 |
+
kontuan
|
266 |
+
laburbilduz
|
267 |
+
laburki
|
268 |
+
laster
|
269 |
+
laster
|
270 |
+
lehen
|
271 |
+
lehen
|
272 |
+
lehen
|
273 |
+
lehen
|
274 |
+
lehenengo
|
275 |
+
lehenengo
|
276 |
+
lehenik
|
277 |
+
lehen-lehenik
|
278 |
+
litzateke
|
279 |
+
medio
|
280 |
+
mendean
|
281 |
+
mundura
|
282 |
+
nahiz
|
283 |
+
ni
|
284 |
+
noiz
|
285 |
+
nola
|
286 |
+
non
|
287 |
+
nondik
|
288 |
+
nongo
|
289 |
+
nor
|
290 |
+
nora
|
291 |
+
on
|
292 |
+
ondoren
|
293 |
+
ondorio
|
294 |
+
ondorioz
|
295 |
+
ondorioz
|
296 |
+
orain
|
297 |
+
ordea
|
298 |
+
orduan
|
299 |
+
orduan
|
300 |
+
orduan
|
301 |
+
orduko
|
302 |
+
ordura
|
303 |
+
orobat
|
304 |
+
ostean
|
305 |
+
ostera
|
306 |
+
osterantzean
|
307 |
+
pentsatuz
|
308 |
+
ustez
|
309 |
+
ze
|
310 |
+
zein
|
311 |
+
zein
|
312 |
+
zen
|
313 |
+
zen
|
314 |
+
zenbait
|
315 |
+
zenbat
|
316 |
+
zer
|
317 |
+
zeren
|
318 |
+
zergatik
|
319 |
+
zergatik
|
320 |
+
ziren
|
321 |
+
zituen
|
322 |
+
zu
|
323 |
+
zuek
|
324 |
+
zuen
|
325 |
+
zuten
|
326 |
+
zuzen
|
data/nltk_data/corpora/stopwords/belarusian
ADDED
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
на
|
2 |
+
не
|
3 |
+
што
|
4 |
+
па
|
5 |
+
да
|
6 |
+
за
|
7 |
+
як
|
8 |
+
для
|
9 |
+
гэта
|
10 |
+
ад
|
11 |
+
яго
|
12 |
+
аб
|
13 |
+
ён
|
14 |
+
калi
|
15 |
+
якiя
|
16 |
+
мы
|
17 |
+
больш
|
18 |
+
таксама
|
19 |
+
iх
|
20 |
+
толькi
|
21 |
+
пра
|
22 |
+
каб
|
23 |
+
гэтым
|
24 |
+
так
|
25 |
+
але
|
26 |
+
яшчэ
|
27 |
+
тым
|
28 |
+
якi
|
29 |
+
яе
|
30 |
+
пры
|
31 |
+
цi
|
32 |
+
яны
|
33 |
+
цяпер
|
34 |
+
таму
|
35 |
+
пасля
|
36 |
+
каля
|
37 |
+
гэтага
|
38 |
+
годзе
|
39 |
+
тыс
|
40 |
+
таго
|
41 |
+
тут
|
42 |
+
тысяч
|
43 |
+
ўжо
|
44 |
+
дзе
|
45 |
+
якая
|
46 |
+
са
|
47 |
+
яна
|
48 |
+
гэты
|
49 |
+
пад
|
50 |
+
можна
|
51 |
+
паводле
|
52 |
+
вельмi
|
53 |
+
ва
|
54 |
+
то
|
55 |
+
сёння
|
56 |
+
можа
|
57 |
+
ўсё
|
58 |
+
нас
|
59 |
+
вось
|
60 |
+
нават
|
61 |
+
або
|
62 |
+
сёлета
|
63 |
+
іх
|
64 |
+
той
|
65 |
+
ужо
|
66 |
+
чым
|
67 |
+
тое
|
68 |
+
хто
|
69 |
+
жа
|
70 |
+
без
|
71 |
+
праз
|
72 |
+
мяне
|
73 |
+
аднак
|
74 |
+
бо
|
75 |
+
мне
|
76 |
+
там
|
77 |
+
адзiн
|
78 |
+
два
|
79 |
+
сярод
|
80 |
+
гэтай
|
81 |
+
сябе
|
82 |
+
калі
|
83 |
+
толькі
|
84 |
+
дарэчы
|
85 |
+
млн
|
86 |
+
падчас
|
87 |
+
вы
|
88 |
+
усё
|
89 |
+
нашай
|
90 |
+
якія
|
91 |
+
iм
|
92 |
+
разам
|
93 |
+
акрамя
|
94 |
+
ды
|
95 |
+
якiх
|
96 |
+
мае
|
97 |
+
стала
|
98 |
+
раней
|
99 |
+
шмат
|
100 |
+
амаль
|
101 |
+
усе
|
102 |
+
першы
|
103 |
+
пакуль
|
104 |
+
напрыклад
|
105 |
+
тысячы
|
106 |
+
ўсе
|
107 |
+
ты
|
108 |
+
якой
|
109 |
+
раз
|
110 |
+
свае
|
111 |
+
iншых
|
112 |
+
гэтыя
|
113 |
+
тры
|
114 |
+
яму
|
115 |
+
які
|
116 |
+
сваю
|
117 |
+
памiж
|
118 |
+
ўсiх
|
119 |
+
ёй
|
120 |
+
сваёй
|
121 |
+
хоць
|
122 |
+
некалькi
|
123 |
+
аднаго
|
124 |
+
менавiта
|
125 |
+
проста
|
126 |
+
потым
|
127 |
+
нi
|
128 |
+
заўсёды
|
129 |
+
менш
|
130 |
+
тады
|
131 |
+
нам
|
132 |
+
свой
|
133 |
+
якiм
|
134 |
+
свята
|
135 |
+
такiм
|
136 |
+
перад
|
137 |
+
вядома
|
138 |
+
бы
|
139 |
+
якое
|
140 |
+
мая
|
141 |
+
тых
|
142 |
+
гэтых
|
143 |
+
такiя
|
144 |
+
зараз
|
145 |
+
адной
|
146 |
+
адна
|
147 |
+
адным
|
148 |
+
якога
|
149 |
+
добра
|
150 |
+
над
|
151 |
+
летась
|
152 |
+
справа
|
153 |
+
кожны
|
154 |
+
свайго
|
155 |
+
сваiх
|
156 |
+
неабходна
|
157 |
+
такiх
|
158 |
+
зноў
|
159 |
+
мiльёнаў
|
160 |
+
прычым
|
161 |
+
iншыя
|
162 |
+
днём
|
163 |
+
млрд
|
164 |
+
сам
|
165 |
+
ці
|
166 |
+
нашы
|
167 |
+
сабе
|
168 |
+
адразу
|
169 |
+
усяго
|
170 |
+
двух
|
171 |
+
тыя
|
172 |
+
нашых
|
173 |
+
якую
|
174 |
+
чаго
|
175 |
+
асаблiва
|
176 |
+
сваiм
|
177 |
+
першым
|
178 |
+
згодна
|
179 |
+
такое
|
180 |
+
найбольш
|
181 |
+
такi
|
182 |
+
дзве
|
183 |
+
ім
|
184 |
+
вельмі
|
185 |
+
наша
|
186 |
+
дзвюх
|
187 |
+
ну
|
188 |
+
сваё
|
189 |
+
праўда
|
190 |
+
вас
|
191 |
+
трох
|
192 |
+
зусiм
|
193 |
+
пяць
|
194 |
+
некаторыя
|
195 |
+
дык
|
196 |
+
крыху
|
197 |
+
чаму
|
198 |
+
такой
|
199 |
+
магчыма
|
200 |
+
сапраўды
|
201 |
+
такая
|
202 |
+
вам
|
203 |
+
нешта
|
204 |
+
усiх
|
205 |
+
адно
|
206 |
+
далей
|
207 |
+
значыць
|
208 |
+
чатыры
|
209 |
+
самых
|
210 |
+
хутка
|
211 |
+
самы
|
212 |
+
дзякуючы
|
213 |
+
наш
|
214 |
+
часта
|
215 |
+
самым
|
216 |
+
першай
|
217 |
+
адзін
|
218 |
+
супраць
|
219 |
+
яно
|
220 |
+
другi
|
221 |
+
удзень
|
222 |
+
нiчога
|
223 |
+
мой
|
224 |
+
побач
|
data/nltk_data/corpora/stopwords/bengali
ADDED
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
অতএব
|
2 |
+
অথচ
|
3 |
+
অথবা
|
4 |
+
অনুযায়ী
|
5 |
+
অনেক
|
6 |
+
অনেকে
|
7 |
+
অনেকেই
|
8 |
+
অন্তত
|
9 |
+
অন্য
|
10 |
+
অবধি
|
11 |
+
অবশ্য
|
12 |
+
অর্থাত
|
13 |
+
আই
|
14 |
+
আগামী
|
15 |
+
আগে
|
16 |
+
আগেই
|
17 |
+
আছে
|
18 |
+
আজ
|
19 |
+
আদ্যভাগে
|
20 |
+
আপনার
|
21 |
+
আপনি
|
22 |
+
আবার
|
23 |
+
আমরা
|
24 |
+
আমাকে
|
25 |
+
আমাদের
|
26 |
+
আমার
|
27 |
+
আমি
|
28 |
+
আর
|
29 |
+
আরও
|
30 |
+
ই
|
31 |
+
ইত্যাদি
|
32 |
+
ইহা
|
33 |
+
উচিত
|
34 |
+
উত্তর
|
35 |
+
উনি
|
36 |
+
উপর
|
37 |
+
উপরে
|
38 |
+
এ
|
39 |
+
এঁদের
|
40 |
+
এঁরা
|
41 |
+
এই
|
42 |
+
একই
|
43 |
+
একটি
|
44 |
+
একবার
|
45 |
+
একে
|
46 |
+
এক্
|
47 |
+
এখন
|
48 |
+
এখনও
|
49 |
+
এখানে
|
50 |
+
এখানেই
|
51 |
+
এটা
|
52 |
+
এটাই
|
53 |
+
এটি
|
54 |
+
এত
|
55 |
+
এতটাই
|
56 |
+
এতে
|
57 |
+
এদের
|
58 |
+
এব
|
59 |
+
এবং
|
60 |
+
এবার
|
61 |
+
এমন
|
62 |
+
এমনকী
|
63 |
+
এমনি
|
64 |
+
এর
|
65 |
+
এরা
|
66 |
+
এল
|
67 |
+
এস
|
68 |
+
এসে
|
69 |
+
ঐ
|
70 |
+
ও
|
71 |
+
ওঁদের
|
72 |
+
ওঁর
|
73 |
+
ওঁরা
|
74 |
+
ওই
|
75 |
+
ওকে
|
76 |
+
ওখানে
|
77 |
+
ওদের
|
78 |
+
ওর
|
79 |
+
ওরা
|
80 |
+
কখনও
|
81 |
+
কত
|
82 |
+
কবে
|
83 |
+
কমনে
|
84 |
+
কয়েক
|
85 |
+
কয়েকটি
|
86 |
+
করছে
|
87 |
+
করছেন
|
88 |
+
করতে
|
89 |
+
করবে
|
90 |
+
করবেন
|
91 |
+
করলে
|
92 |
+
করলেন
|
93 |
+
করা
|
94 |
+
করাই
|
95 |
+
করায়
|
96 |
+
করার
|
97 |
+
করি
|
98 |
+
করিতে
|
99 |
+
করিয়া
|
100 |
+
করিয়ে
|
101 |
+
করে
|
102 |
+
করেই
|
103 |
+
করেছিলেন
|
104 |
+
করেছে
|
105 |
+
করেছেন
|
106 |
+
করেন
|
107 |
+
কাউকে
|
108 |
+
কাছ
|
109 |
+
কাছে
|
110 |
+
কাজ
|
111 |
+
কাজে
|
112 |
+
কারও
|
113 |
+
কারণ
|
114 |
+
কি
|
115 |
+
কিংবা
|
116 |
+
কিছু
|
117 |
+
কিছুই
|
118 |
+
কিন্তু
|
119 |
+
কী
|
120 |
+
কে
|
121 |
+
কেউ
|
122 |
+
কেউই
|
123 |
+
কেখা
|
124 |
+
কেন
|
125 |
+
কোটি
|
126 |
+
কোন
|
127 |
+
কোনও
|
128 |
+
কোনো
|
129 |
+
ক্ষেত্রে
|
130 |
+
কয়েক
|
131 |
+
খুব
|
132 |
+
গিয়ে
|
133 |
+
গিয়েছে
|
134 |
+
গিয়ে
|
135 |
+
গুলি
|
136 |
+
গেছে
|
137 |
+
গেল
|
138 |
+
গেলে
|
139 |
+
গোটা
|
140 |
+
চলে
|
141 |
+
চান
|
142 |
+
চায়
|
143 |
+
চার
|
144 |
+
চালু
|
145 |
+
চেয়ে
|
146 |
+
চেষ্টা
|
147 |
+
ছাড়া
|
148 |
+
ছাড়াও
|
149 |
+
ছিল
|
150 |
+
ছিলেন
|
151 |
+
জন
|
152 |
+
জনকে
|
153 |
+
জনের
|
154 |
+
জন্য
|
155 |
+
জন্যওজে
|
156 |
+
জানতে
|
157 |
+
জানা
|
158 |
+
জানানো
|
159 |
+
জানায়
|
160 |
+
জানিয়ে
|
161 |
+
জানিয়েছে
|
162 |
+
জে
|
163 |
+
জ্নজন
|
164 |
+
টি
|
165 |
+
ঠিক
|
166 |
+
তখন
|
167 |
+
তত
|
168 |
+
তথা
|
169 |
+
তবু
|
170 |
+
তবে
|
171 |
+
তা
|
172 |
+
তাঁকে
|
173 |
+
তাঁদের
|
174 |
+
তাঁর
|
175 |
+
তাঁরা
|
176 |
+
তাঁাহারা
|
177 |
+
তাই
|
178 |
+
তাও
|
179 |
+
তাকে
|
180 |
+
তাতে
|
181 |
+
তাদের
|
182 |
+
তার
|
183 |
+
তারপর
|
184 |
+
তারা
|
185 |
+
তারৈ
|
186 |
+
তাহলে
|
187 |
+
তাহা
|
188 |
+
তাহাতে
|
189 |
+
তাহার
|
190 |
+
তিনঐ
|
191 |
+
তিনি
|
192 |
+
তিনিও
|
193 |
+
তুমি
|
194 |
+
তুলে
|
195 |
+
তেমন
|
196 |
+
তো
|
197 |
+
তোমার
|
198 |
+
থাকবে
|
199 |
+
থাকবেন
|
200 |
+
থাকা
|
201 |
+
থাকায়
|
202 |
+
থাকে
|
203 |
+
থাকেন
|
204 |
+
থেকে
|
205 |
+
থেকেই
|
206 |
+
থেকেও
|
207 |
+
দিকে
|
208 |
+
দিতে
|
209 |
+
দিন
|
210 |
+
দিয়ে
|
211 |
+
দিয়েছে
|
212 |
+
দিয়েছেন
|
213 |
+
দিলেন
|
214 |
+
দু
|
215 |
+
দুই
|
216 |
+
দুটি
|
217 |
+
দুটো
|
218 |
+
দেওয়া
|
219 |
+
দেওয়ার
|
220 |
+
দেওয়া
|
221 |
+
দেখতে
|
222 |
+
দেখা
|
223 |
+
দেখে
|
224 |
+
দেন
|
225 |
+
দেয়
|
226 |
+
দ্বারা
|
227 |
+
ধরা
|
228 |
+
ধরে
|
229 |
+
ধামার
|
230 |
+
নতুন
|
231 |
+
নয়
|
232 |
+
না
|
233 |
+
নাই
|
234 |
+
নাকি
|
235 |
+
নাগাদ
|
236 |
+
নানা
|
237 |
+
নিজে
|
238 |
+
নিজেই
|
239 |
+
নিজেদের
|
240 |
+
নিজের
|
241 |
+
নিতে
|
242 |
+
নিয়ে
|
243 |
+
নিয়ে
|
244 |
+
নেই
|
245 |
+
নেওয়া
|
246 |
+
নেওয়ার
|
247 |
+
নেওয়া
|
248 |
+
নয়
|
249 |
+
পক্ষে
|
250 |
+
পর
|
251 |
+
পরে
|
252 |
+
পরেই
|
253 |
+
পরেও
|
254 |
+
পর্যন্ত
|
255 |
+
পাওয়া
|
256 |
+
পাচ
|
257 |
+
পারি
|
258 |
+
পারে
|
259 |
+
পারেন
|
260 |
+
পি
|
261 |
+
পেয়ে
|
262 |
+
পেয়্র্
|
263 |
+
প্রতি
|
264 |
+
প্রথম
|
265 |
+
প্রভৃতি
|
266 |
+
প্রযন্ত
|
267 |
+
প্রাথমিক
|
268 |
+
প্রায়
|
269 |
+
প্রায়
|
270 |
+
ফলে
|
271 |
+
ফিরে
|
272 |
+
ফের
|
273 |
+
বক্তব্য
|
274 |
+
বদলে
|
275 |
+
বন
|
276 |
+
বরং
|
277 |
+
বলতে
|
278 |
+
বলল
|
279 |
+
বললেন
|
280 |
+
বলা
|
281 |
+
বলে
|
282 |
+
বলেছেন
|
283 |
+
বলেন
|
284 |
+
বসে
|
285 |
+
বহু
|
286 |
+
বা
|
287 |
+
বাদে
|
288 |
+
বার
|
289 |
+
বি
|
290 |
+
বিনা
|
291 |
+
বিভিন্ন
|
292 |
+
বিশেষ
|
293 |
+
বিষয়টি
|
294 |
+
বেশ
|
295 |
+
বেশি
|
296 |
+
ব্যবহার
|
297 |
+
ব্যাপারে
|
298 |
+
ভাবে
|
299 |
+
ভাবেই
|
300 |
+
মতো
|
301 |
+
মতোই
|
302 |
+
মধ্যভাগে
|
303 |
+
মধ্যে
|
304 |
+
মধ্যেই
|
305 |
+
মধ্যেও
|
306 |
+
মনে
|
307 |
+
মাত্র
|
308 |
+
মাধ্যমে
|
309 |
+
মোট
|
310 |
+
মোটেই
|
311 |
+
যখন
|
312 |
+
যত
|
313 |
+
যতটা
|
314 |
+
যথেষ্ট
|
315 |
+
যদি
|
316 |
+
যদিও
|
317 |
+
যা
|
318 |
+
যাঁর
|
319 |
+
যাঁরা
|
320 |
+
যাওয়া
|
321 |
+
যাওয়ার
|
322 |
+
যাওয়া
|
323 |
+
যাকে
|
324 |
+
যাচ্ছে
|
325 |
+
যাতে
|
326 |
+
যাদের
|
327 |
+
যান
|
328 |
+
যাবে
|
329 |
+
যায়
|
330 |
+
যার
|
331 |
+
যারা
|
332 |
+
যিনি
|
333 |
+
যে
|
334 |
+
যেখানে
|
335 |
+
যেতে
|
336 |
+
যেন
|
337 |
+
যেমন
|
338 |
+
র
|
339 |
+
রকম
|
340 |
+
রয়েছে
|
341 |
+
রাখা
|
342 |
+
রেখে
|
343 |
+
লক্ষ
|
344 |
+
শুধু
|
345 |
+
শুরু
|
346 |
+
সঙ্গে
|
347 |
+
সঙ্গেও
|
348 |
+
সব
|
349 |
+
সবার
|
350 |
+
স��স্ত
|
351 |
+
সম্প্রতি
|
352 |
+
সহ
|
353 |
+
সহিত
|
354 |
+
সাধারণ
|
355 |
+
সামনে
|
356 |
+
সি
|
357 |
+
সুতরাং
|
358 |
+
সে
|
359 |
+
সেই
|
360 |
+
সেখান
|
361 |
+
সেখানে
|
362 |
+
সেটা
|
363 |
+
সেটাই
|
364 |
+
সেটাও
|
365 |
+
সেটি
|
366 |
+
স্পষ্ট
|
367 |
+
স্বয়ং
|
368 |
+
হইতে
|
369 |
+
হইবে
|
370 |
+
হইয়া
|
371 |
+
হওয়া
|
372 |
+
হওয়ায়
|
373 |
+
হওয়ার
|
374 |
+
হচ্ছে
|
375 |
+
হত
|
376 |
+
হতে
|
377 |
+
হতেই
|
378 |
+
হন
|
379 |
+
হবে
|
380 |
+
হবেন
|
381 |
+
হয়
|
382 |
+
হয়তো
|
383 |
+
হয়নি
|
384 |
+
হয়ে
|
385 |
+
হয়েই
|
386 |
+
হয়েছিল
|
387 |
+
হয়েছে
|
388 |
+
হয়েছেন
|
389 |
+
হল
|
390 |
+
হলে
|
391 |
+
হলেই
|
392 |
+
হলেও
|
393 |
+
হলো
|
394 |
+
হাজার
|
395 |
+
হিসাবে
|
396 |
+
হৈলে
|
397 |
+
হোক
|
398 |
+
হয়
|
data/nltk_data/corpora/stopwords/catalan
ADDED
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
abans
|
3 |
+
ací
|
4 |
+
ah
|
5 |
+
així
|
6 |
+
això
|
7 |
+
al
|
8 |
+
aleshores
|
9 |
+
algun
|
10 |
+
alguna
|
11 |
+
algunes
|
12 |
+
alguns
|
13 |
+
alhora
|
14 |
+
allà
|
15 |
+
allí
|
16 |
+
allò
|
17 |
+
als
|
18 |
+
altra
|
19 |
+
altre
|
20 |
+
altres
|
21 |
+
amb
|
22 |
+
ambdues
|
23 |
+
ambdós
|
24 |
+
anar
|
25 |
+
ans
|
26 |
+
apa
|
27 |
+
aquell
|
28 |
+
aquella
|
29 |
+
aquelles
|
30 |
+
aquells
|
31 |
+
aquest
|
32 |
+
aquesta
|
33 |
+
aquestes
|
34 |
+
aquests
|
35 |
+
aquí
|
36 |
+
baix
|
37 |
+
bastant
|
38 |
+
bé
|
39 |
+
cada
|
40 |
+
cadascuna
|
41 |
+
cadascunes
|
42 |
+
cadascuns
|
43 |
+
cadascú
|
44 |
+
com
|
45 |
+
consegueixo
|
46 |
+
conseguim
|
47 |
+
conseguir
|
48 |
+
consigueix
|
49 |
+
consigueixen
|
50 |
+
consigueixes
|
51 |
+
contra
|
52 |
+
d'un
|
53 |
+
d'una
|
54 |
+
d'unes
|
55 |
+
d'uns
|
56 |
+
dalt
|
57 |
+
de
|
58 |
+
del
|
59 |
+
dels
|
60 |
+
des
|
61 |
+
des de
|
62 |
+
després
|
63 |
+
dins
|
64 |
+
dintre
|
65 |
+
donat
|
66 |
+
doncs
|
67 |
+
durant
|
68 |
+
e
|
69 |
+
eh
|
70 |
+
el
|
71 |
+
elles
|
72 |
+
ells
|
73 |
+
els
|
74 |
+
em
|
75 |
+
en
|
76 |
+
encara
|
77 |
+
ens
|
78 |
+
entre
|
79 |
+
era
|
80 |
+
erem
|
81 |
+
eren
|
82 |
+
eres
|
83 |
+
es
|
84 |
+
esta
|
85 |
+
estan
|
86 |
+
estat
|
87 |
+
estava
|
88 |
+
estaven
|
89 |
+
estem
|
90 |
+
esteu
|
91 |
+
estic
|
92 |
+
està
|
93 |
+
estàvem
|
94 |
+
estàveu
|
95 |
+
et
|
96 |
+
etc
|
97 |
+
ets
|
98 |
+
fa
|
99 |
+
faig
|
100 |
+
fan
|
101 |
+
fas
|
102 |
+
fem
|
103 |
+
fer
|
104 |
+
feu
|
105 |
+
fi
|
106 |
+
fins
|
107 |
+
fora
|
108 |
+
gairebé
|
109 |
+
ha
|
110 |
+
han
|
111 |
+
has
|
112 |
+
haver
|
113 |
+
havia
|
114 |
+
he
|
115 |
+
hem
|
116 |
+
heu
|
117 |
+
hi
|
118 |
+
ho
|
119 |
+
i
|
120 |
+
igual
|
121 |
+
iguals
|
122 |
+
inclòs
|
123 |
+
ja
|
124 |
+
jo
|
125 |
+
l'hi
|
126 |
+
la
|
127 |
+
les
|
128 |
+
li
|
129 |
+
li'n
|
130 |
+
llarg
|
131 |
+
llavors
|
132 |
+
m'he
|
133 |
+
ma
|
134 |
+
mal
|
135 |
+
malgrat
|
136 |
+
mateix
|
137 |
+
mateixa
|
138 |
+
mateixes
|
139 |
+
mateixos
|
140 |
+
me
|
141 |
+
mentre
|
142 |
+
meu
|
143 |
+
meus
|
144 |
+
meva
|
145 |
+
meves
|
146 |
+
mode
|
147 |
+
molt
|
148 |
+
molta
|
149 |
+
moltes
|
150 |
+
molts
|
151 |
+
mon
|
152 |
+
mons
|
153 |
+
més
|
154 |
+
n'he
|
155 |
+
n'hi
|
156 |
+
ne
|
157 |
+
ni
|
158 |
+
no
|
159 |
+
nogensmenys
|
160 |
+
només
|
161 |
+
nosaltres
|
162 |
+
nostra
|
163 |
+
nostre
|
164 |
+
nostres
|
165 |
+
o
|
166 |
+
oh
|
167 |
+
oi
|
168 |
+
on
|
169 |
+
pas
|
170 |
+
pel
|
171 |
+
pels
|
172 |
+
per
|
173 |
+
per que
|
174 |
+
perquè
|
175 |
+
però
|
176 |
+
poc
|
177 |
+
poca
|
178 |
+
pocs
|
179 |
+
podem
|
180 |
+
poden
|
181 |
+
poder
|
182 |
+
podeu
|
183 |
+
poques
|
184 |
+
potser
|
185 |
+
primer
|
186 |
+
propi
|
187 |
+
puc
|
188 |
+
qual
|
189 |
+
quals
|
190 |
+
quan
|
191 |
+
quant
|
192 |
+
que
|
193 |
+
quelcom
|
194 |
+
qui
|
195 |
+
quin
|
196 |
+
quina
|
197 |
+
quines
|
198 |
+
quins
|
199 |
+
què
|
200 |
+
s'ha
|
201 |
+
s'han
|
202 |
+
sa
|
203 |
+
sabem
|
204 |
+
saben
|
205 |
+
saber
|
206 |
+
sabeu
|
207 |
+
sap
|
208 |
+
saps
|
209 |
+
semblant
|
210 |
+
semblants
|
211 |
+
sense
|
212 |
+
ser
|
213 |
+
ses
|
214 |
+
seu
|
215 |
+
seus
|
216 |
+
seva
|
217 |
+
seves
|
218 |
+
si
|
219 |
+
sobre
|
220 |
+
sobretot
|
221 |
+
soc
|
222 |
+
solament
|
223 |
+
sols
|
224 |
+
som
|
225 |
+
son
|
226 |
+
sons
|
227 |
+
sota
|
228 |
+
sou
|
229 |
+
sóc
|
230 |
+
són
|
231 |
+
t'ha
|
232 |
+
t'han
|
233 |
+
t'he
|
234 |
+
ta
|
235 |
+
tal
|
236 |
+
també
|
237 |
+
tampoc
|
238 |
+
tan
|
239 |
+
tant
|
240 |
+
tanta
|
241 |
+
tantes
|
242 |
+
te
|
243 |
+
tene
|
244 |
+
tenim
|
245 |
+
tenir
|
246 |
+
teniu
|
247 |
+
teu
|
248 |
+
teus
|
249 |
+
teva
|
250 |
+
teves
|
251 |
+
tinc
|
252 |
+
ton
|
253 |
+
tons
|
254 |
+
tot
|
255 |
+
tota
|
256 |
+
totes
|
257 |
+
tots
|
258 |
+
un
|
259 |
+
una
|
260 |
+
unes
|
261 |
+
uns
|
262 |
+
us
|
263 |
+
va
|
264 |
+
vaig
|
265 |
+
vam
|
266 |
+
van
|
267 |
+
vas
|
268 |
+
veu
|
269 |
+
vosaltres
|
270 |
+
vostra
|
271 |
+
vostre
|
272 |
+
vostres
|
273 |
+
érem
|
274 |
+
éreu
|
275 |
+
és
|
276 |
+
éssent
|
277 |
+
últim
|
278 |
+
ús
|
data/nltk_data/corpora/stopwords/chinese
ADDED
@@ -0,0 +1,841 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
一
|
2 |
+
一下
|
3 |
+
一些
|
4 |
+
一切
|
5 |
+
一则
|
6 |
+
一天
|
7 |
+
一定
|
8 |
+
一方面
|
9 |
+
一旦
|
10 |
+
一时
|
11 |
+
一来
|
12 |
+
一样
|
13 |
+
一次
|
14 |
+
一片
|
15 |
+
一直
|
16 |
+
一致
|
17 |
+
一般
|
18 |
+
一起
|
19 |
+
一边
|
20 |
+
一面
|
21 |
+
万一
|
22 |
+
上下
|
23 |
+
上升
|
24 |
+
上去
|
25 |
+
上来
|
26 |
+
上述
|
27 |
+
上面
|
28 |
+
下列
|
29 |
+
下去
|
30 |
+
下来
|
31 |
+
下面
|
32 |
+
不一
|
33 |
+
不久
|
34 |
+
不仅
|
35 |
+
不会
|
36 |
+
不但
|
37 |
+
不光
|
38 |
+
不单
|
39 |
+
不变
|
40 |
+
不只
|
41 |
+
不可
|
42 |
+
不同
|
43 |
+
不够
|
44 |
+
不如
|
45 |
+
不得
|
46 |
+
不怕
|
47 |
+
不惟
|
48 |
+
不成
|
49 |
+
不拘
|
50 |
+
不敢
|
51 |
+
不断
|
52 |
+
不是
|
53 |
+
不比
|
54 |
+
不然
|
55 |
+
不特
|
56 |
+
不独
|
57 |
+
不管
|
58 |
+
不能
|
59 |
+
不要
|
60 |
+
不论
|
61 |
+
不足
|
62 |
+
不过
|
63 |
+
不问
|
64 |
+
与
|
65 |
+
与其
|
66 |
+
与否
|
67 |
+
与此同时
|
68 |
+
专门
|
69 |
+
且
|
70 |
+
两者
|
71 |
+
严格
|
72 |
+
严重
|
73 |
+
个
|
74 |
+
个人
|
75 |
+
个别
|
76 |
+
中小
|
77 |
+
中间
|
78 |
+
丰富
|
79 |
+
临
|
80 |
+
为
|
81 |
+
为主
|
82 |
+
为了
|
83 |
+
为什么
|
84 |
+
为什麽
|
85 |
+
为何
|
86 |
+
为着
|
87 |
+
主张
|
88 |
+
主要
|
89 |
+
举行
|
90 |
+
乃
|
91 |
+
乃至
|
92 |
+
么
|
93 |
+
之
|
94 |
+
之一
|
95 |
+
之前
|
96 |
+
之后
|
97 |
+
之後
|
98 |
+
之所以
|
99 |
+
之类
|
100 |
+
乌乎
|
101 |
+
乎
|
102 |
+
乘
|
103 |
+
也
|
104 |
+
也好
|
105 |
+
也是
|
106 |
+
也罢
|
107 |
+
了
|
108 |
+
了解
|
109 |
+
争取
|
110 |
+
于
|
111 |
+
于是
|
112 |
+
于是乎
|
113 |
+
云云
|
114 |
+
互相
|
115 |
+
产生
|
116 |
+
人们
|
117 |
+
人家
|
118 |
+
什么
|
119 |
+
什么样
|
120 |
+
什麽
|
121 |
+
今后
|
122 |
+
今天
|
123 |
+
今年
|
124 |
+
今後
|
125 |
+
仍然
|
126 |
+
从
|
127 |
+
从事
|
128 |
+
从而
|
129 |
+
他
|
130 |
+
他人
|
131 |
+
他们
|
132 |
+
他的
|
133 |
+
代替
|
134 |
+
以
|
135 |
+
以上
|
136 |
+
以下
|
137 |
+
以为
|
138 |
+
以便
|
139 |
+
以免
|
140 |
+
以前
|
141 |
+
以及
|
142 |
+
以后
|
143 |
+
以外
|
144 |
+
以後
|
145 |
+
以来
|
146 |
+
以至
|
147 |
+
以至于
|
148 |
+
以致
|
149 |
+
们
|
150 |
+
任
|
151 |
+
任何
|
152 |
+
任凭
|
153 |
+
任务
|
154 |
+
企图
|
155 |
+
伟大
|
156 |
+
似乎
|
157 |
+
似的
|
158 |
+
但
|
159 |
+
但是
|
160 |
+
何
|
161 |
+
何况
|
162 |
+
何处
|
163 |
+
何时
|
164 |
+
作为
|
165 |
+
你
|
166 |
+
你们
|
167 |
+
你的
|
168 |
+
使得
|
169 |
+
使用
|
170 |
+
例如
|
171 |
+
依
|
172 |
+
依照
|
173 |
+
依靠
|
174 |
+
促进
|
175 |
+
保持
|
176 |
+
俺
|
177 |
+
俺们
|
178 |
+
倘
|
179 |
+
倘使
|
180 |
+
倘或
|
181 |
+
倘然
|
182 |
+
倘若
|
183 |
+
假使
|
184 |
+
假如
|
185 |
+
假若
|
186 |
+
做到
|
187 |
+
像
|
188 |
+
允许
|
189 |
+
充分
|
190 |
+
先后
|
191 |
+
先後
|
192 |
+
先生
|
193 |
+
全部
|
194 |
+
全面
|
195 |
+
兮
|
196 |
+
共同
|
197 |
+
关于
|
198 |
+
其
|
199 |
+
其一
|
200 |
+
其中
|
201 |
+
其二
|
202 |
+
其他
|
203 |
+
其余
|
204 |
+
其它
|
205 |
+
其实
|
206 |
+
其次
|
207 |
+
具体
|
208 |
+
具体地说
|
209 |
+
具体说来
|
210 |
+
具有
|
211 |
+
再者
|
212 |
+
再说
|
213 |
+
冒
|
214 |
+
冲
|
215 |
+
决定
|
216 |
+
况且
|
217 |
+
准备
|
218 |
+
几
|
219 |
+
几乎
|
220 |
+
几时
|
221 |
+
凭
|
222 |
+
凭借
|
223 |
+
出去
|
224 |
+
出来
|
225 |
+
出现
|
226 |
+
分别
|
227 |
+
则
|
228 |
+
别
|
229 |
+
别的
|
230 |
+
别说
|
231 |
+
到
|
232 |
+
前后
|
233 |
+
前者
|
234 |
+
前进
|
235 |
+
前面
|
236 |
+
加之
|
237 |
+
加以
|
238 |
+
加入
|
239 |
+
加强
|
240 |
+
十分
|
241 |
+
即
|
242 |
+
即令
|
243 |
+
即使
|
244 |
+
即便
|
245 |
+
即或
|
246 |
+
即若
|
247 |
+
却不
|
248 |
+
原来
|
249 |
+
又
|
250 |
+
及
|
251 |
+
及其
|
252 |
+
及时
|
253 |
+
及至
|
254 |
+
双方
|
255 |
+
反之
|
256 |
+
反应
|
257 |
+
反映
|
258 |
+
反过来
|
259 |
+
反过来说
|
260 |
+
取得
|
261 |
+
受到
|
262 |
+
变成
|
263 |
+
另
|
264 |
+
另一方面
|
265 |
+
另外
|
266 |
+
只是
|
267 |
+
只有
|
268 |
+
只要
|
269 |
+
只限
|
270 |
+
叫
|
271 |
+
叫做
|
272 |
+
召开
|
273 |
+
叮咚
|
274 |
+
可
|
275 |
+
可以
|
276 |
+
可是
|
277 |
+
可能
|
278 |
+
可见
|
279 |
+
各
|
280 |
+
各个
|
281 |
+
各人
|
282 |
+
各位
|
283 |
+
各地
|
284 |
+
各种
|
285 |
+
各级
|
286 |
+
各自
|
287 |
+
合理
|
288 |
+
同
|
289 |
+
同一
|
290 |
+
同时
|
291 |
+
同样
|
292 |
+
后来
|
293 |
+
后面
|
294 |
+
向
|
295 |
+
向着
|
296 |
+
吓
|
297 |
+
吗
|
298 |
+
否则
|
299 |
+
吧
|
300 |
+
吧哒
|
301 |
+
吱
|
302 |
+
呀
|
303 |
+
呃
|
304 |
+
呕
|
305 |
+
呗
|
306 |
+
呜
|
307 |
+
呜呼
|
308 |
+
呢
|
309 |
+
周围
|
310 |
+
呵
|
311 |
+
呸
|
312 |
+
呼哧
|
313 |
+
咋
|
314 |
+
和
|
315 |
+
咚
|
316 |
+
咦
|
317 |
+
咱
|
318 |
+
咱们
|
319 |
+
咳
|
320 |
+
哇
|
321 |
+
哈
|
322 |
+
哈哈
|
323 |
+
哉
|
324 |
+
哎
|
325 |
+
哎呀
|
326 |
+
哎哟
|
327 |
+
哗
|
328 |
+
哟
|
329 |
+
哦
|
330 |
+
哩
|
331 |
+
哪
|
332 |
+
哪个
|
333 |
+
哪些
|
334 |
+
哪儿
|
335 |
+
哪天
|
336 |
+
哪年
|
337 |
+
哪怕
|
338 |
+
哪样
|
339 |
+
哪边
|
340 |
+
哪里
|
341 |
+
哼
|
342 |
+
哼唷
|
343 |
+
唉
|
344 |
+
啊
|
345 |
+
啐
|
346 |
+
啥
|
347 |
+
啦
|
348 |
+
啪达
|
349 |
+
喂
|
350 |
+
喏
|
351 |
+
喔唷
|
352 |
+
嗡嗡
|
353 |
+
嗬
|
354 |
+
嗯
|
355 |
+
嗳
|
356 |
+
嘎
|
357 |
+
嘎登
|
358 |
+
嘘
|
359 |
+
嘛
|
360 |
+
嘻
|
361 |
+
嘿
|
362 |
+
因
|
363 |
+
因为
|
364 |
+
因此
|
365 |
+
因而
|
366 |
+
固然
|
367 |
+
在
|
368 |
+
在下
|
369 |
+
地
|
370 |
+
坚决
|
371 |
+
坚持
|
372 |
+
基本
|
373 |
+
处理
|
374 |
+
复杂
|
375 |
+
多
|
376 |
+
多少
|
377 |
+
多数
|
378 |
+
多次
|
379 |
+
大力
|
380 |
+
大多数
|
381 |
+
大大
|
382 |
+
大家
|
383 |
+
大批
|
384 |
+
大约
|
385 |
+
大量
|
386 |
+
失去
|
387 |
+
她
|
388 |
+
她们
|
389 |
+
她的
|
390 |
+
好的
|
391 |
+
好象
|
392 |
+
如
|
393 |
+
如上所述
|
394 |
+
如下
|
395 |
+
如何
|
396 |
+
如其
|
397 |
+
如果
|
398 |
+
如此
|
399 |
+
如若
|
400 |
+
存在
|
401 |
+
宁
|
402 |
+
宁可
|
403 |
+
宁愿
|
404 |
+
宁肯
|
405 |
+
它
|
406 |
+
它们
|
407 |
+
它们的
|
408 |
+
它的
|
409 |
+
安全
|
410 |
+
完全
|
411 |
+
完成
|
412 |
+
实现
|
413 |
+
实际
|
414 |
+
宣布
|
415 |
+
容易
|
416 |
+
密切
|
417 |
+
对
|
418 |
+
对于
|
419 |
+
对应
|
420 |
+
将
|
421 |
+
少数
|
422 |
+
尔后
|
423 |
+
尚且
|
424 |
+
尤其
|
425 |
+
就
|
426 |
+
就是
|
427 |
+
就是说
|
428 |
+
尽
|
429 |
+
尽管
|
430 |
+
属于
|
431 |
+
岂但
|
432 |
+
左右
|
433 |
+
巨大
|
434 |
+
巩固
|
435 |
+
己
|
436 |
+
已经
|
437 |
+
帮助
|
438 |
+
常常
|
439 |
+
并
|
440 |
+
并不
|
441 |
+
并不是
|
442 |
+
并且
|
443 |
+
并没有
|
444 |
+
广大
|
445 |
+
广泛
|
446 |
+
应当
|
447 |
+
应用
|
448 |
+
应该
|
449 |
+
开外
|
450 |
+
开始
|
451 |
+
开展
|
452 |
+
引起
|
453 |
+
强烈
|
454 |
+
强调
|
455 |
+
归
|
456 |
+
当
|
457 |
+
当前
|
458 |
+
当时
|
459 |
+
当然
|
460 |
+
当着
|
461 |
+
形成
|
462 |
+
彻底
|
463 |
+
彼
|
464 |
+
彼此
|
465 |
+
往
|
466 |
+
往往
|
467 |
+
待
|
468 |
+
後来
|
469 |
+
後面
|
470 |
+
得
|
471 |
+
得出
|
472 |
+
得到
|
473 |
+
心里
|
474 |
+
必然
|
475 |
+
必要
|
476 |
+
必须
|
477 |
+
怎
|
478 |
+
怎么
|
479 |
+
怎么办
|
480 |
+
怎么样
|
481 |
+
怎样
|
482 |
+
怎麽
|
483 |
+
总之
|
484 |
+
总是
|
485 |
+
总的来看
|
486 |
+
总的来说
|
487 |
+
总的说来
|
488 |
+
总结
|
489 |
+
总而言之
|
490 |
+
恰恰相反
|
491 |
+
您
|
492 |
+
意思
|
493 |
+
愿意
|
494 |
+
慢说
|
495 |
+
成为
|
496 |
+
我
|
497 |
+
我们
|
498 |
+
我的
|
499 |
+
或
|
500 |
+
或是
|
501 |
+
或者
|
502 |
+
战斗
|
503 |
+
所
|
504 |
+
所以
|
505 |
+
所有
|
506 |
+
所谓
|
507 |
+
打
|
508 |
+
扩大
|
509 |
+
把
|
510 |
+
抑或
|
511 |
+
拿
|
512 |
+
按
|
513 |
+
按照
|
514 |
+
换句话说
|
515 |
+
换言之
|
516 |
+
据
|
517 |
+
掌握
|
518 |
+
接着
|
519 |
+
接著
|
520 |
+
故
|
521 |
+
故此
|
522 |
+
整个
|
523 |
+
方便
|
524 |
+
方面
|
525 |
+
旁人
|
526 |
+
无宁
|
527 |
+
无法
|
528 |
+
无论
|
529 |
+
既
|
530 |
+
既是
|
531 |
+
既然
|
532 |
+
时候
|
533 |
+
明显
|
534 |
+
明确
|
535 |
+
是
|
536 |
+
是否
|
537 |
+
是的
|
538 |
+
显然
|
539 |
+
显著
|
540 |
+
普通
|
541 |
+
普遍
|
542 |
+
更加
|
543 |
+
曾经
|
544 |
+
替
|
545 |
+
最后
|
546 |
+
最大
|
547 |
+
最好
|
548 |
+
最後
|
549 |
+
最近
|
550 |
+
最高
|
551 |
+
有
|
552 |
+
有些
|
553 |
+
有关
|
554 |
+
有利
|
555 |
+
有力
|
556 |
+
有所
|
557 |
+
有效
|
558 |
+
有时
|
559 |
+
有点
|
560 |
+
有的
|
561 |
+
有着
|
562 |
+
有著
|
563 |
+
望
|
564 |
+
朝
|
565 |
+
朝着
|
566 |
+
本
|
567 |
+
本着
|
568 |
+
来
|
569 |
+
来着
|
570 |
+
极了
|
571 |
+
构成
|
572 |
+
果然
|
573 |
+
果真
|
574 |
+
某
|
575 |
+
某个
|
576 |
+
某些
|
577 |
+
根据
|
578 |
+
根本
|
579 |
+
欢迎
|
580 |
+
正在
|
581 |
+
正如
|
582 |
+
正常
|
583 |
+
此
|
584 |
+
此外
|
585 |
+
此时
|
586 |
+
此间
|
587 |
+
毋宁
|
588 |
+
每
|
589 |
+
每个
|
590 |
+
每天
|
591 |
+
每年
|
592 |
+
每当
|
593 |
+
比
|
594 |
+
比如
|
595 |
+
比方
|
596 |
+
比较
|
597 |
+
毫不
|
598 |
+
没有
|
599 |
+
沿
|
600 |
+
沿着
|
601 |
+
注意
|
602 |
+
深入
|
603 |
+
清楚
|
604 |
+
满足
|
605 |
+
漫说
|
606 |
+
焉
|
607 |
+
然则
|
608 |
+
然后
|
609 |
+
然後
|
610 |
+
然而
|
611 |
+
照
|
612 |
+
照着
|
613 |
+
特别是
|
614 |
+
特殊
|
615 |
+
特点
|
616 |
+
现代
|
617 |
+
现在
|
618 |
+
甚么
|
619 |
+
甚而
|
620 |
+
甚至
|
621 |
+
用
|
622 |
+
由
|
623 |
+
由于
|
624 |
+
由此可见
|
625 |
+
的
|
626 |
+
的话
|
627 |
+
目前
|
628 |
+
直到
|
629 |
+
直接
|
630 |
+
相似
|
631 |
+
相信
|
632 |
+
相反
|
633 |
+
相同
|
634 |
+
相对
|
635 |
+
相对而言
|
636 |
+
相应
|
637 |
+
相当
|
638 |
+
相等
|
639 |
+
省得
|
640 |
+
看出
|
641 |
+
看到
|
642 |
+
看来
|
643 |
+
看看
|
644 |
+
看见
|
645 |
+
真是
|
646 |
+
真正
|
647 |
+
着
|
648 |
+
着呢
|
649 |
+
矣
|
650 |
+
知道
|
651 |
+
确定
|
652 |
+
离
|
653 |
+
积极
|
654 |
+
移动
|
655 |
+
突出
|
656 |
+
突然
|
657 |
+
立即
|
658 |
+
第
|
659 |
+
等
|
660 |
+
等等
|
661 |
+
管
|
662 |
+
紧接着
|
663 |
+
纵
|
664 |
+
纵令
|
665 |
+
纵使
|
666 |
+
纵然
|
667 |
+
练习
|
668 |
+
组成
|
669 |
+
经
|
670 |
+
经常
|
671 |
+
经过
|
672 |
+
结合
|
673 |
+
结果
|
674 |
+
给
|
675 |
+
绝对
|
676 |
+
继续
|
677 |
+
继而
|
678 |
+
维持
|
679 |
+
综上所述
|
680 |
+
罢了
|
681 |
+
考虑
|
682 |
+
者
|
683 |
+
而
|
684 |
+
而且
|
685 |
+
而况
|
686 |
+
而外
|
687 |
+
而已
|
688 |
+
而是
|
689 |
+
而言
|
690 |
+
联系
|
691 |
+
能
|
692 |
+
能否
|
693 |
+
能够
|
694 |
+
腾
|
695 |
+
自
|
696 |
+
自个儿
|
697 |
+
自从
|
698 |
+
自各儿
|
699 |
+
自家
|
700 |
+
自己
|
701 |
+
自身
|
702 |
+
至
|
703 |
+
至于
|
704 |
+
良好
|
705 |
+
若
|
706 |
+
若是
|
707 |
+
若非
|
708 |
+
范围
|
709 |
+
莫若
|
710 |
+
获得
|
711 |
+
虽
|
712 |
+
虽则
|
713 |
+
虽然
|
714 |
+
虽说
|
715 |
+
行为
|
716 |
+
行动
|
717 |
+
表明
|
718 |
+
表示
|
719 |
+
被
|
720 |
+
要
|
721 |
+
要不
|
722 |
+
要不是
|
723 |
+
要不然
|
724 |
+
要么
|
725 |
+
要是
|
726 |
+
要求
|
727 |
+
规定
|
728 |
+
觉得
|
729 |
+
认为
|
730 |
+
认真
|
731 |
+
认识
|
732 |
+
让
|
733 |
+
许多
|
734 |
+
论
|
735 |
+
设使
|
736 |
+
设若
|
737 |
+
该
|
738 |
+
说明
|
739 |
+
诸位
|
740 |
+
谁
|
741 |
+
谁知
|
742 |
+
赶
|
743 |
+
起
|
744 |
+
起来
|
745 |
+
起见
|
746 |
+
趁
|
747 |
+
趁着
|
748 |
+
越是
|
749 |
+
跟
|
750 |
+
转动
|
751 |
+
转变
|
752 |
+
转贴
|
753 |
+
较
|
754 |
+
较之
|
755 |
+
边
|
756 |
+
达到
|
757 |
+
迅速
|
758 |
+
过
|
759 |
+
过去
|
760 |
+
过来
|
761 |
+
运用
|
762 |
+
还是
|
763 |
+
还有
|
764 |
+
这
|
765 |
+
这个
|
766 |
+
这么
|
767 |
+
这么些
|
768 |
+
这么样
|
769 |
+
这么点儿
|
770 |
+
这些
|
771 |
+
这会儿
|
772 |
+
这儿
|
773 |
+
这就是说
|
774 |
+
这时
|
775 |
+
这样
|
776 |
+
这点
|
777 |
+
这种
|
778 |
+
这边
|
779 |
+
这里
|
780 |
+
这麽
|
781 |
+
进入
|
782 |
+
进步
|
783 |
+
进而
|
784 |
+
进行
|
785 |
+
连
|
786 |
+
连同
|
787 |
+
适应
|
788 |
+
适当
|
789 |
+
适用
|
790 |
+
逐步
|
791 |
+
逐渐
|
792 |
+
通常
|
793 |
+
通过
|
794 |
+
造成
|
795 |
+
遇到
|
796 |
+
遭到
|
797 |
+
避免
|
798 |
+
那
|
799 |
+
那个
|
800 |
+
那么
|
801 |
+
那么些
|
802 |
+
那么样
|
803 |
+
那些
|
804 |
+
那会儿
|
805 |
+
那儿
|
806 |
+
那时
|
807 |
+
那样
|
808 |
+
那边
|
809 |
+
那里
|
810 |
+
那麽
|
811 |
+
部分
|
812 |
+
鄙人
|
813 |
+
采取
|
814 |
+
里面
|
815 |
+
重大
|
816 |
+
重新
|
817 |
+
重要
|
818 |
+
鉴于
|
819 |
+
问题
|
820 |
+
防止
|
821 |
+
阿
|
822 |
+
附近
|
823 |
+
限制
|
824 |
+
除
|
825 |
+
除了
|
826 |
+
除此之外
|
827 |
+
除非
|
828 |
+
随
|
829 |
+
随着
|
830 |
+
随著
|
831 |
+
集中
|
832 |
+
需要
|
833 |
+
非但
|
834 |
+
非常
|
835 |
+
非徒
|
836 |
+
靠
|
837 |
+
顺
|
838 |
+
顺着
|
839 |
+
首先
|
840 |
+
高兴
|
841 |
+
是不是
|
data/nltk_data/corpora/stopwords/danish
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
og
|
2 |
+
i
|
3 |
+
jeg
|
4 |
+
det
|
5 |
+
at
|
6 |
+
en
|
7 |
+
den
|
8 |
+
til
|
9 |
+
er
|
10 |
+
som
|
11 |
+
på
|
12 |
+
de
|
13 |
+
med
|
14 |
+
han
|
15 |
+
af
|
16 |
+
for
|
17 |
+
ikke
|
18 |
+
der
|
19 |
+
var
|
20 |
+
mig
|
21 |
+
sig
|
22 |
+
men
|
23 |
+
et
|
24 |
+
har
|
25 |
+
om
|
26 |
+
vi
|
27 |
+
min
|
28 |
+
havde
|
29 |
+
ham
|
30 |
+
hun
|
31 |
+
nu
|
32 |
+
over
|
33 |
+
da
|
34 |
+
fra
|
35 |
+
du
|
36 |
+
ud
|
37 |
+
sin
|
38 |
+
dem
|
39 |
+
os
|
40 |
+
op
|
41 |
+
man
|
42 |
+
hans
|
43 |
+
hvor
|
44 |
+
eller
|
45 |
+
hvad
|
46 |
+
skal
|
47 |
+
selv
|
48 |
+
her
|
49 |
+
alle
|
50 |
+
vil
|
51 |
+
blev
|
52 |
+
kunne
|
53 |
+
ind
|
54 |
+
når
|
55 |
+
være
|
56 |
+
dog
|
57 |
+
noget
|
58 |
+
ville
|
59 |
+
jo
|
60 |
+
deres
|
61 |
+
efter
|
62 |
+
ned
|
63 |
+
skulle
|
64 |
+
denne
|
65 |
+
end
|
66 |
+
dette
|
67 |
+
mit
|
68 |
+
også
|
69 |
+
under
|
70 |
+
have
|
71 |
+
dig
|
72 |
+
anden
|
73 |
+
hende
|
74 |
+
mine
|
75 |
+
alt
|
76 |
+
meget
|
77 |
+
sit
|
78 |
+
sine
|
79 |
+
vor
|
80 |
+
mod
|
81 |
+
disse
|
82 |
+
hvis
|
83 |
+
din
|
84 |
+
nogle
|
85 |
+
hos
|
86 |
+
blive
|
87 |
+
mange
|
88 |
+
ad
|
89 |
+
bliver
|
90 |
+
hendes
|
91 |
+
været
|
92 |
+
thi
|
93 |
+
jer
|
94 |
+
sådan
|
data/nltk_data/corpora/stopwords/dutch
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
de
|
2 |
+
en
|
3 |
+
van
|
4 |
+
ik
|
5 |
+
te
|
6 |
+
dat
|
7 |
+
die
|
8 |
+
in
|
9 |
+
een
|
10 |
+
hij
|
11 |
+
het
|
12 |
+
niet
|
13 |
+
zijn
|
14 |
+
is
|
15 |
+
was
|
16 |
+
op
|
17 |
+
aan
|
18 |
+
met
|
19 |
+
als
|
20 |
+
voor
|
21 |
+
had
|
22 |
+
er
|
23 |
+
maar
|
24 |
+
om
|
25 |
+
hem
|
26 |
+
dan
|
27 |
+
zou
|
28 |
+
of
|
29 |
+
wat
|
30 |
+
mijn
|
31 |
+
men
|
32 |
+
dit
|
33 |
+
zo
|
34 |
+
door
|
35 |
+
over
|
36 |
+
ze
|
37 |
+
zich
|
38 |
+
bij
|
39 |
+
ook
|
40 |
+
tot
|
41 |
+
je
|
42 |
+
mij
|
43 |
+
uit
|
44 |
+
der
|
45 |
+
daar
|
46 |
+
haar
|
47 |
+
naar
|
48 |
+
heb
|
49 |
+
hoe
|
50 |
+
heeft
|
51 |
+
hebben
|
52 |
+
deze
|
53 |
+
u
|
54 |
+
want
|
55 |
+
nog
|
56 |
+
zal
|
57 |
+
me
|
58 |
+
zij
|
59 |
+
nu
|
60 |
+
ge
|
61 |
+
geen
|
62 |
+
omdat
|
63 |
+
iets
|
64 |
+
worden
|
65 |
+
toch
|
66 |
+
al
|
67 |
+
waren
|
68 |
+
veel
|
69 |
+
meer
|
70 |
+
doen
|
71 |
+
toen
|
72 |
+
moet
|
73 |
+
ben
|
74 |
+
zonder
|
75 |
+
kan
|
76 |
+
hun
|
77 |
+
dus
|
78 |
+
alles
|
79 |
+
onder
|
80 |
+
ja
|
81 |
+
eens
|
82 |
+
hier
|
83 |
+
wie
|
84 |
+
werd
|
85 |
+
altijd
|
86 |
+
doch
|
87 |
+
wordt
|
88 |
+
wezen
|
89 |
+
kunnen
|
90 |
+
ons
|
91 |
+
zelf
|
92 |
+
tegen
|
93 |
+
na
|
94 |
+
reeds
|
95 |
+
wil
|
96 |
+
kon
|
97 |
+
niets
|
98 |
+
uw
|
99 |
+
iemand
|
100 |
+
geweest
|
101 |
+
andere
|
data/nltk_data/corpora/stopwords/english
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
about
|
3 |
+
above
|
4 |
+
after
|
5 |
+
again
|
6 |
+
against
|
7 |
+
ain
|
8 |
+
all
|
9 |
+
am
|
10 |
+
an
|
11 |
+
and
|
12 |
+
any
|
13 |
+
are
|
14 |
+
aren
|
15 |
+
aren't
|
16 |
+
as
|
17 |
+
at
|
18 |
+
be
|
19 |
+
because
|
20 |
+
been
|
21 |
+
before
|
22 |
+
being
|
23 |
+
below
|
24 |
+
between
|
25 |
+
both
|
26 |
+
but
|
27 |
+
by
|
28 |
+
can
|
29 |
+
couldn
|
30 |
+
couldn't
|
31 |
+
d
|
32 |
+
did
|
33 |
+
didn
|
34 |
+
didn't
|
35 |
+
do
|
36 |
+
does
|
37 |
+
doesn
|
38 |
+
doesn't
|
39 |
+
doing
|
40 |
+
don
|
41 |
+
don't
|
42 |
+
down
|
43 |
+
during
|
44 |
+
each
|
45 |
+
few
|
46 |
+
for
|
47 |
+
from
|
48 |
+
further
|
49 |
+
had
|
50 |
+
hadn
|
51 |
+
hadn't
|
52 |
+
has
|
53 |
+
hasn
|
54 |
+
hasn't
|
55 |
+
have
|
56 |
+
haven
|
57 |
+
haven't
|
58 |
+
having
|
59 |
+
he
|
60 |
+
he'd
|
61 |
+
he'll
|
62 |
+
her
|
63 |
+
here
|
64 |
+
hers
|
65 |
+
herself
|
66 |
+
he's
|
67 |
+
him
|
68 |
+
himself
|
69 |
+
his
|
70 |
+
how
|
71 |
+
i
|
72 |
+
i'd
|
73 |
+
if
|
74 |
+
i'll
|
75 |
+
i'm
|
76 |
+
in
|
77 |
+
into
|
78 |
+
is
|
79 |
+
isn
|
80 |
+
isn't
|
81 |
+
it
|
82 |
+
it'd
|
83 |
+
it'll
|
84 |
+
it's
|
85 |
+
its
|
86 |
+
itself
|
87 |
+
i've
|
88 |
+
just
|
89 |
+
ll
|
90 |
+
m
|
91 |
+
ma
|
92 |
+
me
|
93 |
+
mightn
|
94 |
+
mightn't
|
95 |
+
more
|
96 |
+
most
|
97 |
+
mustn
|
98 |
+
mustn't
|
99 |
+
my
|
100 |
+
myself
|
101 |
+
needn
|
102 |
+
needn't
|
103 |
+
no
|
104 |
+
nor
|
105 |
+
not
|
106 |
+
now
|
107 |
+
o
|
108 |
+
of
|
109 |
+
off
|
110 |
+
on
|
111 |
+
once
|
112 |
+
only
|
113 |
+
or
|
114 |
+
other
|
115 |
+
our
|
116 |
+
ours
|
117 |
+
ourselves
|
118 |
+
out
|
119 |
+
over
|
120 |
+
own
|
121 |
+
re
|
122 |
+
s
|
123 |
+
same
|
124 |
+
shan
|
125 |
+
shan't
|
126 |
+
she
|
127 |
+
she'd
|
128 |
+
she'll
|
129 |
+
she's
|
130 |
+
should
|
131 |
+
shouldn
|
132 |
+
shouldn't
|
133 |
+
should've
|
134 |
+
so
|
135 |
+
some
|
136 |
+
such
|
137 |
+
t
|
138 |
+
than
|
139 |
+
that
|
140 |
+
that'll
|
141 |
+
the
|
142 |
+
their
|
143 |
+
theirs
|
144 |
+
them
|
145 |
+
themselves
|
146 |
+
then
|
147 |
+
there
|
148 |
+
these
|
149 |
+
they
|
150 |
+
they'd
|
151 |
+
they'll
|
152 |
+
they're
|
153 |
+
they've
|
154 |
+
this
|
155 |
+
those
|
156 |
+
through
|
157 |
+
to
|
158 |
+
too
|
159 |
+
under
|
160 |
+
until
|
161 |
+
up
|
162 |
+
ve
|
163 |
+
very
|
164 |
+
was
|
165 |
+
wasn
|
166 |
+
wasn't
|
167 |
+
we
|
168 |
+
we'd
|
169 |
+
we'll
|
170 |
+
we're
|
171 |
+
were
|
172 |
+
weren
|
173 |
+
weren't
|
174 |
+
we've
|
175 |
+
what
|
176 |
+
when
|
177 |
+
where
|
178 |
+
which
|
179 |
+
while
|
180 |
+
who
|
181 |
+
whom
|
182 |
+
why
|
183 |
+
will
|
184 |
+
with
|
185 |
+
won
|
186 |
+
won't
|
187 |
+
wouldn
|
188 |
+
wouldn't
|
189 |
+
y
|
190 |
+
you
|
191 |
+
you'd
|
192 |
+
you'll
|
193 |
+
your
|
194 |
+
you're
|
195 |
+
yours
|
196 |
+
yourself
|
197 |
+
yourselves
|
198 |
+
you've
|
data/nltk_data/corpora/stopwords/finnish
ADDED
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
olla
|
2 |
+
olen
|
3 |
+
olet
|
4 |
+
on
|
5 |
+
olemme
|
6 |
+
olette
|
7 |
+
ovat
|
8 |
+
ole
|
9 |
+
oli
|
10 |
+
olisi
|
11 |
+
olisit
|
12 |
+
olisin
|
13 |
+
olisimme
|
14 |
+
olisitte
|
15 |
+
olisivat
|
16 |
+
olit
|
17 |
+
olin
|
18 |
+
olimme
|
19 |
+
olitte
|
20 |
+
olivat
|
21 |
+
ollut
|
22 |
+
olleet
|
23 |
+
en
|
24 |
+
et
|
25 |
+
ei
|
26 |
+
emme
|
27 |
+
ette
|
28 |
+
eivät
|
29 |
+
minä
|
30 |
+
minun
|
31 |
+
minut
|
32 |
+
minua
|
33 |
+
minussa
|
34 |
+
minusta
|
35 |
+
minuun
|
36 |
+
minulla
|
37 |
+
minulta
|
38 |
+
minulle
|
39 |
+
sinä
|
40 |
+
sinun
|
41 |
+
sinut
|
42 |
+
sinua
|
43 |
+
sinussa
|
44 |
+
sinusta
|
45 |
+
sinuun
|
46 |
+
sinulla
|
47 |
+
sinulta
|
48 |
+
sinulle
|
49 |
+
hän
|
50 |
+
hänen
|
51 |
+
hänet
|
52 |
+
häntä
|
53 |
+
hänessä
|
54 |
+
hänestä
|
55 |
+
häneen
|
56 |
+
hänellä
|
57 |
+
häneltä
|
58 |
+
hänelle
|
59 |
+
me
|
60 |
+
meidän
|
61 |
+
meidät
|
62 |
+
meitä
|
63 |
+
meissä
|
64 |
+
meistä
|
65 |
+
meihin
|
66 |
+
meillä
|
67 |
+
meiltä
|
68 |
+
meille
|
69 |
+
te
|
70 |
+
teidän
|
71 |
+
teidät
|
72 |
+
teitä
|
73 |
+
teissä
|
74 |
+
teistä
|
75 |
+
teihin
|
76 |
+
teillä
|
77 |
+
teiltä
|
78 |
+
teille
|
79 |
+
he
|
80 |
+
heidän
|
81 |
+
heidät
|
82 |
+
heitä
|
83 |
+
heissä
|
84 |
+
heistä
|
85 |
+
heihin
|
86 |
+
heillä
|
87 |
+
heiltä
|
88 |
+
heille
|
89 |
+
tämä
|
90 |
+
tämän
|
91 |
+
tätä
|
92 |
+
tässä
|
93 |
+
tästä
|
94 |
+
tähän
|
95 |
+
tallä
|
96 |
+
tältä
|
97 |
+
tälle
|
98 |
+
tänä
|
99 |
+
täksi
|
100 |
+
tuo
|
101 |
+
tuon
|
102 |
+
tuotä
|
103 |
+
tuossa
|
104 |
+
tuosta
|
105 |
+
tuohon
|
106 |
+
tuolla
|
107 |
+
tuolta
|
108 |
+
tuolle
|
109 |
+
tuona
|
110 |
+
tuoksi
|
111 |
+
se
|
112 |
+
sen
|
113 |
+
sitä
|
114 |
+
siinä
|
115 |
+
siitä
|
116 |
+
siihen
|
117 |
+
sillä
|
118 |
+
siltä
|
119 |
+
sille
|
120 |
+
sinä
|
121 |
+
siksi
|
122 |
+
nämä
|
123 |
+
näiden
|
124 |
+
näitä
|
125 |
+
näissä
|
126 |
+
näistä
|
127 |
+
näihin
|
128 |
+
näillä
|
129 |
+
näiltä
|
130 |
+
näille
|
131 |
+
näinä
|
132 |
+
näiksi
|
133 |
+
nuo
|
134 |
+
noiden
|
135 |
+
noita
|
136 |
+
noissa
|
137 |
+
noista
|
138 |
+
noihin
|
139 |
+
noilla
|
140 |
+
noilta
|
141 |
+
noille
|
142 |
+
noina
|
143 |
+
noiksi
|
144 |
+
ne
|
145 |
+
niiden
|
146 |
+
niitä
|
147 |
+
niissä
|
148 |
+
niistä
|
149 |
+
niihin
|
150 |
+
niillä
|
151 |
+
niiltä
|
152 |
+
niille
|
153 |
+
niinä
|
154 |
+
niiksi
|
155 |
+
kuka
|
156 |
+
kenen
|
157 |
+
kenet
|
158 |
+
ketä
|
159 |
+
kenessä
|
160 |
+
kenestä
|
161 |
+
keneen
|
162 |
+
kenellä
|
163 |
+
keneltä
|
164 |
+
kenelle
|
165 |
+
kenenä
|
166 |
+
keneksi
|
167 |
+
ketkä
|
168 |
+
keiden
|
169 |
+
ketkä
|
170 |
+
keitä
|
171 |
+
keissä
|
172 |
+
keistä
|
173 |
+
keihin
|
174 |
+
keillä
|
175 |
+
keiltä
|
176 |
+
keille
|
177 |
+
keinä
|
178 |
+
keiksi
|
179 |
+
mikä
|
180 |
+
minkä
|
181 |
+
minkä
|
182 |
+
mitä
|
183 |
+
missä
|
184 |
+
mistä
|
185 |
+
mihin
|
186 |
+
millä
|
187 |
+
miltä
|
188 |
+
mille
|
189 |
+
minä
|
190 |
+
miksi
|
191 |
+
mitkä
|
192 |
+
joka
|
193 |
+
jonka
|
194 |
+
jota
|
195 |
+
jossa
|
196 |
+
josta
|
197 |
+
johon
|
198 |
+
jolla
|
199 |
+
jolta
|
200 |
+
jolle
|
201 |
+
jona
|
202 |
+
joksi
|
203 |
+
jotka
|
204 |
+
joiden
|
205 |
+
joita
|
206 |
+
joissa
|
207 |
+
joista
|
208 |
+
joihin
|
209 |
+
joilla
|
210 |
+
joilta
|
211 |
+
joille
|
212 |
+
joina
|
213 |
+
joiksi
|
214 |
+
että
|
215 |
+
ja
|
216 |
+
jos
|
217 |
+
koska
|
218 |
+
kuin
|
219 |
+
mutta
|
220 |
+
niin
|
221 |
+
sekä
|
222 |
+
sillä
|
223 |
+
tai
|
224 |
+
vaan
|
225 |
+
vai
|
226 |
+
vaikka
|
227 |
+
kanssa
|
228 |
+
mukaan
|
229 |
+
noin
|
230 |
+
poikki
|
231 |
+
yli
|
232 |
+
kun
|
233 |
+
niin
|
234 |
+
nyt
|
235 |
+
itse
|
data/nltk_data/corpora/stopwords/french
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
au
|
2 |
+
aux
|
3 |
+
avec
|
4 |
+
ce
|
5 |
+
ces
|
6 |
+
dans
|
7 |
+
de
|
8 |
+
des
|
9 |
+
du
|
10 |
+
elle
|
11 |
+
en
|
12 |
+
et
|
13 |
+
eux
|
14 |
+
il
|
15 |
+
ils
|
16 |
+
je
|
17 |
+
la
|
18 |
+
le
|
19 |
+
les
|
20 |
+
leur
|
21 |
+
lui
|
22 |
+
ma
|
23 |
+
mais
|
24 |
+
me
|
25 |
+
même
|
26 |
+
mes
|
27 |
+
moi
|
28 |
+
mon
|
29 |
+
ne
|
30 |
+
nos
|
31 |
+
notre
|
32 |
+
nous
|
33 |
+
on
|
34 |
+
ou
|
35 |
+
par
|
36 |
+
pas
|
37 |
+
pour
|
38 |
+
qu
|
39 |
+
que
|
40 |
+
qui
|
41 |
+
sa
|
42 |
+
se
|
43 |
+
ses
|
44 |
+
son
|
45 |
+
sur
|
46 |
+
ta
|
47 |
+
te
|
48 |
+
tes
|
49 |
+
toi
|
50 |
+
ton
|
51 |
+
tu
|
52 |
+
un
|
53 |
+
une
|
54 |
+
vos
|
55 |
+
votre
|
56 |
+
vous
|
57 |
+
c
|
58 |
+
d
|
59 |
+
j
|
60 |
+
l
|
61 |
+
à
|
62 |
+
m
|
63 |
+
n
|
64 |
+
s
|
65 |
+
t
|
66 |
+
y
|
67 |
+
été
|
68 |
+
étée
|
69 |
+
étées
|
70 |
+
étés
|
71 |
+
étant
|
72 |
+
étante
|
73 |
+
étants
|
74 |
+
étantes
|
75 |
+
suis
|
76 |
+
es
|
77 |
+
est
|
78 |
+
sommes
|
79 |
+
êtes
|
80 |
+
sont
|
81 |
+
serai
|
82 |
+
seras
|
83 |
+
sera
|
84 |
+
serons
|
85 |
+
serez
|
86 |
+
seront
|
87 |
+
serais
|
88 |
+
serait
|
89 |
+
serions
|
90 |
+
seriez
|
91 |
+
seraient
|
92 |
+
étais
|
93 |
+
était
|
94 |
+
étions
|
95 |
+
étiez
|
96 |
+
étaient
|
97 |
+
fus
|
98 |
+
fut
|
99 |
+
fûmes
|
100 |
+
fûtes
|
101 |
+
furent
|
102 |
+
sois
|
103 |
+
soit
|
104 |
+
soyons
|
105 |
+
soyez
|
106 |
+
soient
|
107 |
+
fusse
|
108 |
+
fusses
|
109 |
+
fût
|
110 |
+
fussions
|
111 |
+
fussiez
|
112 |
+
fussent
|
113 |
+
ayant
|
114 |
+
ayante
|
115 |
+
ayantes
|
116 |
+
ayants
|
117 |
+
eu
|
118 |
+
eue
|
119 |
+
eues
|
120 |
+
eus
|
121 |
+
ai
|
122 |
+
as
|
123 |
+
avons
|
124 |
+
avez
|
125 |
+
ont
|
126 |
+
aurai
|
127 |
+
auras
|
128 |
+
aura
|
129 |
+
aurons
|
130 |
+
aurez
|
131 |
+
auront
|
132 |
+
aurais
|
133 |
+
aurait
|
134 |
+
aurions
|
135 |
+
auriez
|
136 |
+
auraient
|
137 |
+
avais
|
138 |
+
avait
|
139 |
+
avions
|
140 |
+
aviez
|
141 |
+
avaient
|
142 |
+
eut
|
143 |
+
eûmes
|
144 |
+
eûtes
|
145 |
+
eurent
|
146 |
+
aie
|
147 |
+
aies
|
148 |
+
ait
|
149 |
+
ayons
|
150 |
+
ayez
|
151 |
+
aient
|
152 |
+
eusse
|
153 |
+
eusses
|
154 |
+
eût
|
155 |
+
eussions
|
156 |
+
eussiez
|
157 |
+
eussent
|
data/nltk_data/corpora/stopwords/german
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aber
|
2 |
+
alle
|
3 |
+
allem
|
4 |
+
allen
|
5 |
+
aller
|
6 |
+
alles
|
7 |
+
als
|
8 |
+
also
|
9 |
+
am
|
10 |
+
an
|
11 |
+
ander
|
12 |
+
andere
|
13 |
+
anderem
|
14 |
+
anderen
|
15 |
+
anderer
|
16 |
+
anderes
|
17 |
+
anderm
|
18 |
+
andern
|
19 |
+
anderr
|
20 |
+
anders
|
21 |
+
auch
|
22 |
+
auf
|
23 |
+
aus
|
24 |
+
bei
|
25 |
+
bin
|
26 |
+
bis
|
27 |
+
bist
|
28 |
+
da
|
29 |
+
damit
|
30 |
+
dann
|
31 |
+
der
|
32 |
+
den
|
33 |
+
des
|
34 |
+
dem
|
35 |
+
die
|
36 |
+
das
|
37 |
+
dass
|
38 |
+
daß
|
39 |
+
derselbe
|
40 |
+
derselben
|
41 |
+
denselben
|
42 |
+
desselben
|
43 |
+
demselben
|
44 |
+
dieselbe
|
45 |
+
dieselben
|
46 |
+
dasselbe
|
47 |
+
dazu
|
48 |
+
dein
|
49 |
+
deine
|
50 |
+
deinem
|
51 |
+
deinen
|
52 |
+
deiner
|
53 |
+
deines
|
54 |
+
denn
|
55 |
+
derer
|
56 |
+
dessen
|
57 |
+
dich
|
58 |
+
dir
|
59 |
+
du
|
60 |
+
dies
|
61 |
+
diese
|
62 |
+
diesem
|
63 |
+
diesen
|
64 |
+
dieser
|
65 |
+
dieses
|
66 |
+
doch
|
67 |
+
dort
|
68 |
+
durch
|
69 |
+
ein
|
70 |
+
eine
|
71 |
+
einem
|
72 |
+
einen
|
73 |
+
einer
|
74 |
+
eines
|
75 |
+
einig
|
76 |
+
einige
|
77 |
+
einigem
|
78 |
+
einigen
|
79 |
+
einiger
|
80 |
+
einiges
|
81 |
+
einmal
|
82 |
+
er
|
83 |
+
ihn
|
84 |
+
ihm
|
85 |
+
es
|
86 |
+
etwas
|
87 |
+
euer
|
88 |
+
eure
|
89 |
+
eurem
|
90 |
+
euren
|
91 |
+
eurer
|
92 |
+
eures
|
93 |
+
für
|
94 |
+
gegen
|
95 |
+
gewesen
|
96 |
+
hab
|
97 |
+
habe
|
98 |
+
haben
|
99 |
+
hat
|
100 |
+
hatte
|
101 |
+
hatten
|
102 |
+
hier
|
103 |
+
hin
|
104 |
+
hinter
|
105 |
+
ich
|
106 |
+
mich
|
107 |
+
mir
|
108 |
+
ihr
|
109 |
+
ihre
|
110 |
+
ihrem
|
111 |
+
ihren
|
112 |
+
ihrer
|
113 |
+
ihres
|
114 |
+
euch
|
115 |
+
im
|
116 |
+
in
|
117 |
+
indem
|
118 |
+
ins
|
119 |
+
ist
|
120 |
+
jede
|
121 |
+
jedem
|
122 |
+
jeden
|
123 |
+
jeder
|
124 |
+
jedes
|
125 |
+
jene
|
126 |
+
jenem
|
127 |
+
jenen
|
128 |
+
jener
|
129 |
+
jenes
|
130 |
+
jetzt
|
131 |
+
kann
|
132 |
+
kein
|
133 |
+
keine
|
134 |
+
keinem
|
135 |
+
keinen
|
136 |
+
keiner
|
137 |
+
keines
|
138 |
+
können
|
139 |
+
könnte
|
140 |
+
machen
|
141 |
+
man
|
142 |
+
manche
|
143 |
+
manchem
|
144 |
+
manchen
|
145 |
+
mancher
|
146 |
+
manches
|
147 |
+
mein
|
148 |
+
meine
|
149 |
+
meinem
|
150 |
+
meinen
|
151 |
+
meiner
|
152 |
+
meines
|
153 |
+
mit
|
154 |
+
muss
|
155 |
+
musste
|
156 |
+
nach
|
157 |
+
nicht
|
158 |
+
nichts
|
159 |
+
noch
|
160 |
+
nun
|
161 |
+
nur
|
162 |
+
ob
|
163 |
+
oder
|
164 |
+
ohne
|
165 |
+
sehr
|
166 |
+
sein
|
167 |
+
seine
|
168 |
+
seinem
|
169 |
+
seinen
|
170 |
+
seiner
|
171 |
+
seines
|
172 |
+
selbst
|
173 |
+
sich
|
174 |
+
sie
|
175 |
+
ihnen
|
176 |
+
sind
|
177 |
+
so
|
178 |
+
solche
|
179 |
+
solchem
|
180 |
+
solchen
|
181 |
+
solcher
|
182 |
+
solches
|
183 |
+
soll
|
184 |
+
sollte
|
185 |
+
sondern
|
186 |
+
sonst
|
187 |
+
über
|
188 |
+
um
|
189 |
+
und
|
190 |
+
uns
|
191 |
+
unsere
|
192 |
+
unserem
|
193 |
+
unseren
|
194 |
+
unser
|
195 |
+
unseres
|
196 |
+
unter
|
197 |
+
viel
|
198 |
+
vom
|
199 |
+
von
|
200 |
+
vor
|
201 |
+
während
|
202 |
+
war
|
203 |
+
waren
|
204 |
+
warst
|
205 |
+
was
|
206 |
+
weg
|
207 |
+
weil
|
208 |
+
weiter
|
209 |
+
welche
|
210 |
+
welchem
|
211 |
+
welchen
|
212 |
+
welcher
|
213 |
+
welches
|
214 |
+
wenn
|
215 |
+
werde
|
216 |
+
werden
|
217 |
+
wie
|
218 |
+
wieder
|
219 |
+
will
|
220 |
+
wir
|
221 |
+
wird
|
222 |
+
wirst
|
223 |
+
wo
|
224 |
+
wollen
|
225 |
+
wollte
|
226 |
+
würde
|
227 |
+
würden
|
228 |
+
zu
|
229 |
+
zum
|
230 |
+
zur
|
231 |
+
zwar
|
232 |
+
zwischen
|
data/nltk_data/corpora/stopwords/greek
ADDED
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
αλλα
|
2 |
+
αν
|
3 |
+
αντι
|
4 |
+
απο
|
5 |
+
αυτα
|
6 |
+
αυτεσ
|
7 |
+
αυτη
|
8 |
+
αυτο
|
9 |
+
αυτοι
|
10 |
+
αυτοσ
|
11 |
+
αυτουσ
|
12 |
+
αυτων
|
13 |
+
αἱ
|
14 |
+
αἳ
|
15 |
+
αἵ
|
16 |
+
αὐτόσ
|
17 |
+
αὐτὸς
|
18 |
+
αὖ
|
19 |
+
γάρ
|
20 |
+
γα
|
21 |
+
γα^
|
22 |
+
γε
|
23 |
+
για
|
24 |
+
γοῦν
|
25 |
+
γὰρ
|
26 |
+
δ'
|
27 |
+
δέ
|
28 |
+
δή
|
29 |
+
δαί
|
30 |
+
δαίσ
|
31 |
+
δαὶ
|
32 |
+
δαὶς
|
33 |
+
δε
|
34 |
+
δεν
|
35 |
+
δι'
|
36 |
+
διά
|
37 |
+
διὰ
|
38 |
+
δὲ
|
39 |
+
δὴ
|
40 |
+
δ’
|
41 |
+
εαν
|
42 |
+
ειμαι
|
43 |
+
ειμαστε
|
44 |
+
ειναι
|
45 |
+
εισαι
|
46 |
+
ειστε
|
47 |
+
εκεινα
|
48 |
+
εκεινεσ
|
49 |
+
εκεινη
|
50 |
+
εκεινο
|
51 |
+
εκεινοι
|
52 |
+
εκεινοσ
|
53 |
+
εκεινουσ
|
54 |
+
εκεινων
|
55 |
+
ενω
|
56 |
+
επ
|
57 |
+
επι
|
58 |
+
εἰ
|
59 |
+
εἰμί
|
60 |
+
εἰμὶ
|
61 |
+
εἰς
|
62 |
+
εἰσ
|
63 |
+
εἴ
|
64 |
+
εἴμι
|
65 |
+
εἴτε
|
66 |
+
η
|
67 |
+
θα
|
68 |
+
ισωσ
|
69 |
+
κ
|
70 |
+
καί
|
71 |
+
καίτοι
|
72 |
+
καθ
|
73 |
+
και
|
74 |
+
κατ
|
75 |
+
κατά
|
76 |
+
κατα
|
77 |
+
κατὰ
|
78 |
+
καὶ
|
79 |
+
κι
|
80 |
+
κἀν
|
81 |
+
κἂν
|
82 |
+
μέν
|
83 |
+
μή
|
84 |
+
μήτε
|
85 |
+
μα
|
86 |
+
με
|
87 |
+
μεθ
|
88 |
+
μετ
|
89 |
+
μετά
|
90 |
+
μετα
|
91 |
+
μετὰ
|
92 |
+
μη
|
93 |
+
μην
|
94 |
+
μἐν
|
95 |
+
μὲν
|
96 |
+
μὴ
|
97 |
+
μὴν
|
98 |
+
να
|
99 |
+
ο
|
100 |
+
οι
|
101 |
+
ομωσ
|
102 |
+
οπωσ
|
103 |
+
οσο
|
104 |
+
οτι
|
105 |
+
οἱ
|
106 |
+
οἳ
|
107 |
+
οἷς
|
108 |
+
οὐ
|
109 |
+
οὐδ
|
110 |
+
οὐδέ
|
111 |
+
οὐδείσ
|
112 |
+
οὐδεὶς
|
113 |
+
οὐδὲ
|
114 |
+
οὐδὲν
|
115 |
+
οὐκ
|
116 |
+
οὐχ
|
117 |
+
οὐχὶ
|
118 |
+
οὓς
|
119 |
+
οὔτε
|
120 |
+
οὕτω
|
121 |
+
οὕτως
|
122 |
+
οὕτωσ
|
123 |
+
οὖν
|
124 |
+
οὗ
|
125 |
+
οὗτος
|
126 |
+
οὗτοσ
|
127 |
+
παρ
|
128 |
+
παρά
|
129 |
+
παρα
|
130 |
+
παρὰ
|
131 |
+
περί
|
132 |
+
περὶ
|
133 |
+
ποια
|
134 |
+
ποιεσ
|
135 |
+
ποιο
|
136 |
+
ποιοι
|
137 |
+
ποιοσ
|
138 |
+
ποιουσ
|
139 |
+
ποιων
|
140 |
+
ποτε
|
141 |
+
που
|
142 |
+
ποῦ
|
143 |
+
προ
|
144 |
+
προσ
|
145 |
+
πρόσ
|
146 |
+
πρὸ
|
147 |
+
πρὸς
|
148 |
+
πως
|
149 |
+
πωσ
|
150 |
+
σε
|
151 |
+
στη
|
152 |
+
στην
|
153 |
+
στο
|
154 |
+
στον
|
155 |
+
σόσ
|
156 |
+
σύ
|
157 |
+
σύν
|
158 |
+
σὸς
|
159 |
+
σὺ
|
160 |
+
σὺν
|
161 |
+
τά
|
162 |
+
τήν
|
163 |
+
τί
|
164 |
+
τίς
|
165 |
+
τίσ
|
166 |
+
τα
|
167 |
+
ταῖς
|
168 |
+
τε
|
169 |
+
την
|
170 |
+
τησ
|
171 |
+
τι
|
172 |
+
τινα
|
173 |
+
τις
|
174 |
+
τισ
|
175 |
+
το
|
176 |
+
τοί
|
177 |
+
τοι
|
178 |
+
τοιοῦτος
|
179 |
+
τοιοῦτοσ
|
180 |
+
τον
|
181 |
+
τοτε
|
182 |
+
του
|
183 |
+
τούσ
|
184 |
+
τοὺς
|
185 |
+
τοῖς
|
186 |
+
τοῦ
|
187 |
+
των
|
188 |
+
τό
|
189 |
+
τόν
|
190 |
+
τότε
|
191 |
+
τὰ
|
192 |
+
τὰς
|
193 |
+
τὴν
|
194 |
+
τὸ
|
195 |
+
τὸν
|
196 |
+
τῆς
|
197 |
+
τῆσ
|
198 |
+
τῇ
|
199 |
+
τῶν
|
200 |
+
τῷ
|
201 |
+
ωσ
|
202 |
+
ἀλλ'
|
203 |
+
ἀλλά
|
204 |
+
ἀλλὰ
|
205 |
+
ἀλλ’
|
206 |
+
ἀπ
|
207 |
+
ἀπό
|
208 |
+
ἀπὸ
|
209 |
+
ἀφ
|
210 |
+
ἂν
|
211 |
+
ἃ
|
212 |
+
ἄλλος
|
213 |
+
ἄλλοσ
|
214 |
+
ἄν
|
215 |
+
ἄρα
|
216 |
+
ἅμα
|
217 |
+
ἐάν
|
218 |
+
ἐγώ
|
219 |
+
ἐγὼ
|
220 |
+
ἐκ
|
221 |
+
ἐμόσ
|
222 |
+
ἐμὸς
|
223 |
+
ἐν
|
224 |
+
ἐξ
|
225 |
+
ἐπί
|
226 |
+
ἐπεὶ
|
227 |
+
ἐπὶ
|
228 |
+
ἐστι
|
229 |
+
ἐφ
|
230 |
+
ἐὰν
|
231 |
+
ἑαυτοῦ
|
232 |
+
ἔτι
|
233 |
+
ἡ
|
234 |
+
ἢ
|
235 |
+
ἣ
|
236 |
+
ἤ
|
237 |
+
ἥ
|
238 |
+
ἧς
|
239 |
+
ἵνα
|
240 |
+
ὁ
|
241 |
+
ὃ
|
242 |
+
ὃν
|
243 |
+
ὃς
|
244 |
+
ὅ
|
245 |
+
ὅδε
|
246 |
+
ὅθεν
|
247 |
+
ὅπερ
|
248 |
+
ὅς
|
249 |
+
ὅσ
|
250 |
+
ὅστις
|
251 |
+
ὅστισ
|
252 |
+
ὅτε
|
253 |
+
ὅτι
|
254 |
+
ὑμόσ
|
255 |
+
ὑπ
|
256 |
+
ὑπέρ
|
257 |
+
ὑπό
|
258 |
+
ὑπὲρ
|
259 |
+
ὑπὸ
|
260 |
+
ὡς
|
261 |
+
ὡσ
|
262 |
+
ὥς
|
263 |
+
ὥστε
|
264 |
+
ὦ
|
265 |
+
ᾧ
|
data/nltk_data/corpora/stopwords/hebrew
ADDED
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
אני
|
2 |
+
את
|
3 |
+
אתה
|
4 |
+
אנחנו
|
5 |
+
אתן
|
6 |
+
אתם
|
7 |
+
הם
|
8 |
+
הן
|
9 |
+
היא
|
10 |
+
הוא
|
11 |
+
שלי
|
12 |
+
שלו
|
13 |
+
שלך
|
14 |
+
שלה
|
15 |
+
שלנו
|
16 |
+
שלכם
|
17 |
+
שלכן
|
18 |
+
שלהם
|
19 |
+
שלהן
|
20 |
+
לי
|
21 |
+
לו
|
22 |
+
לה
|
23 |
+
לנו
|
24 |
+
לכם
|
25 |
+
לכן
|
26 |
+
להם
|
27 |
+
להן
|
28 |
+
אותה
|
29 |
+
אותו
|
30 |
+
זה
|
31 |
+
זאת
|
32 |
+
אלה
|
33 |
+
אלו
|
34 |
+
תחת
|
35 |
+
מתחת
|
36 |
+
מעל
|
37 |
+
בין
|
38 |
+
עם
|
39 |
+
עד
|
40 |
+
נגר
|
41 |
+
על
|
42 |
+
אל
|
43 |
+
מול
|
44 |
+
של
|
45 |
+
אצל
|
46 |
+
כמו
|
47 |
+
אחר
|
48 |
+
אותו
|
49 |
+
בלי
|
50 |
+
לפני
|
51 |
+
אחרי
|
52 |
+
מאחורי
|
53 |
+
עלי
|
54 |
+
עליו
|
55 |
+
עליה
|
56 |
+
עליך
|
57 |
+
עלינו
|
58 |
+
עליכם
|
59 |
+
לעיכן
|
60 |
+
עליהם
|
61 |
+
עליהן
|
62 |
+
כל
|
63 |
+
כולם
|
64 |
+
כולן
|
65 |
+
כך
|
66 |
+
ככה
|
67 |
+
כזה
|
68 |
+
זה
|
69 |
+
זות
|
70 |
+
אותי
|
71 |
+
אותה
|
72 |
+
אותם
|
73 |
+
אותך
|
74 |
+
אותו
|
75 |
+
אותן
|
76 |
+
אותנו
|
77 |
+
ואת
|
78 |
+
את
|
79 |
+
אתכם
|
80 |
+
אתכן
|
81 |
+
איתי
|
82 |
+
איתו
|
83 |
+
איתך
|
84 |
+
איתה
|
85 |
+
איתם
|
86 |
+
איתן
|
87 |
+
איתנו
|
88 |
+
איתכם
|
89 |
+
איתכן
|
90 |
+
יהיה
|
91 |
+
תהיה
|
92 |
+
היתי
|
93 |
+
היתה
|
94 |
+
היה
|
95 |
+
להיות
|
96 |
+
עצמי
|
97 |
+
עצמו
|
98 |
+
עצמה
|
99 |
+
עצמם
|
100 |
+
עצמן
|
101 |
+
עצמנו
|
102 |
+
עצמהם
|
103 |
+
עצמהן
|
104 |
+
מי
|
105 |
+
מה
|
106 |
+
איפה
|
107 |
+
היכן
|
108 |
+
במקום שבו
|
109 |
+
אם
|
110 |
+
לאן
|
111 |
+
למקום שבו
|
112 |
+
מקום בו
|
113 |
+
איזה
|
114 |
+
מהיכן
|
115 |
+
איך
|
116 |
+
כיצד
|
117 |
+
באיזו מידה
|
118 |
+
מתי
|
119 |
+
בשעה ש
|
120 |
+
כאשר
|
121 |
+
כש
|
122 |
+
למרות
|
123 |
+
לפני
|
124 |
+
אחרי
|
125 |
+
מאיזו סיבה
|
126 |
+
הסיבה שבגללה
|
127 |
+
למה
|
128 |
+
מדוע
|
129 |
+
לאיזו תכלית
|
130 |
+
כי
|
131 |
+
יש
|
132 |
+
אין
|
133 |
+
אך
|
134 |
+
מנין
|
135 |
+
מאין
|
136 |
+
מאיפה
|
137 |
+
יכל
|
138 |
+
יכלה
|
139 |
+
יכלו
|
140 |
+
יכול
|
141 |
+
יכולה
|
142 |
+
יכולים
|
143 |
+
יכולות
|
144 |
+
יוכלו
|
145 |
+
יוכל
|
146 |
+
מסוגל
|
147 |
+
לא
|
148 |
+
רק
|
149 |
+
אולי
|
150 |
+
אין
|
151 |
+
לאו
|
152 |
+
אי
|
153 |
+
כלל
|
154 |
+
נגד
|
155 |
+
אם
|
156 |
+
עם
|
157 |
+
אל
|
158 |
+
אלה
|
159 |
+
אלו
|
160 |
+
אף
|
161 |
+
על
|
162 |
+
מעל
|
163 |
+
מתחת
|
164 |
+
מצד
|
165 |
+
בשביל
|
166 |
+
לבין
|
167 |
+
באמצע
|
168 |
+
בתוך
|
169 |
+
דרך
|
170 |
+
מבעד
|
171 |
+
באמצעות
|
172 |
+
למעלה
|
173 |
+
למטה
|
174 |
+
מחוץ
|
175 |
+
מן
|
176 |
+
לעבר
|
177 |
+
מכאן
|
178 |
+
כאן
|
179 |
+
הנה
|
180 |
+
הרי
|
181 |
+
פה
|
182 |
+
שם
|
183 |
+
אך
|
184 |
+
ברם
|
185 |
+
שוב
|
186 |
+
אבל
|
187 |
+
מבלי
|
188 |
+
בלי
|
189 |
+
מלבד
|
190 |
+
רק
|
191 |
+
בגלל
|
192 |
+
מכיוון
|
193 |
+
עד
|
194 |
+
אשר
|
195 |
+
ואילו
|
196 |
+
למרות
|
197 |
+
אס
|
198 |
+
כמו
|
199 |
+
כפי
|
200 |
+
אז
|
201 |
+
אחרי
|
202 |
+
כן
|
203 |
+
לכן
|
204 |
+
לפיכך
|
205 |
+
מאד
|
206 |
+
עז
|
207 |
+
מעט
|
208 |
+
מעטים
|
209 |
+
במידה
|
210 |
+
שוב
|
211 |
+
יותר
|
212 |
+
מדי
|
213 |
+
גם
|
214 |
+
כן
|
215 |
+
נו
|
216 |
+
אחר
|
217 |
+
אחרת
|
218 |
+
אחרים
|
219 |
+
אחרות
|
220 |
+
אשר
|
221 |
+
או
|
data/nltk_data/corpora/stopwords/hinglish
ADDED
@@ -0,0 +1,1036 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
aadi
|
3 |
+
aaj
|
4 |
+
aap
|
5 |
+
aapne
|
6 |
+
aata
|
7 |
+
aati
|
8 |
+
aaya
|
9 |
+
aaye
|
10 |
+
ab
|
11 |
+
abbe
|
12 |
+
abbey
|
13 |
+
abe
|
14 |
+
abhi
|
15 |
+
able
|
16 |
+
about
|
17 |
+
above
|
18 |
+
accha
|
19 |
+
according
|
20 |
+
accordingly
|
21 |
+
acha
|
22 |
+
achcha
|
23 |
+
across
|
24 |
+
actually
|
25 |
+
after
|
26 |
+
afterwards
|
27 |
+
again
|
28 |
+
against
|
29 |
+
agar
|
30 |
+
ain
|
31 |
+
aint
|
32 |
+
ain't
|
33 |
+
aisa
|
34 |
+
aise
|
35 |
+
aisi
|
36 |
+
alag
|
37 |
+
all
|
38 |
+
allow
|
39 |
+
allows
|
40 |
+
almost
|
41 |
+
alone
|
42 |
+
along
|
43 |
+
already
|
44 |
+
also
|
45 |
+
although
|
46 |
+
always
|
47 |
+
am
|
48 |
+
among
|
49 |
+
amongst
|
50 |
+
an
|
51 |
+
and
|
52 |
+
andar
|
53 |
+
another
|
54 |
+
any
|
55 |
+
anybody
|
56 |
+
anyhow
|
57 |
+
anyone
|
58 |
+
anything
|
59 |
+
anyway
|
60 |
+
anyways
|
61 |
+
anywhere
|
62 |
+
ap
|
63 |
+
apan
|
64 |
+
apart
|
65 |
+
apna
|
66 |
+
apnaa
|
67 |
+
apne
|
68 |
+
apni
|
69 |
+
appear
|
70 |
+
are
|
71 |
+
aren
|
72 |
+
arent
|
73 |
+
aren't
|
74 |
+
around
|
75 |
+
arre
|
76 |
+
as
|
77 |
+
aside
|
78 |
+
ask
|
79 |
+
asking
|
80 |
+
at
|
81 |
+
aur
|
82 |
+
avum
|
83 |
+
aya
|
84 |
+
aye
|
85 |
+
baad
|
86 |
+
baar
|
87 |
+
bad
|
88 |
+
bahut
|
89 |
+
bana
|
90 |
+
banae
|
91 |
+
banai
|
92 |
+
banao
|
93 |
+
banaya
|
94 |
+
banaye
|
95 |
+
banayi
|
96 |
+
banda
|
97 |
+
bande
|
98 |
+
bandi
|
99 |
+
bane
|
100 |
+
bani
|
101 |
+
bas
|
102 |
+
bata
|
103 |
+
batao
|
104 |
+
bc
|
105 |
+
be
|
106 |
+
became
|
107 |
+
because
|
108 |
+
become
|
109 |
+
becomes
|
110 |
+
becoming
|
111 |
+
been
|
112 |
+
before
|
113 |
+
beforehand
|
114 |
+
behind
|
115 |
+
being
|
116 |
+
below
|
117 |
+
beside
|
118 |
+
besides
|
119 |
+
best
|
120 |
+
better
|
121 |
+
between
|
122 |
+
beyond
|
123 |
+
bhai
|
124 |
+
bheetar
|
125 |
+
bhi
|
126 |
+
bhitar
|
127 |
+
bht
|
128 |
+
bilkul
|
129 |
+
bohot
|
130 |
+
bol
|
131 |
+
bola
|
132 |
+
bole
|
133 |
+
boli
|
134 |
+
bolo
|
135 |
+
bolta
|
136 |
+
bolte
|
137 |
+
bolti
|
138 |
+
both
|
139 |
+
brief
|
140 |
+
bro
|
141 |
+
btw
|
142 |
+
but
|
143 |
+
by
|
144 |
+
came
|
145 |
+
can
|
146 |
+
cannot
|
147 |
+
cant
|
148 |
+
can't
|
149 |
+
cause
|
150 |
+
causes
|
151 |
+
certain
|
152 |
+
certainly
|
153 |
+
chahiye
|
154 |
+
chaiye
|
155 |
+
chal
|
156 |
+
chalega
|
157 |
+
chhaiye
|
158 |
+
clearly
|
159 |
+
c'mon
|
160 |
+
com
|
161 |
+
come
|
162 |
+
comes
|
163 |
+
could
|
164 |
+
couldn
|
165 |
+
couldnt
|
166 |
+
couldn't
|
167 |
+
d
|
168 |
+
de
|
169 |
+
dede
|
170 |
+
dega
|
171 |
+
degi
|
172 |
+
dekh
|
173 |
+
dekha
|
174 |
+
dekhe
|
175 |
+
dekhi
|
176 |
+
dekho
|
177 |
+
denge
|
178 |
+
dhang
|
179 |
+
di
|
180 |
+
did
|
181 |
+
didn
|
182 |
+
didnt
|
183 |
+
didn't
|
184 |
+
dijiye
|
185 |
+
diya
|
186 |
+
diyaa
|
187 |
+
diye
|
188 |
+
diyo
|
189 |
+
do
|
190 |
+
does
|
191 |
+
doesn
|
192 |
+
doesnt
|
193 |
+
doesn't
|
194 |
+
doing
|
195 |
+
done
|
196 |
+
dono
|
197 |
+
dont
|
198 |
+
don't
|
199 |
+
doosra
|
200 |
+
doosre
|
201 |
+
down
|
202 |
+
downwards
|
203 |
+
dude
|
204 |
+
dunga
|
205 |
+
dungi
|
206 |
+
during
|
207 |
+
dusra
|
208 |
+
dusre
|
209 |
+
dusri
|
210 |
+
dvaara
|
211 |
+
dvara
|
212 |
+
dwaara
|
213 |
+
dwara
|
214 |
+
each
|
215 |
+
edu
|
216 |
+
eg
|
217 |
+
eight
|
218 |
+
either
|
219 |
+
ek
|
220 |
+
else
|
221 |
+
elsewhere
|
222 |
+
enough
|
223 |
+
etc
|
224 |
+
even
|
225 |
+
ever
|
226 |
+
every
|
227 |
+
everybody
|
228 |
+
everyone
|
229 |
+
everything
|
230 |
+
everywhere
|
231 |
+
ex
|
232 |
+
exactly
|
233 |
+
example
|
234 |
+
except
|
235 |
+
far
|
236 |
+
few
|
237 |
+
fifth
|
238 |
+
fir
|
239 |
+
first
|
240 |
+
five
|
241 |
+
followed
|
242 |
+
following
|
243 |
+
follows
|
244 |
+
for
|
245 |
+
forth
|
246 |
+
four
|
247 |
+
from
|
248 |
+
further
|
249 |
+
furthermore
|
250 |
+
gaya
|
251 |
+
gaye
|
252 |
+
gayi
|
253 |
+
get
|
254 |
+
gets
|
255 |
+
getting
|
256 |
+
ghar
|
257 |
+
given
|
258 |
+
gives
|
259 |
+
go
|
260 |
+
goes
|
261 |
+
going
|
262 |
+
gone
|
263 |
+
good
|
264 |
+
got
|
265 |
+
gotten
|
266 |
+
greetings
|
267 |
+
haan
|
268 |
+
had
|
269 |
+
hadd
|
270 |
+
hadn
|
271 |
+
hadnt
|
272 |
+
hadn't
|
273 |
+
hai
|
274 |
+
hain
|
275 |
+
hamara
|
276 |
+
hamare
|
277 |
+
hamari
|
278 |
+
hamne
|
279 |
+
han
|
280 |
+
happens
|
281 |
+
har
|
282 |
+
hardly
|
283 |
+
has
|
284 |
+
hasn
|
285 |
+
hasnt
|
286 |
+
hasn't
|
287 |
+
have
|
288 |
+
haven
|
289 |
+
havent
|
290 |
+
haven't
|
291 |
+
having
|
292 |
+
he
|
293 |
+
hello
|
294 |
+
help
|
295 |
+
hence
|
296 |
+
her
|
297 |
+
here
|
298 |
+
hereafter
|
299 |
+
hereby
|
300 |
+
herein
|
301 |
+
here's
|
302 |
+
hereupon
|
303 |
+
hers
|
304 |
+
herself
|
305 |
+
he's
|
306 |
+
hi
|
307 |
+
him
|
308 |
+
himself
|
309 |
+
his
|
310 |
+
hither
|
311 |
+
hm
|
312 |
+
hmm
|
313 |
+
ho
|
314 |
+
hoga
|
315 |
+
hoge
|
316 |
+
hogi
|
317 |
+
hona
|
318 |
+
honaa
|
319 |
+
hone
|
320 |
+
honge
|
321 |
+
hongi
|
322 |
+
honi
|
323 |
+
hopefully
|
324 |
+
hota
|
325 |
+
hotaa
|
326 |
+
hote
|
327 |
+
hoti
|
328 |
+
how
|
329 |
+
howbeit
|
330 |
+
however
|
331 |
+
hoyenge
|
332 |
+
hoyengi
|
333 |
+
hu
|
334 |
+
hua
|
335 |
+
hue
|
336 |
+
huh
|
337 |
+
hui
|
338 |
+
hum
|
339 |
+
humein
|
340 |
+
humne
|
341 |
+
hun
|
342 |
+
huye
|
343 |
+
huyi
|
344 |
+
i
|
345 |
+
i'd
|
346 |
+
idk
|
347 |
+
ie
|
348 |
+
if
|
349 |
+
i'll
|
350 |
+
i'm
|
351 |
+
imo
|
352 |
+
in
|
353 |
+
inasmuch
|
354 |
+
inc
|
355 |
+
inhe
|
356 |
+
inhi
|
357 |
+
inho
|
358 |
+
inka
|
359 |
+
inkaa
|
360 |
+
inke
|
361 |
+
inki
|
362 |
+
inn
|
363 |
+
inner
|
364 |
+
inse
|
365 |
+
insofar
|
366 |
+
into
|
367 |
+
inward
|
368 |
+
is
|
369 |
+
ise
|
370 |
+
isi
|
371 |
+
iska
|
372 |
+
iskaa
|
373 |
+
iske
|
374 |
+
iski
|
375 |
+
isme
|
376 |
+
isn
|
377 |
+
isne
|
378 |
+
isnt
|
379 |
+
isn't
|
380 |
+
iss
|
381 |
+
isse
|
382 |
+
issi
|
383 |
+
isski
|
384 |
+
it
|
385 |
+
it'd
|
386 |
+
it'll
|
387 |
+
itna
|
388 |
+
itne
|
389 |
+
itni
|
390 |
+
itno
|
391 |
+
its
|
392 |
+
it's
|
393 |
+
itself
|
394 |
+
ityaadi
|
395 |
+
ityadi
|
396 |
+
i've
|
397 |
+
ja
|
398 |
+
jaa
|
399 |
+
jab
|
400 |
+
jabh
|
401 |
+
jaha
|
402 |
+
jahaan
|
403 |
+
jahan
|
404 |
+
jaisa
|
405 |
+
jaise
|
406 |
+
jaisi
|
407 |
+
jata
|
408 |
+
jayega
|
409 |
+
jidhar
|
410 |
+
jin
|
411 |
+
jinhe
|
412 |
+
jinhi
|
413 |
+
jinho
|
414 |
+
jinhone
|
415 |
+
jinka
|
416 |
+
jinke
|
417 |
+
jinki
|
418 |
+
jinn
|
419 |
+
jis
|
420 |
+
jise
|
421 |
+
jiska
|
422 |
+
jiske
|
423 |
+
jiski
|
424 |
+
jisme
|
425 |
+
jiss
|
426 |
+
jisse
|
427 |
+
jitna
|
428 |
+
jitne
|
429 |
+
jitni
|
430 |
+
jo
|
431 |
+
just
|
432 |
+
jyaada
|
433 |
+
jyada
|
434 |
+
k
|
435 |
+
ka
|
436 |
+
kaafi
|
437 |
+
kab
|
438 |
+
kabhi
|
439 |
+
kafi
|
440 |
+
kaha
|
441 |
+
kahaa
|
442 |
+
kahaan
|
443 |
+
kahan
|
444 |
+
kahi
|
445 |
+
kahin
|
446 |
+
kahte
|
447 |
+
kaisa
|
448 |
+
kaise
|
449 |
+
kaisi
|
450 |
+
kal
|
451 |
+
kam
|
452 |
+
kar
|
453 |
+
kara
|
454 |
+
kare
|
455 |
+
karega
|
456 |
+
karegi
|
457 |
+
karen
|
458 |
+
karenge
|
459 |
+
kari
|
460 |
+
karke
|
461 |
+
karna
|
462 |
+
karne
|
463 |
+
karni
|
464 |
+
karo
|
465 |
+
karta
|
466 |
+
karte
|
467 |
+
karti
|
468 |
+
karu
|
469 |
+
karun
|
470 |
+
karunga
|
471 |
+
karungi
|
472 |
+
kaun
|
473 |
+
kaunsa
|
474 |
+
kayi
|
475 |
+
kch
|
476 |
+
ke
|
477 |
+
keep
|
478 |
+
keeps
|
479 |
+
keh
|
480 |
+
kehte
|
481 |
+
kept
|
482 |
+
khud
|
483 |
+
ki
|
484 |
+
kin
|
485 |
+
kine
|
486 |
+
kinhe
|
487 |
+
kinho
|
488 |
+
kinka
|
489 |
+
kinke
|
490 |
+
kinki
|
491 |
+
kinko
|
492 |
+
kinn
|
493 |
+
kino
|
494 |
+
kis
|
495 |
+
kise
|
496 |
+
kisi
|
497 |
+
kiska
|
498 |
+
kiske
|
499 |
+
kiski
|
500 |
+
kisko
|
501 |
+
kisliye
|
502 |
+
kisne
|
503 |
+
kitna
|
504 |
+
kitne
|
505 |
+
kitni
|
506 |
+
kitno
|
507 |
+
kiya
|
508 |
+
kiye
|
509 |
+
know
|
510 |
+
known
|
511 |
+
knows
|
512 |
+
ko
|
513 |
+
koi
|
514 |
+
kon
|
515 |
+
konsa
|
516 |
+
koyi
|
517 |
+
krna
|
518 |
+
krne
|
519 |
+
kuch
|
520 |
+
kuchch
|
521 |
+
kuchh
|
522 |
+
kul
|
523 |
+
kull
|
524 |
+
kya
|
525 |
+
kyaa
|
526 |
+
kyu
|
527 |
+
kyuki
|
528 |
+
kyun
|
529 |
+
kyunki
|
530 |
+
lagta
|
531 |
+
lagte
|
532 |
+
lagti
|
533 |
+
last
|
534 |
+
lately
|
535 |
+
later
|
536 |
+
le
|
537 |
+
least
|
538 |
+
lekar
|
539 |
+
lekin
|
540 |
+
less
|
541 |
+
lest
|
542 |
+
let
|
543 |
+
let's
|
544 |
+
li
|
545 |
+
like
|
546 |
+
liked
|
547 |
+
likely
|
548 |
+
little
|
549 |
+
liya
|
550 |
+
liye
|
551 |
+
ll
|
552 |
+
lo
|
553 |
+
log
|
554 |
+
logon
|
555 |
+
lol
|
556 |
+
look
|
557 |
+
looking
|
558 |
+
looks
|
559 |
+
ltd
|
560 |
+
lunga
|
561 |
+
m
|
562 |
+
maan
|
563 |
+
maana
|
564 |
+
maane
|
565 |
+
maani
|
566 |
+
maano
|
567 |
+
magar
|
568 |
+
mai
|
569 |
+
main
|
570 |
+
maine
|
571 |
+
mainly
|
572 |
+
mana
|
573 |
+
mane
|
574 |
+
mani
|
575 |
+
mano
|
576 |
+
many
|
577 |
+
mat
|
578 |
+
may
|
579 |
+
maybe
|
580 |
+
me
|
581 |
+
mean
|
582 |
+
meanwhile
|
583 |
+
mein
|
584 |
+
mera
|
585 |
+
mere
|
586 |
+
merely
|
587 |
+
meri
|
588 |
+
might
|
589 |
+
mightn
|
590 |
+
mightnt
|
591 |
+
mightn't
|
592 |
+
mil
|
593 |
+
mjhe
|
594 |
+
more
|
595 |
+
moreover
|
596 |
+
most
|
597 |
+
mostly
|
598 |
+
much
|
599 |
+
mujhe
|
600 |
+
must
|
601 |
+
mustn
|
602 |
+
mustnt
|
603 |
+
mustn't
|
604 |
+
my
|
605 |
+
myself
|
606 |
+
na
|
607 |
+
naa
|
608 |
+
naah
|
609 |
+
nahi
|
610 |
+
nahin
|
611 |
+
nai
|
612 |
+
name
|
613 |
+
namely
|
614 |
+
nd
|
615 |
+
ne
|
616 |
+
near
|
617 |
+
nearly
|
618 |
+
necessary
|
619 |
+
neeche
|
620 |
+
need
|
621 |
+
needn
|
622 |
+
neednt
|
623 |
+
needn't
|
624 |
+
needs
|
625 |
+
neither
|
626 |
+
never
|
627 |
+
nevertheless
|
628 |
+
new
|
629 |
+
next
|
630 |
+
nhi
|
631 |
+
nine
|
632 |
+
no
|
633 |
+
nobody
|
634 |
+
non
|
635 |
+
none
|
636 |
+
noone
|
637 |
+
nope
|
638 |
+
nor
|
639 |
+
normally
|
640 |
+
not
|
641 |
+
nothing
|
642 |
+
novel
|
643 |
+
now
|
644 |
+
nowhere
|
645 |
+
o
|
646 |
+
obviously
|
647 |
+
of
|
648 |
+
off
|
649 |
+
often
|
650 |
+
oh
|
651 |
+
ok
|
652 |
+
okay
|
653 |
+
old
|
654 |
+
on
|
655 |
+
once
|
656 |
+
one
|
657 |
+
ones
|
658 |
+
only
|
659 |
+
onto
|
660 |
+
or
|
661 |
+
other
|
662 |
+
others
|
663 |
+
otherwise
|
664 |
+
ought
|
665 |
+
our
|
666 |
+
ours
|
667 |
+
ourselves
|
668 |
+
out
|
669 |
+
outside
|
670 |
+
over
|
671 |
+
overall
|
672 |
+
own
|
673 |
+
par
|
674 |
+
pata
|
675 |
+
pe
|
676 |
+
pehla
|
677 |
+
pehle
|
678 |
+
pehli
|
679 |
+
people
|
680 |
+
per
|
681 |
+
perhaps
|
682 |
+
phla
|
683 |
+
phle
|
684 |
+
phli
|
685 |
+
placed
|
686 |
+
please
|
687 |
+
plus
|
688 |
+
poora
|
689 |
+
poori
|
690 |
+
provides
|
691 |
+
pura
|
692 |
+
puri
|
693 |
+
q
|
694 |
+
que
|
695 |
+
quite
|
696 |
+
raha
|
697 |
+
rahaa
|
698 |
+
rahe
|
699 |
+
rahi
|
700 |
+
rakh
|
701 |
+
rakha
|
702 |
+
rakhe
|
703 |
+
rakhen
|
704 |
+
rakhi
|
705 |
+
rakho
|
706 |
+
rather
|
707 |
+
re
|
708 |
+
really
|
709 |
+
reasonably
|
710 |
+
regarding
|
711 |
+
regardless
|
712 |
+
regards
|
713 |
+
rehte
|
714 |
+
rha
|
715 |
+
rhaa
|
716 |
+
rhe
|
717 |
+
rhi
|
718 |
+
ri
|
719 |
+
right
|
720 |
+
s
|
721 |
+
sa
|
722 |
+
saara
|
723 |
+
saare
|
724 |
+
saath
|
725 |
+
sab
|
726 |
+
sabhi
|
727 |
+
sabse
|
728 |
+
sahi
|
729 |
+
said
|
730 |
+
sakta
|
731 |
+
saktaa
|
732 |
+
sakte
|
733 |
+
sakti
|
734 |
+
same
|
735 |
+
sang
|
736 |
+
sara
|
737 |
+
sath
|
738 |
+
saw
|
739 |
+
say
|
740 |
+
saying
|
741 |
+
says
|
742 |
+
se
|
743 |
+
second
|
744 |
+
secondly
|
745 |
+
see
|
746 |
+
seeing
|
747 |
+
seem
|
748 |
+
seemed
|
749 |
+
seeming
|
750 |
+
seems
|
751 |
+
seen
|
752 |
+
self
|
753 |
+
selves
|
754 |
+
sensible
|
755 |
+
sent
|
756 |
+
serious
|
757 |
+
seriously
|
758 |
+
seven
|
759 |
+
several
|
760 |
+
shall
|
761 |
+
shan
|
762 |
+
shant
|
763 |
+
shan't
|
764 |
+
she
|
765 |
+
she's
|
766 |
+
should
|
767 |
+
shouldn
|
768 |
+
shouldnt
|
769 |
+
shouldn't
|
770 |
+
should've
|
771 |
+
si
|
772 |
+
since
|
773 |
+
six
|
774 |
+
so
|
775 |
+
soch
|
776 |
+
some
|
777 |
+
somebody
|
778 |
+
somehow
|
779 |
+
someone
|
780 |
+
something
|
781 |
+
sometime
|
782 |
+
sometimes
|
783 |
+
somewhat
|
784 |
+
somewhere
|
785 |
+
soon
|
786 |
+
still
|
787 |
+
sub
|
788 |
+
such
|
789 |
+
sup
|
790 |
+
sure
|
791 |
+
t
|
792 |
+
tab
|
793 |
+
tabh
|
794 |
+
tak
|
795 |
+
take
|
796 |
+
taken
|
797 |
+
tarah
|
798 |
+
teen
|
799 |
+
teeno
|
800 |
+
teesra
|
801 |
+
teesre
|
802 |
+
teesri
|
803 |
+
tell
|
804 |
+
tends
|
805 |
+
tera
|
806 |
+
tere
|
807 |
+
teri
|
808 |
+
th
|
809 |
+
tha
|
810 |
+
than
|
811 |
+
thank
|
812 |
+
thanks
|
813 |
+
thanx
|
814 |
+
that
|
815 |
+
that'll
|
816 |
+
thats
|
817 |
+
that's
|
818 |
+
the
|
819 |
+
theek
|
820 |
+
their
|
821 |
+
theirs
|
822 |
+
them
|
823 |
+
themselves
|
824 |
+
then
|
825 |
+
thence
|
826 |
+
there
|
827 |
+
thereafter
|
828 |
+
thereby
|
829 |
+
therefore
|
830 |
+
therein
|
831 |
+
theres
|
832 |
+
there's
|
833 |
+
thereupon
|
834 |
+
these
|
835 |
+
they
|
836 |
+
they'd
|
837 |
+
they'll
|
838 |
+
they're
|
839 |
+
they've
|
840 |
+
thi
|
841 |
+
thik
|
842 |
+
thing
|
843 |
+
think
|
844 |
+
thinking
|
845 |
+
third
|
846 |
+
this
|
847 |
+
tho
|
848 |
+
thoda
|
849 |
+
thodi
|
850 |
+
thorough
|
851 |
+
thoroughly
|
852 |
+
those
|
853 |
+
though
|
854 |
+
thought
|
855 |
+
three
|
856 |
+
through
|
857 |
+
throughout
|
858 |
+
thru
|
859 |
+
thus
|
860 |
+
tjhe
|
861 |
+
to
|
862 |
+
together
|
863 |
+
toh
|
864 |
+
too
|
865 |
+
took
|
866 |
+
toward
|
867 |
+
towards
|
868 |
+
tried
|
869 |
+
tries
|
870 |
+
true
|
871 |
+
truly
|
872 |
+
try
|
873 |
+
trying
|
874 |
+
tu
|
875 |
+
tujhe
|
876 |
+
tum
|
877 |
+
tumhara
|
878 |
+
tumhare
|
879 |
+
tumhari
|
880 |
+
tune
|
881 |
+
twice
|
882 |
+
two
|
883 |
+
um
|
884 |
+
umm
|
885 |
+
un
|
886 |
+
under
|
887 |
+
unhe
|
888 |
+
unhi
|
889 |
+
unho
|
890 |
+
unhone
|
891 |
+
unka
|
892 |
+
unkaa
|
893 |
+
unke
|
894 |
+
unki
|
895 |
+
unko
|
896 |
+
unless
|
897 |
+
unlikely
|
898 |
+
unn
|
899 |
+
unse
|
900 |
+
until
|
901 |
+
unto
|
902 |
+
up
|
903 |
+
upar
|
904 |
+
upon
|
905 |
+
us
|
906 |
+
use
|
907 |
+
used
|
908 |
+
useful
|
909 |
+
uses
|
910 |
+
usi
|
911 |
+
using
|
912 |
+
uska
|
913 |
+
uske
|
914 |
+
usne
|
915 |
+
uss
|
916 |
+
usse
|
917 |
+
ussi
|
918 |
+
usually
|
919 |
+
vaala
|
920 |
+
vaale
|
921 |
+
vaali
|
922 |
+
vahaan
|
923 |
+
vahan
|
924 |
+
vahi
|
925 |
+
vahin
|
926 |
+
vaisa
|
927 |
+
vaise
|
928 |
+
vaisi
|
929 |
+
vala
|
930 |
+
vale
|
931 |
+
vali
|
932 |
+
various
|
933 |
+
ve
|
934 |
+
very
|
935 |
+
via
|
936 |
+
viz
|
937 |
+
vo
|
938 |
+
waala
|
939 |
+
waale
|
940 |
+
waali
|
941 |
+
wagaira
|
942 |
+
wagairah
|
943 |
+
wagerah
|
944 |
+
waha
|
945 |
+
wahaan
|
946 |
+
wahan
|
947 |
+
wahi
|
948 |
+
wahin
|
949 |
+
waisa
|
950 |
+
waise
|
951 |
+
waisi
|
952 |
+
wala
|
953 |
+
wale
|
954 |
+
wali
|
955 |
+
want
|
956 |
+
wants
|
957 |
+
was
|
958 |
+
wasn
|
959 |
+
wasnt
|
960 |
+
wasn't
|
961 |
+
way
|
962 |
+
we
|
963 |
+
we'd
|
964 |
+
well
|
965 |
+
we'll
|
966 |
+
went
|
967 |
+
were
|
968 |
+
we're
|
969 |
+
weren
|
970 |
+
werent
|
971 |
+
weren't
|
972 |
+
we've
|
973 |
+
what
|
974 |
+
whatever
|
975 |
+
what's
|
976 |
+
when
|
977 |
+
whence
|
978 |
+
whenever
|
979 |
+
where
|
980 |
+
whereafter
|
981 |
+
whereas
|
982 |
+
whereby
|
983 |
+
wherein
|
984 |
+
where's
|
985 |
+
whereupon
|
986 |
+
wherever
|
987 |
+
whether
|
988 |
+
which
|
989 |
+
while
|
990 |
+
who
|
991 |
+
whoever
|
992 |
+
whole
|
993 |
+
whom
|
994 |
+
who's
|
995 |
+
whose
|
996 |
+
why
|
997 |
+
will
|
998 |
+
willing
|
999 |
+
with
|
1000 |
+
within
|
1001 |
+
without
|
1002 |
+
wo
|
1003 |
+
woh
|
1004 |
+
wohi
|
1005 |
+
won
|
1006 |
+
wont
|
1007 |
+
won't
|
1008 |
+
would
|
1009 |
+
wouldn
|
1010 |
+
wouldnt
|
1011 |
+
wouldn't
|
1012 |
+
y
|
1013 |
+
ya
|
1014 |
+
yadi
|
1015 |
+
yah
|
1016 |
+
yaha
|
1017 |
+
yahaan
|
1018 |
+
yahan
|
1019 |
+
yahi
|
1020 |
+
yahin
|
1021 |
+
ye
|
1022 |
+
yeah
|
1023 |
+
yeh
|
1024 |
+
yehi
|
1025 |
+
yes
|
1026 |
+
yet
|
1027 |
+
you
|
1028 |
+
you'd
|
1029 |
+
you'll
|
1030 |
+
your
|
1031 |
+
you're
|
1032 |
+
yours
|
1033 |
+
yourself
|
1034 |
+
yourselves
|
1035 |
+
you've
|
1036 |
+
yup
|
data/nltk_data/corpora/stopwords/hungarian
ADDED
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
ahogy
|
3 |
+
ahol
|
4 |
+
aki
|
5 |
+
akik
|
6 |
+
akkor
|
7 |
+
alatt
|
8 |
+
által
|
9 |
+
általában
|
10 |
+
amely
|
11 |
+
amelyek
|
12 |
+
amelyekben
|
13 |
+
amelyeket
|
14 |
+
amelyet
|
15 |
+
amelynek
|
16 |
+
ami
|
17 |
+
amit
|
18 |
+
amolyan
|
19 |
+
amíg
|
20 |
+
amikor
|
21 |
+
át
|
22 |
+
abban
|
23 |
+
ahhoz
|
24 |
+
annak
|
25 |
+
arra
|
26 |
+
arról
|
27 |
+
az
|
28 |
+
azok
|
29 |
+
azon
|
30 |
+
azt
|
31 |
+
azzal
|
32 |
+
azért
|
33 |
+
aztán
|
34 |
+
azután
|
35 |
+
azonban
|
36 |
+
bár
|
37 |
+
be
|
38 |
+
belül
|
39 |
+
benne
|
40 |
+
cikk
|
41 |
+
cikkek
|
42 |
+
cikkeket
|
43 |
+
csak
|
44 |
+
de
|
45 |
+
e
|
46 |
+
eddig
|
47 |
+
egész
|
48 |
+
egy
|
49 |
+
egyes
|
50 |
+
egyetlen
|
51 |
+
egyéb
|
52 |
+
egyik
|
53 |
+
egyre
|
54 |
+
ekkor
|
55 |
+
el
|
56 |
+
elég
|
57 |
+
ellen
|
58 |
+
elõ
|
59 |
+
elõször
|
60 |
+
elõtt
|
61 |
+
elsõ
|
62 |
+
én
|
63 |
+
éppen
|
64 |
+
ebben
|
65 |
+
ehhez
|
66 |
+
emilyen
|
67 |
+
ennek
|
68 |
+
erre
|
69 |
+
ez
|
70 |
+
ezt
|
71 |
+
ezek
|
72 |
+
ezen
|
73 |
+
ezzel
|
74 |
+
ezért
|
75 |
+
és
|
76 |
+
fel
|
77 |
+
felé
|
78 |
+
hanem
|
79 |
+
hiszen
|
80 |
+
hogy
|
81 |
+
hogyan
|
82 |
+
igen
|
83 |
+
így
|
84 |
+
illetve
|
85 |
+
ill.
|
86 |
+
ill
|
87 |
+
ilyen
|
88 |
+
ilyenkor
|
89 |
+
ison
|
90 |
+
ismét
|
91 |
+
itt
|
92 |
+
jó
|
93 |
+
jól
|
94 |
+
jobban
|
95 |
+
kell
|
96 |
+
kellett
|
97 |
+
keresztül
|
98 |
+
keressünk
|
99 |
+
ki
|
100 |
+
kívül
|
101 |
+
között
|
102 |
+
közül
|
103 |
+
legalább
|
104 |
+
lehet
|
105 |
+
lehetett
|
106 |
+
legyen
|
107 |
+
lenne
|
108 |
+
lenni
|
109 |
+
lesz
|
110 |
+
lett
|
111 |
+
maga
|
112 |
+
magát
|
113 |
+
majd
|
114 |
+
majd
|
115 |
+
már
|
116 |
+
más
|
117 |
+
másik
|
118 |
+
meg
|
119 |
+
még
|
120 |
+
mellett
|
121 |
+
mert
|
122 |
+
mely
|
123 |
+
melyek
|
124 |
+
mi
|
125 |
+
mit
|
126 |
+
míg
|
127 |
+
miért
|
128 |
+
milyen
|
129 |
+
mikor
|
130 |
+
minden
|
131 |
+
mindent
|
132 |
+
mindenki
|
133 |
+
mindig
|
134 |
+
mint
|
135 |
+
mintha
|
136 |
+
mivel
|
137 |
+
most
|
138 |
+
nagy
|
139 |
+
nagyobb
|
140 |
+
nagyon
|
141 |
+
ne
|
142 |
+
néha
|
143 |
+
nekem
|
144 |
+
neki
|
145 |
+
nem
|
146 |
+
néhány
|
147 |
+
nélkül
|
148 |
+
nincs
|
149 |
+
olyan
|
150 |
+
ott
|
151 |
+
össze
|
152 |
+
õ
|
153 |
+
õk
|
154 |
+
õket
|
155 |
+
pedig
|
156 |
+
persze
|
157 |
+
rá
|
158 |
+
s
|
159 |
+
saját
|
160 |
+
sem
|
161 |
+
semmi
|
162 |
+
sok
|
163 |
+
sokat
|
164 |
+
sokkal
|
165 |
+
számára
|
166 |
+
szemben
|
167 |
+
szerint
|
168 |
+
szinte
|
169 |
+
talán
|
170 |
+
tehát
|
171 |
+
teljes
|
172 |
+
tovább
|
173 |
+
továbbá
|
174 |
+
több
|
175 |
+
úgy
|
176 |
+
ugyanis
|
177 |
+
új
|
178 |
+
újabb
|
179 |
+
újra
|
180 |
+
után
|
181 |
+
utána
|
182 |
+
utolsó
|
183 |
+
vagy
|
184 |
+
vagyis
|
185 |
+
valaki
|
186 |
+
valami
|
187 |
+
valamint
|
188 |
+
való
|
189 |
+
vagyok
|
190 |
+
van
|
191 |
+
vannak
|
192 |
+
volt
|
193 |
+
voltam
|
194 |
+
voltak
|
195 |
+
voltunk
|
196 |
+
vissza
|
197 |
+
vele
|
198 |
+
viszont
|
199 |
+
volna
|
data/nltk_data/corpora/stopwords/indonesian
ADDED
@@ -0,0 +1,758 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ada
|
2 |
+
adalah
|
3 |
+
adanya
|
4 |
+
adapun
|
5 |
+
agak
|
6 |
+
agaknya
|
7 |
+
agar
|
8 |
+
akan
|
9 |
+
akankah
|
10 |
+
akhir
|
11 |
+
akhiri
|
12 |
+
akhirnya
|
13 |
+
aku
|
14 |
+
akulah
|
15 |
+
amat
|
16 |
+
amatlah
|
17 |
+
anda
|
18 |
+
andalah
|
19 |
+
antar
|
20 |
+
antara
|
21 |
+
antaranya
|
22 |
+
apa
|
23 |
+
apaan
|
24 |
+
apabila
|
25 |
+
apakah
|
26 |
+
apalagi
|
27 |
+
apatah
|
28 |
+
artinya
|
29 |
+
asal
|
30 |
+
asalkan
|
31 |
+
atas
|
32 |
+
atau
|
33 |
+
ataukah
|
34 |
+
ataupun
|
35 |
+
awal
|
36 |
+
awalnya
|
37 |
+
bagai
|
38 |
+
bagaikan
|
39 |
+
bagaimana
|
40 |
+
bagaimanakah
|
41 |
+
bagaimanapun
|
42 |
+
bagi
|
43 |
+
bagian
|
44 |
+
bahkan
|
45 |
+
bahwa
|
46 |
+
bahwasanya
|
47 |
+
baik
|
48 |
+
bakal
|
49 |
+
bakalan
|
50 |
+
balik
|
51 |
+
banyak
|
52 |
+
bapak
|
53 |
+
baru
|
54 |
+
bawah
|
55 |
+
beberapa
|
56 |
+
begini
|
57 |
+
beginian
|
58 |
+
beginikah
|
59 |
+
beginilah
|
60 |
+
begitu
|
61 |
+
begitukah
|
62 |
+
begitulah
|
63 |
+
begitupun
|
64 |
+
bekerja
|
65 |
+
belakang
|
66 |
+
belakangan
|
67 |
+
belum
|
68 |
+
belumlah
|
69 |
+
benar
|
70 |
+
benarkah
|
71 |
+
benarlah
|
72 |
+
berada
|
73 |
+
berakhir
|
74 |
+
berakhirlah
|
75 |
+
berakhirnya
|
76 |
+
berapa
|
77 |
+
berapakah
|
78 |
+
berapalah
|
79 |
+
berapapun
|
80 |
+
berarti
|
81 |
+
berawal
|
82 |
+
berbagai
|
83 |
+
berdatangan
|
84 |
+
beri
|
85 |
+
berikan
|
86 |
+
berikut
|
87 |
+
berikutnya
|
88 |
+
berjumlah
|
89 |
+
berkali-kali
|
90 |
+
berkata
|
91 |
+
berkehendak
|
92 |
+
berkeinginan
|
93 |
+
berkenaan
|
94 |
+
berlainan
|
95 |
+
berlalu
|
96 |
+
berlangsung
|
97 |
+
berlebihan
|
98 |
+
bermacam
|
99 |
+
bermacam-macam
|
100 |
+
bermaksud
|
101 |
+
bermula
|
102 |
+
bersama
|
103 |
+
bersama-sama
|
104 |
+
bersiap
|
105 |
+
bersiap-siap
|
106 |
+
bertanya
|
107 |
+
bertanya-tanya
|
108 |
+
berturut
|
109 |
+
berturut-turut
|
110 |
+
bertutur
|
111 |
+
berujar
|
112 |
+
berupa
|
113 |
+
besar
|
114 |
+
betul
|
115 |
+
betulkah
|
116 |
+
biasa
|
117 |
+
biasanya
|
118 |
+
bila
|
119 |
+
bilakah
|
120 |
+
bisa
|
121 |
+
bisakah
|
122 |
+
boleh
|
123 |
+
bolehkah
|
124 |
+
bolehlah
|
125 |
+
buat
|
126 |
+
bukan
|
127 |
+
bukankah
|
128 |
+
bukanlah
|
129 |
+
bukannya
|
130 |
+
bulan
|
131 |
+
bung
|
132 |
+
cara
|
133 |
+
caranya
|
134 |
+
cukup
|
135 |
+
cukupkah
|
136 |
+
cukuplah
|
137 |
+
cuma
|
138 |
+
dahulu
|
139 |
+
dalam
|
140 |
+
dan
|
141 |
+
dapat
|
142 |
+
dari
|
143 |
+
daripada
|
144 |
+
datang
|
145 |
+
dekat
|
146 |
+
demi
|
147 |
+
demikian
|
148 |
+
demikianlah
|
149 |
+
dengan
|
150 |
+
depan
|
151 |
+
di
|
152 |
+
dia
|
153 |
+
diakhiri
|
154 |
+
diakhirinya
|
155 |
+
dialah
|
156 |
+
diantara
|
157 |
+
diantaranya
|
158 |
+
diberi
|
159 |
+
diberikan
|
160 |
+
diberikannya
|
161 |
+
dibuat
|
162 |
+
dibuatnya
|
163 |
+
didapat
|
164 |
+
didatangkan
|
165 |
+
digunakan
|
166 |
+
diibaratkan
|
167 |
+
diibaratkannya
|
168 |
+
diingat
|
169 |
+
diingatkan
|
170 |
+
diinginkan
|
171 |
+
dijawab
|
172 |
+
dijelaskan
|
173 |
+
dijelaskannya
|
174 |
+
dikarenakan
|
175 |
+
dikatakan
|
176 |
+
dikatakannya
|
177 |
+
dikerjakan
|
178 |
+
diketahui
|
179 |
+
diketahuinya
|
180 |
+
dikira
|
181 |
+
dilakukan
|
182 |
+
dilalui
|
183 |
+
dilihat
|
184 |
+
dimaksud
|
185 |
+
dimaksudkan
|
186 |
+
dimaksudkannya
|
187 |
+
dimaksudnya
|
188 |
+
diminta
|
189 |
+
dimintai
|
190 |
+
dimisalkan
|
191 |
+
dimulai
|
192 |
+
dimulailah
|
193 |
+
dimulainya
|
194 |
+
dimungkinkan
|
195 |
+
dini
|
196 |
+
dipastikan
|
197 |
+
diperbuat
|
198 |
+
diperbuatnya
|
199 |
+
dipergunakan
|
200 |
+
diperkirakan
|
201 |
+
diperlihatkan
|
202 |
+
diperlukan
|
203 |
+
diperlukannya
|
204 |
+
dipersoalkan
|
205 |
+
dipertanyakan
|
206 |
+
dipunyai
|
207 |
+
diri
|
208 |
+
dirinya
|
209 |
+
disampaikan
|
210 |
+
disebut
|
211 |
+
disebutkan
|
212 |
+
disebutkannya
|
213 |
+
disini
|
214 |
+
disinilah
|
215 |
+
ditambahkan
|
216 |
+
ditandaskan
|
217 |
+
ditanya
|
218 |
+
ditanyai
|
219 |
+
ditanyakan
|
220 |
+
ditegaskan
|
221 |
+
ditujukan
|
222 |
+
ditunjuk
|
223 |
+
ditunjuki
|
224 |
+
ditunjukkan
|
225 |
+
ditunjukkannya
|
226 |
+
ditunjuknya
|
227 |
+
dituturkan
|
228 |
+
dituturkannya
|
229 |
+
diucapkan
|
230 |
+
diucapkannya
|
231 |
+
diungkapkan
|
232 |
+
dong
|
233 |
+
dua
|
234 |
+
dulu
|
235 |
+
empat
|
236 |
+
enggak
|
237 |
+
enggaknya
|
238 |
+
entah
|
239 |
+
entahlah
|
240 |
+
guna
|
241 |
+
gunakan
|
242 |
+
hal
|
243 |
+
hampir
|
244 |
+
hanya
|
245 |
+
hanyalah
|
246 |
+
hari
|
247 |
+
harus
|
248 |
+
haruslah
|
249 |
+
harusnya
|
250 |
+
hendak
|
251 |
+
hendaklah
|
252 |
+
hendaknya
|
253 |
+
hingga
|
254 |
+
ia
|
255 |
+
ialah
|
256 |
+
ibarat
|
257 |
+
ibaratkan
|
258 |
+
ibaratnya
|
259 |
+
ibu
|
260 |
+
ikut
|
261 |
+
ingat
|
262 |
+
ingat-ingat
|
263 |
+
ingin
|
264 |
+
inginkah
|
265 |
+
inginkan
|
266 |
+
ini
|
267 |
+
inikah
|
268 |
+
inilah
|
269 |
+
itu
|
270 |
+
itukah
|
271 |
+
itulah
|
272 |
+
jadi
|
273 |
+
jadilah
|
274 |
+
jadinya
|
275 |
+
jangan
|
276 |
+
jangankan
|
277 |
+
janganlah
|
278 |
+
jauh
|
279 |
+
jawab
|
280 |
+
jawaban
|
281 |
+
jawabnya
|
282 |
+
jelas
|
283 |
+
jelaskan
|
284 |
+
jelaslah
|
285 |
+
jelasnya
|
286 |
+
jika
|
287 |
+
jikalau
|
288 |
+
juga
|
289 |
+
jumlah
|
290 |
+
jumlahnya
|
291 |
+
justru
|
292 |
+
kala
|
293 |
+
kalau
|
294 |
+
kalaulah
|
295 |
+
kalaupun
|
296 |
+
kalian
|
297 |
+
kami
|
298 |
+
kamilah
|
299 |
+
kamu
|
300 |
+
kamulah
|
301 |
+
kan
|
302 |
+
kapan
|
303 |
+
kapankah
|
304 |
+
kapanpun
|
305 |
+
karena
|
306 |
+
karenanya
|
307 |
+
kasus
|
308 |
+
kata
|
309 |
+
katakan
|
310 |
+
katakanlah
|
311 |
+
katanya
|
312 |
+
ke
|
313 |
+
keadaan
|
314 |
+
kebetulan
|
315 |
+
kecil
|
316 |
+
kedua
|
317 |
+
keduanya
|
318 |
+
keinginan
|
319 |
+
kelamaan
|
320 |
+
kelihatan
|
321 |
+
kelihatannya
|
322 |
+
kelima
|
323 |
+
keluar
|
324 |
+
kembali
|
325 |
+
kemudian
|
326 |
+
kemungkinan
|
327 |
+
kemungkinannya
|
328 |
+
kenapa
|
329 |
+
kepada
|
330 |
+
kepadanya
|
331 |
+
kesampaian
|
332 |
+
keseluruhan
|
333 |
+
keseluruhannya
|
334 |
+
keterlaluan
|
335 |
+
ketika
|
336 |
+
khususnya
|
337 |
+
kini
|
338 |
+
kinilah
|
339 |
+
kira
|
340 |
+
kira-kira
|
341 |
+
kiranya
|
342 |
+
kita
|
343 |
+
kitalah
|
344 |
+
kok
|
345 |
+
kurang
|
346 |
+
lagi
|
347 |
+
lagian
|
348 |
+
lah
|
349 |
+
lain
|
350 |
+
lainnya
|
351 |
+
lalu
|
352 |
+
lama
|
353 |
+
lamanya
|
354 |
+
lanjut
|
355 |
+
lanjutnya
|
356 |
+
lebih
|
357 |
+
lewat
|
358 |
+
lima
|
359 |
+
luar
|
360 |
+
macam
|
361 |
+
maka
|
362 |
+
makanya
|
363 |
+
makin
|
364 |
+
malah
|
365 |
+
malahan
|
366 |
+
mampu
|
367 |
+
mampukah
|
368 |
+
mana
|
369 |
+
manakala
|
370 |
+
manalagi
|
371 |
+
masa
|
372 |
+
masalah
|
373 |
+
masalahnya
|
374 |
+
masih
|
375 |
+
masihkah
|
376 |
+
masing
|
377 |
+
masing-masing
|
378 |
+
mau
|
379 |
+
maupun
|
380 |
+
melainkan
|
381 |
+
melakukan
|
382 |
+
melalui
|
383 |
+
melihat
|
384 |
+
melihatnya
|
385 |
+
memang
|
386 |
+
memastikan
|
387 |
+
memberi
|
388 |
+
memberikan
|
389 |
+
membuat
|
390 |
+
memerlukan
|
391 |
+
memihak
|
392 |
+
meminta
|
393 |
+
memintakan
|
394 |
+
memisalkan
|
395 |
+
memperbuat
|
396 |
+
mempergunakan
|
397 |
+
memperkirakan
|
398 |
+
memperlihatkan
|
399 |
+
mempersiapkan
|
400 |
+
mempersoalkan
|
401 |
+
mempertanyakan
|
402 |
+
mempunyai
|
403 |
+
memulai
|
404 |
+
memungkinkan
|
405 |
+
menaiki
|
406 |
+
menambahkan
|
407 |
+
menandaskan
|
408 |
+
menanti
|
409 |
+
menanti-nanti
|
410 |
+
menantikan
|
411 |
+
menanya
|
412 |
+
menanyai
|
413 |
+
menanyakan
|
414 |
+
mendapat
|
415 |
+
mendapatkan
|
416 |
+
mendatang
|
417 |
+
mendatangi
|
418 |
+
mendatangkan
|
419 |
+
menegaskan
|
420 |
+
mengakhiri
|
421 |
+
mengapa
|
422 |
+
mengatakan
|
423 |
+
mengatakannya
|
424 |
+
mengenai
|
425 |
+
mengerjakan
|
426 |
+
mengetahui
|
427 |
+
menggunakan
|
428 |
+
menghendaki
|
429 |
+
mengibaratkan
|
430 |
+
mengibaratkannya
|
431 |
+
mengingat
|
432 |
+
mengingatkan
|
433 |
+
menginginkan
|
434 |
+
mengira
|
435 |
+
mengucapkan
|
436 |
+
mengucapkannya
|
437 |
+
mengungkapkan
|
438 |
+
menjadi
|
439 |
+
menjawab
|
440 |
+
menjelaskan
|
441 |
+
menuju
|
442 |
+
menunjuk
|
443 |
+
menunjuki
|
444 |
+
menunjukkan
|
445 |
+
menunjuknya
|
446 |
+
menurut
|
447 |
+
menuturkan
|
448 |
+
menyampaikan
|
449 |
+
menyangkut
|
450 |
+
menyatakan
|
451 |
+
menyebutkan
|
452 |
+
menyeluruh
|
453 |
+
menyiapkan
|
454 |
+
merasa
|
455 |
+
mereka
|
456 |
+
merekalah
|
457 |
+
merupakan
|
458 |
+
meski
|
459 |
+
meskipun
|
460 |
+
meyakini
|
461 |
+
meyakinkan
|
462 |
+
minta
|
463 |
+
mirip
|
464 |
+
misal
|
465 |
+
misalkan
|
466 |
+
misalnya
|
467 |
+
mula
|
468 |
+
mulai
|
469 |
+
mulailah
|
470 |
+
mulanya
|
471 |
+
mungkin
|
472 |
+
mungkinkah
|
473 |
+
nah
|
474 |
+
naik
|
475 |
+
namun
|
476 |
+
nanti
|
477 |
+
nantinya
|
478 |
+
nyaris
|
479 |
+
nyatanya
|
480 |
+
oleh
|
481 |
+
olehnya
|
482 |
+
pada
|
483 |
+
padahal
|
484 |
+
padanya
|
485 |
+
pak
|
486 |
+
paling
|
487 |
+
panjang
|
488 |
+
pantas
|
489 |
+
para
|
490 |
+
pasti
|
491 |
+
pastilah
|
492 |
+
penting
|
493 |
+
pentingnya
|
494 |
+
per
|
495 |
+
percuma
|
496 |
+
perlu
|
497 |
+
perlukah
|
498 |
+
perlunya
|
499 |
+
pernah
|
500 |
+
persoalan
|
501 |
+
pertama
|
502 |
+
pertama-tama
|
503 |
+
pertanyaan
|
504 |
+
pertanyakan
|
505 |
+
pihak
|
506 |
+
pihaknya
|
507 |
+
pukul
|
508 |
+
pula
|
509 |
+
pun
|
510 |
+
punya
|
511 |
+
rasa
|
512 |
+
rasanya
|
513 |
+
rata
|
514 |
+
rupanya
|
515 |
+
saat
|
516 |
+
saatnya
|
517 |
+
saja
|
518 |
+
sajalah
|
519 |
+
saling
|
520 |
+
sama
|
521 |
+
sama-sama
|
522 |
+
sambil
|
523 |
+
sampai
|
524 |
+
sampai-sampai
|
525 |
+
sampaikan
|
526 |
+
sana
|
527 |
+
sangat
|
528 |
+
sangatlah
|
529 |
+
satu
|
530 |
+
saya
|
531 |
+
sayalah
|
532 |
+
se
|
533 |
+
sebab
|
534 |
+
sebabnya
|
535 |
+
sebagai
|
536 |
+
sebagaimana
|
537 |
+
sebagainya
|
538 |
+
sebagian
|
539 |
+
sebaik
|
540 |
+
sebaik-baiknya
|
541 |
+
sebaiknya
|
542 |
+
sebaliknya
|
543 |
+
sebanyak
|
544 |
+
sebegini
|
545 |
+
sebegitu
|
546 |
+
sebelum
|
547 |
+
sebelumnya
|
548 |
+
sebenarnya
|
549 |
+
seberapa
|
550 |
+
sebesar
|
551 |
+
sebetulnya
|
552 |
+
sebisanya
|
553 |
+
sebuah
|
554 |
+
sebut
|
555 |
+
sebutlah
|
556 |
+
sebutnya
|
557 |
+
secara
|
558 |
+
secukupnya
|
559 |
+
sedang
|
560 |
+
sedangkan
|
561 |
+
sedemikian
|
562 |
+
sedikit
|
563 |
+
sedikitnya
|
564 |
+
seenaknya
|
565 |
+
segala
|
566 |
+
segalanya
|
567 |
+
segera
|
568 |
+
seharusnya
|
569 |
+
sehingga
|
570 |
+
seingat
|
571 |
+
sejak
|
572 |
+
sejauh
|
573 |
+
sejenak
|
574 |
+
sejumlah
|
575 |
+
sekadar
|
576 |
+
sekadarnya
|
577 |
+
sekali
|
578 |
+
sekali-kali
|
579 |
+
sekalian
|
580 |
+
sekaligus
|
581 |
+
sekalipun
|
582 |
+
sekarang
|
583 |
+
sekarang
|
584 |
+
sekecil
|
585 |
+
seketika
|
586 |
+
sekiranya
|
587 |
+
sekitar
|
588 |
+
sekitarnya
|
589 |
+
sekurang-kurangnya
|
590 |
+
sekurangnya
|
591 |
+
sela
|
592 |
+
selain
|
593 |
+
selaku
|
594 |
+
selalu
|
595 |
+
selama
|
596 |
+
selama-lamanya
|
597 |
+
selamanya
|
598 |
+
selanjutnya
|
599 |
+
seluruh
|
600 |
+
seluruhnya
|
601 |
+
semacam
|
602 |
+
semakin
|
603 |
+
semampu
|
604 |
+
semampunya
|
605 |
+
semasa
|
606 |
+
semasih
|
607 |
+
semata
|
608 |
+
semata-mata
|
609 |
+
semaunya
|
610 |
+
sementara
|
611 |
+
semisal
|
612 |
+
semisalnya
|
613 |
+
sempat
|
614 |
+
semua
|
615 |
+
semuanya
|
616 |
+
semula
|
617 |
+
sendiri
|
618 |
+
sendirian
|
619 |
+
sendirinya
|
620 |
+
seolah
|
621 |
+
seolah-olah
|
622 |
+
seorang
|
623 |
+
sepanjang
|
624 |
+
sepantasnya
|
625 |
+
sepantasnyalah
|
626 |
+
seperlunya
|
627 |
+
seperti
|
628 |
+
sepertinya
|
629 |
+
sepihak
|
630 |
+
sering
|
631 |
+
seringnya
|
632 |
+
serta
|
633 |
+
serupa
|
634 |
+
sesaat
|
635 |
+
sesama
|
636 |
+
sesampai
|
637 |
+
sesegera
|
638 |
+
sesekali
|
639 |
+
seseorang
|
640 |
+
sesuatu
|
641 |
+
sesuatunya
|
642 |
+
sesudah
|
643 |
+
sesudahnya
|
644 |
+
setelah
|
645 |
+
setempat
|
646 |
+
setengah
|
647 |
+
seterusnya
|
648 |
+
setiap
|
649 |
+
setiba
|
650 |
+
setibanya
|
651 |
+
setidak-tidaknya
|
652 |
+
setidaknya
|
653 |
+
setinggi
|
654 |
+
seusai
|
655 |
+
sewaktu
|
656 |
+
siap
|
657 |
+
siapa
|
658 |
+
siapakah
|
659 |
+
siapapun
|
660 |
+
sini
|
661 |
+
sinilah
|
662 |
+
soal
|
663 |
+
soalnya
|
664 |
+
suatu
|
665 |
+
sudah
|
666 |
+
sudahkah
|
667 |
+
sudahlah
|
668 |
+
supaya
|
669 |
+
tadi
|
670 |
+
tadinya
|
671 |
+
tahu
|
672 |
+
tahun
|
673 |
+
tak
|
674 |
+
tambah
|
675 |
+
tambahnya
|
676 |
+
tampak
|
677 |
+
tampaknya
|
678 |
+
tandas
|
679 |
+
tandasnya
|
680 |
+
tanpa
|
681 |
+
tanya
|
682 |
+
tanyakan
|
683 |
+
tanyanya
|
684 |
+
tapi
|
685 |
+
tegas
|
686 |
+
tegasnya
|
687 |
+
telah
|
688 |
+
tempat
|
689 |
+
tengah
|
690 |
+
tentang
|
691 |
+
tentu
|
692 |
+
tentulah
|
693 |
+
tentunya
|
694 |
+
tepat
|
695 |
+
terakhir
|
696 |
+
terasa
|
697 |
+
terbanyak
|
698 |
+
terdahulu
|
699 |
+
terdapat
|
700 |
+
terdiri
|
701 |
+
terhadap
|
702 |
+
terhadapnya
|
703 |
+
teringat
|
704 |
+
teringat-ingat
|
705 |
+
terjadi
|
706 |
+
terjadilah
|
707 |
+
terjadinya
|
708 |
+
terkira
|
709 |
+
terlalu
|
710 |
+
terlebih
|
711 |
+
terlihat
|
712 |
+
termasuk
|
713 |
+
ternyata
|
714 |
+
tersampaikan
|
715 |
+
tersebut
|
716 |
+
tersebutlah
|
717 |
+
tertentu
|
718 |
+
tertuju
|
719 |
+
terus
|
720 |
+
terutama
|
721 |
+
tetap
|
722 |
+
tetapi
|
723 |
+
tiap
|
724 |
+
tiba
|
725 |
+
tiba-tiba
|
726 |
+
tidak
|
727 |
+
tidakkah
|
728 |
+
tidaklah
|
729 |
+
tiga
|
730 |
+
tinggi
|
731 |
+
toh
|
732 |
+
tunjuk
|
733 |
+
turut
|
734 |
+
tutur
|
735 |
+
tuturnya
|
736 |
+
ucap
|
737 |
+
ucapnya
|
738 |
+
ujar
|
739 |
+
ujarnya
|
740 |
+
umum
|
741 |
+
umumnya
|
742 |
+
ungkap
|
743 |
+
ungkapnya
|
744 |
+
untuk
|
745 |
+
usah
|
746 |
+
usai
|
747 |
+
waduh
|
748 |
+
wah
|
749 |
+
wahai
|
750 |
+
waktu
|
751 |
+
waktunya
|
752 |
+
walau
|
753 |
+
walaupun
|
754 |
+
wong
|
755 |
+
yaitu
|
756 |
+
yakin
|
757 |
+
yakni
|
758 |
+
yang
|
data/nltk_data/corpora/stopwords/italian
ADDED
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ad
|
2 |
+
al
|
3 |
+
allo
|
4 |
+
ai
|
5 |
+
agli
|
6 |
+
all
|
7 |
+
agl
|
8 |
+
alla
|
9 |
+
alle
|
10 |
+
con
|
11 |
+
col
|
12 |
+
coi
|
13 |
+
da
|
14 |
+
dal
|
15 |
+
dallo
|
16 |
+
dai
|
17 |
+
dagli
|
18 |
+
dall
|
19 |
+
dagl
|
20 |
+
dalla
|
21 |
+
dalle
|
22 |
+
di
|
23 |
+
del
|
24 |
+
dello
|
25 |
+
dei
|
26 |
+
degli
|
27 |
+
dell
|
28 |
+
degl
|
29 |
+
della
|
30 |
+
delle
|
31 |
+
in
|
32 |
+
nel
|
33 |
+
nello
|
34 |
+
nei
|
35 |
+
negli
|
36 |
+
nell
|
37 |
+
negl
|
38 |
+
nella
|
39 |
+
nelle
|
40 |
+
su
|
41 |
+
sul
|
42 |
+
sullo
|
43 |
+
sui
|
44 |
+
sugli
|
45 |
+
sull
|
46 |
+
sugl
|
47 |
+
sulla
|
48 |
+
sulle
|
49 |
+
per
|
50 |
+
tra
|
51 |
+
contro
|
52 |
+
io
|
53 |
+
tu
|
54 |
+
lui
|
55 |
+
lei
|
56 |
+
noi
|
57 |
+
voi
|
58 |
+
loro
|
59 |
+
mio
|
60 |
+
mia
|
61 |
+
miei
|
62 |
+
mie
|
63 |
+
tuo
|
64 |
+
tua
|
65 |
+
tuoi
|
66 |
+
tue
|
67 |
+
suo
|
68 |
+
sua
|
69 |
+
suoi
|
70 |
+
sue
|
71 |
+
nostro
|
72 |
+
nostra
|
73 |
+
nostri
|
74 |
+
nostre
|
75 |
+
vostro
|
76 |
+
vostra
|
77 |
+
vostri
|
78 |
+
vostre
|
79 |
+
mi
|
80 |
+
ti
|
81 |
+
ci
|
82 |
+
vi
|
83 |
+
lo
|
84 |
+
la
|
85 |
+
li
|
86 |
+
le
|
87 |
+
gli
|
88 |
+
ne
|
89 |
+
il
|
90 |
+
un
|
91 |
+
uno
|
92 |
+
una
|
93 |
+
ma
|
94 |
+
ed
|
95 |
+
se
|
96 |
+
perché
|
97 |
+
anche
|
98 |
+
come
|
99 |
+
dov
|
100 |
+
dove
|
101 |
+
che
|
102 |
+
chi
|
103 |
+
cui
|
104 |
+
non
|
105 |
+
più
|
106 |
+
quale
|
107 |
+
quanto
|
108 |
+
quanti
|
109 |
+
quanta
|
110 |
+
quante
|
111 |
+
quello
|
112 |
+
quelli
|
113 |
+
quella
|
114 |
+
quelle
|
115 |
+
questo
|
116 |
+
questi
|
117 |
+
questa
|
118 |
+
queste
|
119 |
+
si
|
120 |
+
tutto
|
121 |
+
tutti
|
122 |
+
a
|
123 |
+
c
|
124 |
+
e
|
125 |
+
i
|
126 |
+
l
|
127 |
+
o
|
128 |
+
ho
|
129 |
+
hai
|
130 |
+
ha
|
131 |
+
abbiamo
|
132 |
+
avete
|
133 |
+
hanno
|
134 |
+
abbia
|
135 |
+
abbiate
|
136 |
+
abbiano
|
137 |
+
avrò
|
138 |
+
avrai
|
139 |
+
avrà
|
140 |
+
avremo
|
141 |
+
avrete
|
142 |
+
avranno
|
143 |
+
avrei
|
144 |
+
avresti
|
145 |
+
avrebbe
|
146 |
+
avremmo
|
147 |
+
avreste
|
148 |
+
avrebbero
|
149 |
+
avevo
|
150 |
+
avevi
|
151 |
+
aveva
|
152 |
+
avevamo
|
153 |
+
avevate
|
154 |
+
avevano
|
155 |
+
ebbi
|
156 |
+
avesti
|
157 |
+
ebbe
|
158 |
+
avemmo
|
159 |
+
aveste
|
160 |
+
ebbero
|
161 |
+
avessi
|
162 |
+
avesse
|
163 |
+
avessimo
|
164 |
+
avessero
|
165 |
+
avendo
|
166 |
+
avuto
|
167 |
+
avuta
|
168 |
+
avuti
|
169 |
+
avute
|
170 |
+
sono
|
171 |
+
sei
|
172 |
+
è
|
173 |
+
siamo
|
174 |
+
siete
|
175 |
+
sia
|
176 |
+
siate
|
177 |
+
siano
|
178 |
+
sarò
|
179 |
+
sarai
|
180 |
+
sarà
|
181 |
+
saremo
|
182 |
+
sarete
|
183 |
+
saranno
|
184 |
+
sarei
|
185 |
+
saresti
|
186 |
+
sarebbe
|
187 |
+
saremmo
|
188 |
+
sareste
|
189 |
+
sarebbero
|
190 |
+
ero
|
191 |
+
eri
|
192 |
+
era
|
193 |
+
eravamo
|
194 |
+
eravate
|
195 |
+
erano
|
196 |
+
fui
|
197 |
+
fosti
|
198 |
+
fu
|
199 |
+
fummo
|
200 |
+
foste
|
201 |
+
furono
|
202 |
+
fossi
|
203 |
+
fosse
|
204 |
+
fossimo
|
205 |
+
fossero
|
206 |
+
essendo
|
207 |
+
faccio
|
208 |
+
fai
|
209 |
+
facciamo
|
210 |
+
fanno
|
211 |
+
faccia
|
212 |
+
facciate
|
213 |
+
facciano
|
214 |
+
farò
|
215 |
+
farai
|
216 |
+
farà
|
217 |
+
faremo
|
218 |
+
farete
|
219 |
+
faranno
|
220 |
+
farei
|
221 |
+
faresti
|
222 |
+
farebbe
|
223 |
+
faremmo
|
224 |
+
fareste
|
225 |
+
farebbero
|
226 |
+
facevo
|
227 |
+
facevi
|
228 |
+
faceva
|
229 |
+
facevamo
|
230 |
+
facevate
|
231 |
+
facevano
|
232 |
+
feci
|
233 |
+
facesti
|
234 |
+
fece
|
235 |
+
facemmo
|
236 |
+
faceste
|
237 |
+
fecero
|
238 |
+
facessi
|
239 |
+
facesse
|
240 |
+
facessimo
|
241 |
+
facessero
|
242 |
+
facendo
|
243 |
+
sto
|
244 |
+
stai
|
245 |
+
sta
|
246 |
+
stiamo
|
247 |
+
stanno
|
248 |
+
stia
|
249 |
+
stiate
|
250 |
+
stiano
|
251 |
+
starò
|
252 |
+
starai
|
253 |
+
starà
|
254 |
+
staremo
|
255 |
+
starete
|
256 |
+
staranno
|
257 |
+
starei
|
258 |
+
staresti
|
259 |
+
starebbe
|
260 |
+
staremmo
|
261 |
+
stareste
|
262 |
+
starebbero
|
263 |
+
stavo
|
264 |
+
stavi
|
265 |
+
stava
|
266 |
+
stavamo
|
267 |
+
stavate
|
268 |
+
stavano
|
269 |
+
stetti
|
270 |
+
stesti
|
271 |
+
stette
|
272 |
+
stemmo
|
273 |
+
steste
|
274 |
+
stettero
|
275 |
+
stessi
|
276 |
+
stesse
|
277 |
+
stessimo
|
278 |
+
stessero
|
279 |
+
stando
|