KevinHuSh
committed on
Commit
·
ba9251b
1
Parent(s):
15052fd
let file in knowledgebases visible in file manager (#714)
Browse files
### What problem does this PR solve?
Make files in knowledge bases visible in the file manager.
#162
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/document_app.py +29 -14
- api/apps/file_app.py +8 -5
- api/db/__init__.py +8 -0
- api/db/db_models.py +21 -5
- api/db/services/document_service.py +16 -0
- api/db/services/file2document_service.py +14 -12
- api/db/services/file_service.py +88 -24
- docker/entrypoint.sh +2 -2
- rag/svr/task_executor.py +1 -0
- requirements_dev.txt +0 -2
api/apps/document_app.py
CHANGED
|
@@ -23,7 +23,7 @@ from elasticsearch_dsl import Q
|
|
| 23 |
from flask import request
|
| 24 |
from flask_login import login_required, current_user
|
| 25 |
|
| 26 |
-
from api.db.db_models import Task
|
| 27 |
from api.db.services.file2document_service import File2DocumentService
|
| 28 |
from api.db.services.file_service import FileService
|
| 29 |
from api.db.services.task_service import TaskService, queue_tasks
|
|
@@ -33,7 +33,7 @@ from api.db.services import duplicate_name
|
|
| 33 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
| 34 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 35 |
from api.utils import get_uuid
|
| 36 |
-
from api.db import FileType, TaskStatus, ParserType
|
| 37 |
from api.db.services.document_service import DocumentService
|
| 38 |
from api.settings import RetCode
|
| 39 |
from api.utils.api_utils import get_json_result
|
|
@@ -59,12 +59,19 @@ def upload():
|
|
| 59 |
return get_json_result(
|
| 60 |
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
err = []
|
| 63 |
for file in file_objs:
|
| 64 |
try:
|
| 65 |
-
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
| 66 |
-
if not e:
|
| 67 |
-
raise LookupError("Can't find this knowledgebase!")
|
| 68 |
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
| 69 |
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
|
| 70 |
raise RuntimeError("Exceed the maximum file number of a free user!")
|
|
@@ -99,6 +106,8 @@ def upload():
|
|
| 99 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 100 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
| 101 |
DocumentService.insert(doc)
|
|
|
|
|
|
|
| 102 |
except Exception as e:
|
| 103 |
err.append(file.filename + ": " + str(e))
|
| 104 |
if err:
|
|
@@ -228,11 +237,13 @@ def rm():
|
|
| 228 |
req = request.json
|
| 229 |
doc_ids = req["doc_id"]
|
| 230 |
if isinstance(doc_ids, str): doc_ids = [doc_ids]
|
|
|
|
|
|
|
|
|
|
| 231 |
errors = ""
|
| 232 |
for doc_id in doc_ids:
|
| 233 |
try:
|
| 234 |
e, doc = DocumentService.get_by_id(doc_id)
|
| 235 |
-
|
| 236 |
if not e:
|
| 237 |
return get_data_error_result(retmsg="Document not found!")
|
| 238 |
tenant_id = DocumentService.get_tenant_id(doc_id)
|
|
@@ -241,21 +252,25 @@ def rm():
|
|
| 241 |
|
| 242 |
ELASTICSEARCH.deleteByQuery(
|
| 243 |
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
| 246 |
if not DocumentService.delete(doc):
|
| 247 |
return get_data_error_result(
|
| 248 |
retmsg="Database error (Document removal)!")
|
| 249 |
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
except Exception as e:
|
| 256 |
errors += str(e)
|
| 257 |
|
| 258 |
-
if errors:
|
|
|
|
|
|
|
| 259 |
return get_json_result(data=True)
|
| 260 |
|
| 261 |
|
|
|
|
| 23 |
from flask import request
|
| 24 |
from flask_login import login_required, current_user
|
| 25 |
|
| 26 |
+
from api.db.db_models import Task, File
|
| 27 |
from api.db.services.file2document_service import File2DocumentService
|
| 28 |
from api.db.services.file_service import FileService
|
| 29 |
from api.db.services.task_service import TaskService, queue_tasks
|
|
|
|
| 33 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
| 34 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 35 |
from api.utils import get_uuid
|
| 36 |
+
from api.db import FileType, TaskStatus, ParserType, FileSource
|
| 37 |
from api.db.services.document_service import DocumentService
|
| 38 |
from api.settings import RetCode
|
| 39 |
from api.utils.api_utils import get_json_result
|
|
|
|
| 59 |
return get_json_result(
|
| 60 |
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
| 61 |
|
| 62 |
+
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
| 63 |
+
if not e:
|
| 64 |
+
raise LookupError("Can't find this knowledgebase!")
|
| 65 |
+
|
| 66 |
+
root_folder = FileService.get_root_folder(current_user.id)
|
| 67 |
+
pf_id = root_folder["id"]
|
| 68 |
+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
|
| 69 |
+
kb_root_folder = FileService.get_kb_folder(current_user.id)
|
| 70 |
+
kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
|
| 71 |
+
|
| 72 |
err = []
|
| 73 |
for file in file_objs:
|
| 74 |
try:
|
|
|
|
|
|
|
|
|
|
| 75 |
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
| 76 |
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
|
| 77 |
raise RuntimeError("Exceed the maximum file number of a free user!")
|
|
|
|
| 106 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 107 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
| 108 |
DocumentService.insert(doc)
|
| 109 |
+
|
| 110 |
+
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
| 111 |
except Exception as e:
|
| 112 |
err.append(file.filename + ": " + str(e))
|
| 113 |
if err:
|
|
|
|
| 237 |
req = request.json
|
| 238 |
doc_ids = req["doc_id"]
|
| 239 |
if isinstance(doc_ids, str): doc_ids = [doc_ids]
|
| 240 |
+
root_folder = FileService.get_root_folder(current_user.id)
|
| 241 |
+
pf_id = root_folder["id"]
|
| 242 |
+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
|
| 243 |
errors = ""
|
| 244 |
for doc_id in doc_ids:
|
| 245 |
try:
|
| 246 |
e, doc = DocumentService.get_by_id(doc_id)
|
|
|
|
| 247 |
if not e:
|
| 248 |
return get_data_error_result(retmsg="Document not found!")
|
| 249 |
tenant_id = DocumentService.get_tenant_id(doc_id)
|
|
|
|
| 252 |
|
| 253 |
ELASTICSEARCH.deleteByQuery(
|
| 254 |
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
| 255 |
+
|
| 256 |
+
DocumentService.clear_chunk_num(doc_id)
|
| 257 |
+
b, n = File2DocumentService.get_minio_address(doc_id=doc_id)
|
| 258 |
+
|
| 259 |
if not DocumentService.delete(doc):
|
| 260 |
return get_data_error_result(
|
| 261 |
retmsg="Database error (Document removal)!")
|
| 262 |
|
| 263 |
+
f2d = File2DocumentService.get_by_document_id(doc_id)
|
| 264 |
+
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
| 265 |
+
File2DocumentService.delete_by_document_id(doc_id)
|
| 266 |
+
|
| 267 |
+
MINIO.rm(b, n)
|
| 268 |
except Exception as e:
|
| 269 |
errors += str(e)
|
| 270 |
|
| 271 |
+
if errors:
|
| 272 |
+
return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR)
|
| 273 |
+
|
| 274 |
return get_json_result(data=True)
|
| 275 |
|
| 276 |
|
api/apps/file_app.py
CHANGED
|
@@ -26,7 +26,7 @@ from api.db.services.document_service import DocumentService
|
|
| 26 |
from api.db.services.file2document_service import File2DocumentService
|
| 27 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 28 |
from api.utils import get_uuid
|
| 29 |
-
from api.db import FileType
|
| 30 |
from api.db.services import duplicate_name
|
| 31 |
from api.db.services.file_service import FileService
|
| 32 |
from api.settings import RetCode
|
|
@@ -45,7 +45,7 @@ def upload():
|
|
| 45 |
|
| 46 |
if not pf_id:
|
| 47 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 48 |
-
pf_id = root_folder
|
| 49 |
|
| 50 |
if 'file' not in request.files:
|
| 51 |
return get_json_result(
|
|
@@ -132,7 +132,7 @@ def create():
|
|
| 132 |
input_file_type = request.json.get("type")
|
| 133 |
if not pf_id:
|
| 134 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 135 |
-
pf_id = root_folder
|
| 136 |
|
| 137 |
try:
|
| 138 |
if not FileService.is_parent_folder_exist(pf_id):
|
|
@@ -176,7 +176,8 @@ def list():
|
|
| 176 |
desc = request.args.get("desc", True)
|
| 177 |
if not pf_id:
|
| 178 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 179 |
-
pf_id = root_folder
|
|
|
|
| 180 |
try:
|
| 181 |
e, file = FileService.get_by_id(pf_id)
|
| 182 |
if not e:
|
|
@@ -199,7 +200,7 @@ def list():
|
|
| 199 |
def get_root_folder():
|
| 200 |
try:
|
| 201 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 202 |
-
return get_json_result(data={"root_folder": root_folder
|
| 203 |
except Exception as e:
|
| 204 |
return server_error_response(e)
|
| 205 |
|
|
@@ -250,6 +251,8 @@ def rm():
|
|
| 250 |
return get_data_error_result(retmsg="File or Folder not found!")
|
| 251 |
if not file.tenant_id:
|
| 252 |
return get_data_error_result(retmsg="Tenant not found!")
|
|
|
|
|
|
|
| 253 |
|
| 254 |
if file.type == FileType.FOLDER.value:
|
| 255 |
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
|
|
|
|
| 26 |
from api.db.services.file2document_service import File2DocumentService
|
| 27 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 28 |
from api.utils import get_uuid
|
| 29 |
+
from api.db import FileType, FileSource
|
| 30 |
from api.db.services import duplicate_name
|
| 31 |
from api.db.services.file_service import FileService
|
| 32 |
from api.settings import RetCode
|
|
|
|
| 45 |
|
| 46 |
if not pf_id:
|
| 47 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 48 |
+
pf_id = root_folder["id"]
|
| 49 |
|
| 50 |
if 'file' not in request.files:
|
| 51 |
return get_json_result(
|
|
|
|
| 132 |
input_file_type = request.json.get("type")
|
| 133 |
if not pf_id:
|
| 134 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 135 |
+
pf_id = root_folder["id"]
|
| 136 |
|
| 137 |
try:
|
| 138 |
if not FileService.is_parent_folder_exist(pf_id):
|
|
|
|
| 176 |
desc = request.args.get("desc", True)
|
| 177 |
if not pf_id:
|
| 178 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 179 |
+
pf_id = root_folder["id"]
|
| 180 |
+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
|
| 181 |
try:
|
| 182 |
e, file = FileService.get_by_id(pf_id)
|
| 183 |
if not e:
|
|
|
|
| 200 |
def get_root_folder():
|
| 201 |
try:
|
| 202 |
root_folder = FileService.get_root_folder(current_user.id)
|
| 203 |
+
return get_json_result(data={"root_folder": root_folder})
|
| 204 |
except Exception as e:
|
| 205 |
return server_error_response(e)
|
| 206 |
|
|
|
|
| 251 |
return get_data_error_result(retmsg="File or Folder not found!")
|
| 252 |
if not file.tenant_id:
|
| 253 |
return get_data_error_result(retmsg="Tenant not found!")
|
| 254 |
+
if file.source_type == FileSource.KNOWLEDGEBASE:
|
| 255 |
+
continue
|
| 256 |
|
| 257 |
if file.type == FileType.FOLDER.value:
|
| 258 |
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
|
api/db/__init__.py
CHANGED
|
@@ -83,3 +83,11 @@ class ParserType(StrEnum):
|
|
| 83 |
NAIVE = "naive"
|
| 84 |
PICTURE = "picture"
|
| 85 |
ONE = "one"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
NAIVE = "naive"
|
| 84 |
PICTURE = "picture"
|
| 85 |
ONE = "one"
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class FileSource(StrEnum):
|
| 89 |
+
LOCAL = ""
|
| 90 |
+
KNOWLEDGEBASE = "knowledgebase"
|
| 91 |
+
S3 = "s3"
|
| 92 |
+
|
| 93 |
+
KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
|
api/db/db_models.py
CHANGED
|
@@ -21,14 +21,13 @@ import operator
|
|
| 21 |
from functools import wraps
|
| 22 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
| 23 |
from flask_login import UserMixin
|
| 24 |
-
|
| 25 |
from peewee import (
|
| 26 |
-
|
| 27 |
-
CompositeKey,
|
| 28 |
Field, Model, Metadata
|
| 29 |
)
|
| 30 |
from playhouse.pool import PooledMySQLDatabase
|
| 31 |
-
|
| 32 |
from api.db import SerializedType, ParserType
|
| 33 |
from api.settings import DATABASE, stat_logger, SECRET_KEY
|
| 34 |
from api.utils.log_utils import getLogger
|
|
@@ -344,7 +343,7 @@ class DataBaseModel(BaseModel):
|
|
| 344 |
|
| 345 |
|
| 346 |
@DB.connection_context()
|
| 347 |
-
def init_database_tables():
|
| 348 |
members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
|
| 349 |
table_objs = []
|
| 350 |
create_failed_list = []
|
|
@@ -361,6 +360,7 @@ def init_database_tables():
|
|
| 361 |
if create_failed_list:
|
| 362 |
LOGGER.info(f"create tables failed: {create_failed_list}")
|
| 363 |
raise Exception(f"create tables failed: {create_failed_list}")
|
|
|
|
| 364 |
|
| 365 |
|
| 366 |
def fill_db_model_object(model_object, human_model_dict):
|
|
@@ -699,6 +699,11 @@ class File(DataBaseModel):
|
|
| 699 |
help_text="where dose it store")
|
| 700 |
size = IntegerField(default=0)
|
| 701 |
type = CharField(max_length=32, null=False, help_text="file extension")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
|
| 703 |
class Meta:
|
| 704 |
db_table = "file"
|
|
@@ -817,3 +822,14 @@ class API4Conversation(DataBaseModel):
|
|
| 817 |
|
| 818 |
class Meta:
|
| 819 |
db_table = "api_4_conversation"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
from functools import wraps
|
| 22 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
| 23 |
from flask_login import UserMixin
|
| 24 |
+
from playhouse.migrate import MySQLMigrator, migrate
|
| 25 |
from peewee import (
|
| 26 |
+
BigIntegerField, BooleanField, CharField,
|
| 27 |
+
CompositeKey, IntegerField, TextField, FloatField, DateTimeField,
|
| 28 |
Field, Model, Metadata
|
| 29 |
)
|
| 30 |
from playhouse.pool import PooledMySQLDatabase
|
|
|
|
| 31 |
from api.db import SerializedType, ParserType
|
| 32 |
from api.settings import DATABASE, stat_logger, SECRET_KEY
|
| 33 |
from api.utils.log_utils import getLogger
|
|
|
|
| 343 |
|
| 344 |
|
| 345 |
@DB.connection_context()
|
| 346 |
+
def init_database_tables(alter_fields=[]):
|
| 347 |
members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
|
| 348 |
table_objs = []
|
| 349 |
create_failed_list = []
|
|
|
|
| 360 |
if create_failed_list:
|
| 361 |
LOGGER.info(f"create tables failed: {create_failed_list}")
|
| 362 |
raise Exception(f"create tables failed: {create_failed_list}")
|
| 363 |
+
migrate_db()
|
| 364 |
|
| 365 |
|
| 366 |
def fill_db_model_object(model_object, human_model_dict):
|
|
|
|
| 699 |
help_text="where dose it store")
|
| 700 |
size = IntegerField(default=0)
|
| 701 |
type = CharField(max_length=32, null=False, help_text="file extension")
|
| 702 |
+
source_type = CharField(
|
| 703 |
+
max_length=128,
|
| 704 |
+
null=False,
|
| 705 |
+
default="",
|
| 706 |
+
help_text="where dose this document come from")
|
| 707 |
|
| 708 |
class Meta:
|
| 709 |
db_table = "file"
|
|
|
|
| 822 |
|
| 823 |
class Meta:
|
| 824 |
db_table = "api_4_conversation"
|
| 825 |
+
|
| 826 |
+
|
| 827 |
+
def migrate_db():
    """Apply in-place schema migrations to an already-created database.

    Adds the ``source_type`` column to the ``file`` table inside a
    transaction. The migration is best-effort: a failure never propagates
    to the caller, but it is logged instead of being silently discarded so
    that real problems remain visible.
    """
    try:
        with DB.transaction():
            migrator = MySQLMigrator(DB)
            migrate(
                migrator.add_column(
                    'file', 'source_type',
                    CharField(max_length=128, null=False, default="",
                              help_text="where dose this document come from"))
            )
    except Exception as e:
        # NOTE(review): the common failure is presumably that the column
        # already exists on databases that were migrated before — confirm.
        LOGGER.info(f"migrate_db: adding column file.source_type skipped or failed: {e}")
|
api/db/services/document_service.py
CHANGED
|
@@ -150,6 +150,22 @@ class DocumentService(CommonService):
|
|
| 150 |
Knowledgebase.id == kb_id).execute()
|
| 151 |
return num
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
@classmethod
|
| 154 |
@DB.connection_context()
|
| 155 |
def get_tenant_id(cls, doc_id):
|
|
|
|
| 150 |
Knowledgebase.id == kb_id).execute()
|
| 151 |
return num
|
| 152 |
|
| 153 |
+
@classmethod
|
| 154 |
+
@DB.connection_context()
|
| 155 |
+
def clear_chunk_num(cls, doc_id):
    """Remove one document's contribution from its knowledge base counters.

    Subtracts the document's ``token_num`` and ``chunk_num`` from the owning
    knowledge base and decrements its ``doc_num`` by one.

    :param doc_id: id of the document whose counters are removed.
    :return: the value returned by executing the update query.
    :raises AssertionError: if the looked-up document is falsy.
    """
    doc = cls.model.get_by_id(doc_id)
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if not doc:
        raise AssertionError("Can't find document in database.")

    return Knowledgebase.update(
        token_num=Knowledgebase.token_num - doc.token_num,
        chunk_num=Knowledgebase.chunk_num - doc.chunk_num,
        doc_num=Knowledgebase.doc_num - 1
    ).where(
        Knowledgebase.id == doc.kb_id
    ).execute()
|
| 168 |
+
|
| 169 |
@classmethod
|
| 170 |
@DB.connection_context()
|
| 171 |
def get_tenant_id(cls, doc_id):
|
api/db/services/file2document_service.py
CHANGED
|
@@ -15,12 +15,12 @@
|
|
| 15 |
#
|
| 16 |
from datetime import datetime
|
| 17 |
|
|
|
|
| 18 |
from api.db.db_models import DB
|
| 19 |
-
from api.db.db_models import File,
|
| 20 |
from api.db.services.common_service import CommonService
|
| 21 |
from api.db.services.document_service import DocumentService
|
| 22 |
-
from api.
|
| 23 |
-
from api.utils import current_timestamp, datetime_format
|
| 24 |
|
| 25 |
|
| 26 |
class File2DocumentService(CommonService):
|
|
@@ -71,13 +71,15 @@ class File2DocumentService(CommonService):
|
|
| 71 |
@DB.connection_context()
|
| 72 |
def get_minio_address(cls, doc_id=None, file_id=None):
|
| 73 |
if doc_id:
|
| 74 |
-
|
| 75 |
else:
|
| 76 |
-
|
| 77 |
-
if
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
| 15 |
#
|
| 16 |
from datetime import datetime
|
| 17 |
|
| 18 |
+
from api.db import FileSource
|
| 19 |
from api.db.db_models import DB
|
| 20 |
+
from api.db.db_models import File, File2Document
|
| 21 |
from api.db.services.common_service import CommonService
|
| 22 |
from api.db.services.document_service import DocumentService
|
| 23 |
+
from api.utils import current_timestamp, datetime_format, get_uuid
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
class File2DocumentService(CommonService):
|
|
|
|
| 71 |
@DB.connection_context()
|
| 72 |
def get_minio_address(cls, doc_id=None, file_id=None):
    """Resolve the storage address (bucket, object name) of a document or file.

    The file/document link is looked up by ``doc_id`` when given, otherwise
    by ``file_id``. If a link exists and the linked file is a local one, the
    address is ``(file.parent_id, file.location)``. In every other case the
    address comes from the document: ``(doc.kb_id, doc.location)``.

    :param doc_id: document id; takes precedence over ``file_id``.
    :param file_id: file id, used only when ``doc_id`` is falsy.
    :return: a 2-tuple as described above.
    :raises AssertionError: if no document id can be determined.
    :raises LookupError: if the document does not exist.
    """
    if doc_id:
        f2d = cls.get_by_document_id(doc_id)
    else:
        f2d = cls.get_by_file_id(file_id)

    if f2d:
        file = File.get_by_id(f2d[0].file_id)
        if file.source_type == FileSource.LOCAL:
            return file.parent_id, file.location
        doc_id = f2d[0].document_id

    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if not doc_id:
        raise AssertionError("please specify doc_id")

    e, doc = DocumentService.get_by_id(doc_id)
    if not e:
        # Previously this fell through and failed on ``None.kb_id``.
        raise LookupError(f"Can't find document {doc_id} in database.")
    return doc.kb_id, doc.location
|
api/db/services/file_service.py
CHANGED
|
@@ -16,10 +16,12 @@
|
|
| 16 |
from flask_login import current_user
|
| 17 |
from peewee import fn
|
| 18 |
|
| 19 |
-
from api.db import FileType
|
| 20 |
from api.db.db_models import DB, File2Document, Knowledgebase
|
| 21 |
from api.db.db_models import File, Document
|
| 22 |
from api.db.services.common_service import CommonService
|
|
|
|
|
|
|
| 23 |
from api.utils import get_uuid
|
| 24 |
|
| 25 |
|
|
@@ -33,10 +35,15 @@ class FileService(CommonService):
|
|
| 33 |
if keywords:
|
| 34 |
files = cls.model.select().where(
|
| 35 |
(cls.model.tenant_id == tenant_id)
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
| 37 |
else:
|
| 38 |
-
files = cls.model.select().where((cls.model.tenant_id == tenant_id)
|
| 39 |
-
|
|
|
|
|
|
|
| 40 |
count = files.count()
|
| 41 |
if desc:
|
| 42 |
files = files.order_by(cls.model.getter_by(orderby).desc())
|
|
@@ -135,29 +142,69 @@ class FileService(CommonService):
|
|
| 135 |
@classmethod
|
| 136 |
@DB.connection_context()
|
| 137 |
def get_root_folder(cls, tenant_id):
|
| 138 |
-
file
|
| 139 |
-
cls.model.parent_id == cls.model.id)
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
file = {
|
| 143 |
-
"id": file_id,
|
| 144 |
-
"parent_id": file_id,
|
| 145 |
-
"tenant_id": tenant_id,
|
| 146 |
-
"created_by": tenant_id,
|
| 147 |
-
"name": "/",
|
| 148 |
-
"type": FileType.FOLDER.value,
|
| 149 |
-
"size": 0,
|
| 150 |
-
"location": "",
|
| 151 |
-
}
|
| 152 |
-
cls.save(**file)
|
| 153 |
-
else:
|
| 154 |
-
file_id = file[0].id
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
return file
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
@classmethod
|
| 162 |
@DB.connection_context()
|
| 163 |
def get_parent_folder(cls, file_id):
|
|
@@ -241,3 +288,20 @@ class FileService(CommonService):
|
|
| 241 |
dfs(folder_id)
|
| 242 |
return size
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
from flask_login import current_user
|
| 17 |
from peewee import fn
|
| 18 |
|
| 19 |
+
from api.db import FileType, KNOWLEDGEBASE_FOLDER_NAME, FileSource
|
| 20 |
from api.db.db_models import DB, File2Document, Knowledgebase
|
| 21 |
from api.db.db_models import File, Document
|
| 22 |
from api.db.services.common_service import CommonService
|
| 23 |
+
from api.db.services.document_service import DocumentService
|
| 24 |
+
from api.db.services.file2document_service import File2DocumentService
|
| 25 |
from api.utils import get_uuid
|
| 26 |
|
| 27 |
|
|
|
|
| 35 |
if keywords:
|
| 36 |
files = cls.model.select().where(
|
| 37 |
(cls.model.tenant_id == tenant_id)
|
| 38 |
+
(cls.model.parent_id == pf_id),
|
| 39 |
+
(fn.LOWER(cls.model.name).like(f"%%{keywords.lower()}%%")),
|
| 40 |
+
~(cls.model.id == pf_id)
|
| 41 |
+
)
|
| 42 |
else:
|
| 43 |
+
files = cls.model.select().where((cls.model.tenant_id == tenant_id),
|
| 44 |
+
(cls.model.parent_id == pf_id),
|
| 45 |
+
~(cls.model.id == pf_id)
|
| 46 |
+
)
|
| 47 |
count = files.count()
|
| 48 |
if desc:
|
| 49 |
files = files.order_by(cls.model.getter_by(orderby).desc())
|
|
|
|
| 142 |
@classmethod
|
| 143 |
@DB.connection_context()
|
| 144 |
def get_root_folder(cls, tenant_id):
    """Return the tenant's root folder as a dict, creating it when absent.

    The root folder is the row whose ``parent_id`` equals its own ``id``.
    When the tenant has none, a folder named ``/`` is saved and the dict
    that was saved is returned.

    :param tenant_id: owner of the folder; also stored as ``created_by``.
    :return: dict describing the root folder.
    """
    candidates = cls.model.select().where(
        (cls.model.tenant_id == tenant_id),
        (cls.model.parent_id == cls.model.id)
    )
    existing = next(iter(candidates), None)
    if existing is not None:
        return existing.to_dict()

    # A root folder is its own parent, so both ids share one uuid.
    root_id = get_uuid()
    root = dict(
        id=root_id,
        parent_id=root_id,
        tenant_id=tenant_id,
        created_by=tenant_id,
        name="/",
        type=FileType.FOLDER.value,
        size=0,
        location="",
    )
    cls.save(**root)
    return root
|
| 163 |
+
|
| 164 |
+
@classmethod
|
| 165 |
+
@DB.connection_context()
|
| 166 |
+
def get_kb_folder(cls, tenant_id):
    """Return the tenant's knowledge-base folder as a dict.

    The folder is the entry named ``KNOWLEDGEBASE_FOLDER_NAME`` directly
    under the tenant's root folder (the row whose ``parent_id`` equals its
    own ``id``).

    :param tenant_id: tenant whose knowledge-base folder is looked up.
    :return: dict describing the folder.
    :raises AssertionError: if no such folder exists.
    """
    # peewee conditions must be joined with ``&``. Python's ``and`` evaluates
    # to its last operand only, which silently dropped the tenant and parent
    # filters and could return another tenant's folder.
    roots = cls.model.select().where(
        (cls.model.tenant_id == tenant_id)
        & (cls.model.parent_id == cls.model.id)
    )
    for root in roots:
        folders = cls.model.select().where(
            (cls.model.tenant_id == tenant_id)
            & (cls.model.parent_id == root.id)
            & (cls.model.name == KNOWLEDGEBASE_FOLDER_NAME)
        )
        for folder in folders:
            return folder.to_dict()

    # Explicit raise (same exception type as before) so the failure is not
    # stripped under ``python -O``.
    raise AssertionError("Can't find the KB folder. Database init error.")
|
| 175 |
+
|
| 176 |
+
@classmethod
|
| 177 |
+
@DB.connection_context()
|
| 178 |
+
def new_a_file_from_kb(cls, tenant_id, name, parent_id, ty=FileType.FOLDER.value, size=0, location=""):
    """Return the knowledge-base entry with this name, creating it if needed.

    An existing row matching ``tenant_id``, ``parent_id`` and ``name`` is
    returned as a dict. Otherwise a new row with
    ``source_type = FileSource.KNOWLEDGEBASE`` is saved and the dict that
    was saved is returned.

    :param tenant_id: owner of the entry; also stored as ``created_by``.
    :param name: entry name.
    :param parent_id: id of the containing folder.
    :param ty: entry type; defaults to the folder type.
    :param size: stored size.
    :param location: stored location.
    :return: dict describing the existing or newly created entry.
    """
    matches = cls.query(tenant_id=tenant_id, parent_id=parent_id, name=name)
    found = next(iter(matches), None)
    if found is not None:
        return found.to_dict()

    record = dict(
        id=get_uuid(),
        parent_id=parent_id,
        tenant_id=tenant_id,
        created_by=tenant_id,
        name=name,
        type=ty,
        size=size,
        location=location,
        source_type=FileSource.KNOWLEDGEBASE,
    )
    cls.save(**record)
    return record
|
| 194 |
|
| 195 |
+
@classmethod
|
| 196 |
+
@DB.connection_context()
|
| 197 |
+
def init_knowledgebase_docs(cls, root_id, tenant_id):
    """Mirror the tenant's knowledge bases into the file tree, once.

    Does nothing when a folder named ``KNOWLEDGEBASE_FOLDER_NAME`` already
    exists under ``root_id``. Otherwise creates that folder, one sub-folder
    per knowledge base of the tenant, and one file entry per document of
    each knowledge base.

    :param root_id: id of the folder that holds the knowledge-base folder.
    :param tenant_id: tenant whose knowledge bases are mirrored.
    """
    marker = cls.model.select().where(
        (cls.model.name == KNOWLEDGEBASE_FOLDER_NAME)
        & (cls.model.parent_id == root_id)
    )
    if any(True for _ in marker):
        # Already initialised.
        return

    kb_root = cls.new_a_file_from_kb(tenant_id, KNOWLEDGEBASE_FOLDER_NAME, root_id)

    tenant_kbs = Knowledgebase.select(
        Knowledgebase.id, Knowledgebase.name
    ).where(Knowledgebase.tenant_id == tenant_id)
    for kb in tenant_kbs:
        sub_folder = cls.new_a_file_from_kb(tenant_id, kb.name, kb_root["id"])
        for document in DocumentService.query(kb_id=kb.id):
            FileService.add_file_from_kb(document.to_dict(), sub_folder["id"], tenant_id)
|
| 207 |
+
|
| 208 |
@classmethod
|
| 209 |
@DB.connection_context()
|
| 210 |
def get_parent_folder(cls, file_id):
|
|
|
|
| 288 |
dfs(folder_id)
|
| 289 |
return size
|
| 290 |
|
| 291 |
+
@classmethod
|
| 292 |
+
@DB.connection_context()
|
| 293 |
+
def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
    """Create a file entry for a knowledge-base document and link the two.

    Does nothing when the document already has a file/document link.

    :param doc: document as a dict; reads ``id``, ``name``, ``type``,
        ``size`` and ``location``.
    :param kb_folder_id: id of the folder that receives the file entry.
    :param tenant_id: owner of the entry; also stored as ``created_by``.
    """
    existing_links = File2DocumentService.get_by_document_id(doc["id"])
    if any(True for _ in existing_links):
        return

    entry = dict(
        id=get_uuid(),
        parent_id=kb_folder_id,
        tenant_id=tenant_id,
        created_by=tenant_id,
        name=doc["name"],
        type=doc["type"],
        size=doc["size"],
        location=doc["location"],
        source_type=FileSource.KNOWLEDGEBASE,
    )
    cls.save(**entry)

    link = dict(id=get_uuid(), file_id=entry["id"], document_id=doc["id"])
    File2DocumentService.save(**link)
|
docker/entrypoint.sh
CHANGED
|
@@ -8,14 +8,14 @@ PY=/root/miniconda3/envs/py11/bin/python
|
|
| 8 |
|
| 9 |
function task_exe(){
|
| 10 |
while [ 1 -eq 1 ];do
|
| 11 |
-
$PY rag/svr/task_executor.py
|
| 12 |
done
|
| 13 |
}
|
| 14 |
|
| 15 |
WS=1
|
| 16 |
for ((i=0;i<WS;i++))
|
| 17 |
do
|
| 18 |
-
task_exe
|
| 19 |
done
|
| 20 |
|
| 21 |
while [ 1 -eq 1 ];do
|
|
|
|
| 8 |
|
| 9 |
# Run the task executor in an endless loop: each time the Python process
# exits, the always-true loop condition starts it again.
function task_exe(){
|
| 10 |
while [ 1 -eq 1 ];do
|
| 11 |
+
$PY rag/svr/task_executor.py ;
|
| 12 |
done
|
| 13 |
}
|
| 14 |
|
| 15 |
WS=1
|
| 16 |
for ((i=0;i<WS;i++))
|
| 17 |
do
|
| 18 |
+
task_exe &
|
| 19 |
done
|
| 20 |
|
| 21 |
while [ 1 -eq 1 ];do
|
rag/svr/task_executor.py
CHANGED
|
@@ -109,6 +109,7 @@ def collect():
|
|
| 109 |
if not msg: return pd.DataFrame()
|
| 110 |
|
| 111 |
if TaskService.do_cancel(msg["id"]):
|
|
|
|
| 112 |
return pd.DataFrame()
|
| 113 |
tasks = TaskService.get_tasks(msg["id"])
|
| 114 |
assert tasks, "{} empty task!".format(msg["id"])
|
|
|
|
| 109 |
if not msg: return pd.DataFrame()
|
| 110 |
|
| 111 |
if TaskService.do_cancel(msg["id"]):
|
| 112 |
+
cron_logger.info("Task {} has been canceled.".format(msg["id"]))
|
| 113 |
return pd.DataFrame()
|
| 114 |
tasks = TaskService.get_tasks(msg["id"])
|
| 115 |
assert tasks, "{} empty task!".format(msg["id"])
|
requirements_dev.txt
CHANGED
|
@@ -78,8 +78,6 @@ pycryptodomex==3.20.0
|
|
| 78 |
pydantic==2.6.2
|
| 79 |
pydantic_core==2.16.3
|
| 80 |
PyJWT==2.8.0
|
| 81 |
-
PyMuPDF==1.23.25
|
| 82 |
-
PyMuPDFb==1.23.22
|
| 83 |
PyMySQL==1.1.0
|
| 84 |
PyPDF2==3.0.1
|
| 85 |
pypdfium2==4.27.0
|
|
|
|
| 78 |
pydantic==2.6.2
|
| 79 |
pydantic_core==2.16.3
|
| 80 |
PyJWT==2.8.0
|
|
|
|
|
|
|
| 81 |
PyMySQL==1.1.0
|
| 82 |
PyPDF2==3.0.1
|
| 83 |
pypdfium2==4.27.0
|