Spaces:
Runtime error
Runtime error
| import logging | |
| from enum import Enum | |
def configure_logging(level=logging.INFO, config_file_path='./common.log'):
    """Configure the root logger to write to the console and to a log file.

    ``logging.basicConfig`` raises ``ValueError`` when ``filename`` is given
    together with ``handlers``, so the file output is created here as an
    explicit ``FileHandler`` and both handlers are passed via ``handlers``.

    As with ``logging.basicConfig`` itself, nothing happens when the root
    logger already has handlers attached.

    Args:
        level: Threshold applied to the root logger and to both handlers.
        config_file_path: Path of the log file; records are appended to it.
    """
    if logging.getLogger().handlers:
        # basicConfig would be a no-op here; return before opening the log
        # file so that no unused file handle is left behind.
        return

    # One formatter for both outputs, so the date format is applied
    # consistently (asctime without milliseconds + explicit %(msecs)03d).
    formatter = logging.Formatter(
        fmt="[%(asctime)s.%(msecs)03d] %(module)30s:%(lineno)4d %(levelname)-7s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    console_handler = logging.StreamHandler()
    # UTF-8 keeps non-ASCII (e.g. Cyrillic) messages writable regardless of
    # the platform's default locale encoding.
    file_handler = logging.FileHandler(config_file_path, mode="a", encoding="utf-8")

    handlers = [console_handler, file_handler]
    for handler in handlers:
        handler.setLevel(level)
        handler.setFormatter(formatter)

    logging.basicConfig(level=level, handlers=handlers)
def get_elastic_query(query):
    """Build a fuzzy ``multi_match`` search body over the ``text`` field.

    Args:
        query: Search text; it is rendered into the body with an f-string.

    Returns:
        dict: ``{"query": {"multi_match": {...}}}`` using ``AUTO`` fuzziness
        and the ``russian`` analyzer.
    """
    search_text = f"{query}"
    multi_match = {
        "query": search_text,
        "fields": ["text"],
        "fuzziness": "AUTO",
        "analyzer": "russian",
    }
    return {"query": {"multi_match": multi_match}}
def get_elastic_people_query(query):
    """Build the search body used to look people up.

    The body is a ``bool``/``should`` of four fuzzy ``multi_match`` clauses:
    one on ``person_name`` and three ``nested`` ones (business processes,
    organisational structure position, business curator company name).
    The curator clause is boosted by 30 when the lower-cased query contains
    one of the curator marker phrases, otherwise by 15.

    Args:
        query: Search text; must provide ``.lower()``.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score": 13.0``.
    """
    lowered = query.lower()
    curator_markers = ("бизнес куратор", "бизнес-куратор", "куратор")
    if any(marker in lowered for marker in curator_markers):
        curator_boost = 30
    else:
        curator_boost = 15

    def fuzzy_match(fields):
        # Every clause shares the same fuzziness/analyzer settings.
        return {
            "multi_match": {
                "query": f"{query}",
                "fields": fields,
                "fuzziness": "AUTO",
                "analyzer": "standard",
            }
        }

    def nested_match(path, fields):
        return {"nested": {"path": path, "query": fuzzy_match(fields)}}

    should_clauses = [
        fuzzy_match(["person_name^3"]),
        nested_match(
            "business_processes",
            [
                "business_processes.production_activities_section",
                "business_processes.processes_name",
            ],
        ),
        # NOTE(review): the spelling "organizatinal" is reproduced as-is;
        # presumably it matches the index mapping - confirm before changing.
        nested_match(
            "organizatinal_structure",
            ["organizatinal_structure.position^2"],
        ),
        nested_match(
            "business_curator",
            [f"business_curator.company_name^{curator_boost}"],
        ),
    ]

    return {
        "query": {"bool": {"should": should_clauses}},
        "min_score": 13.0,
    }
def get_elastic_group_query(query):
    """Build the search body used to look groups up by ``group_name``.

    Two ``should`` clauses are combined: a fuzzy match of the caller's text,
    and a fixed-phrase match with ``operator: or`` and a boost of 0.1.

    Args:
        query: Search text; it is rendered into the body with an f-string.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score": 7.5``.
    """
    user_clause = {
        "multi_match": {
            "query": f"{query}",
            "fields": ["group_name"],
            "fuzziness": "AUTO",
            "analyzer": "standard",
        }
    }
    fixed_phrase_clause = {
        "multi_match": {
            "query": "персонального состава Персональный состав Комитета ПАО ГМК Норильский никель Рабочей группы",
            "fields": ["group_name"],
            "operator": "or",
            "boost": 0.1,
        }
    }
    body = {"query": {"bool": {"should": [user_clause, fixed_phrase_clause]}}}
    body["min_score"] = 7.5
    return body
def get_elastic_rocks_nn_query(query):
    """Build a ``function_score`` search body over division/company names.

    The inner query is a fuzzy ``multi_match`` on ``division_name``,
    ``division_name_2`` and ``company_name`` using ``custom_analyzer``.
    Documents whose ``_id`` is ``"3"`` get a weight of 0.5, combined with the
    query score through ``boost_mode: multiply``.

    Args:
        query: Search text; it is rendered into the body with an f-string.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score": 0.5``.
    """
    text_match = {
        "multi_match": {
            "query": f"{query}",
            "fields": ["division_name", "division_name_2", "company_name"],
            "fuzziness": "AUTO",
            "analyzer": "custom_analyzer",
        }
    }
    weight_for_id_3 = {"filter": {"term": {"_id": "3"}}, "weight": 0.5}
    scored = {
        "query": text_match,
        "functions": [weight_for_id_3],
        "boost_mode": "multiply",
    }
    return {"query": {"function_score": scored}, "min_score": 0.5}
def get_elastic_segmentation_query(query):
    """Build the search body for segmentation-model lookups.

    Two ``should`` clauses are combined: a fuzzy match of the caller's text
    over ``segmentation_model``, ``segmentation_model2`` and ``company_name``
    (``russian`` analyzer), and a fixed-phrase match over the two
    segmentation fields with ``operator: or`` and a boost of 0.1.

    Args:
        query: Search text; it is rendered into the body with an f-string.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score": 1.0``.
    """
    user_clause = {
        "multi_match": {
            "query": f"{query}",
            "fields": [
                "segmentation_model",
                "segmentation_model2",
                "company_name",
            ],
            "fuzziness": "AUTO",
            "analyzer": "russian",
        }
    }
    fixed_phrase_clause = {
        "multi_match": {
            "query": "модели сегментации модель сегментации",
            "fields": ["segmentation_model", "segmentation_model2"],
            "operator": "or",
            "boost": 0.1,
        }
    }
    body = {"query": {"bool": {"should": [user_clause, fixed_phrase_clause]}}}
    body["min_score"] = 1.0
    return body
def get_elastic_abbreviation_query(query):
    """Build a fuzzy ``multi_match`` search body for abbreviation lookups.

    Args:
        query: Search text; it is rendered into the body with an f-string.

    Returns:
        dict: ``{"query": {"multi_match": {...}}}`` over the ``text`` field,
        using ``AUTO`` fuzziness and the ``russian`` analyzer.
    """
    # Key order (query, fuzziness, fields, analyzer) is kept as in the
    # original so the serialized body is unchanged.
    multi_match = {"query": f"{query}"}
    multi_match["fuzziness"] = "AUTO"
    multi_match["fields"] = ["text"]
    multi_match["analyzer"] = "russian"
    return {"query": {"multi_match": multi_match}}
def combine_answer(answer):
    """Group answer chunks by their source document.

    Args:
        answer: Mapping of search name -> mapping of result key -> chunk
            dict. Every chunk dict must provide ``"doc_name"`` and
            ``"index_answer"``; ``"title"`` is read from the first chunk of
            each document. The ``"people_search"`` entry is skipped.

    Returns:
        list[dict]: One entry per document, in first-seen order, shaped as
        ``{"filename": ..., "title": ..., "chunks": [...]}``. A chunk whose
        ``(doc_name, index_answer)`` pair was already collected is dropped.

    Raises:
        KeyError: If a chunk lacks one of the required keys.
    """
    combined = {}
    # (doc_name, index_answer) pairs already placed in the result.
    # Assumes index_answer is hashable (e.g. an int) - TODO confirm.
    seen_chunks = set()

    for search_name, results in answer.items():
        if search_name == 'people_search':
            continue
        for chunk in results.values():
            filename = chunk["doc_name"]
            chunk_id = (filename, chunk["index_answer"])
            if chunk_id in seen_chunks:
                # Duplicate chunk: skip it. Previously this case rebuilt the
                # document entry from scratch and lost its earlier chunks.
                continue
            seen_chunks.add(chunk_id)

            entry = combined.get(filename)
            if entry is None:
                combined[filename] = {
                    "filename": filename,
                    "title": chunk["title"],
                    "chunks": [chunk],
                }
            else:
                entry["chunks"].append(chunk)

    return list(combined.values())
class TypeQuestion(Enum):
    """Three question-type markers, each valued as a bracketed numeral string."""

    # Members can be looked up by value, e.g. ``TypeQuestion("[1]")``.
    # NOTE(review): what each numbered type stands for is not visible in this
    # file - document it here once confirmed.
    TYPE_ONE = "[1]"
    TYPE_TWO = "[2]"
    TYPE_THREE = "[3]"
def get_source_format(filename: str) -> str:
    """Return the file format taken from a file name.

    The format is the text after the last ``.`` in *filename*, upper-cased.
    When *filename* contains no dot, the whole name is returned upper-cased.
    """
    _, _, suffix = filename.rpartition('.')
    return suffix.upper()