Spaces:
Sleeping
Sleeping
Update settings.yaml
Browse files- settings.yaml +110 -5
settings.yaml
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
|
|
|
|
|
| 1 |
llm:
|
| 2 |
api_key: "3bf18984-b4df-49ba-a30b-6cbae3964b08"
|
| 3 |
type: openai_chat
|
| 4 |
model_supports_json: true
|
| 5 |
-
model:
|
| 6 |
-
api_base:
|
| 7 |
# max_tokens: 10000 # Adjusted based on Claude 3 Haiku's typical context window
|
| 8 |
request_timeout: 30
|
| 9 |
tokens_per_minute: 100000
|
|
@@ -11,9 +13,112 @@ llm:
|
|
| 11 |
max_retry_wait: 5
|
| 12 |
temperature: 0.1
|
| 13 |
|
| 14 |
-
|
| 15 |
async_mode: threaded
|
| 16 |
llm:
|
|
|
|
| 17 |
type: openai_embedding
|
| 18 |
-
model:
|
| 19 |
-
api_base:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
encoding_model: cl100k_base
|
| 2 |
+
skip_workflows: []
|
| 3 |
llm:
|
| 4 |
api_key: "3bf18984-b4df-49ba-a30b-6cbae3964b08"
|
| 5 |
type: openai_chat
|
| 6 |
model_supports_json: true
|
| 7 |
+
model: claude-3-5-sonnet-20240620
|
| 8 |
+
api_base: http://localhost:8000/v1
|
| 9 |
# max_tokens: 10000 # Adjusted based on Claude 3 Haiku's typical context window
|
| 10 |
request_timeout: 30
|
| 11 |
tokens_per_minute: 100000
|
|
|
|
| 13 |
max_retry_wait: 5
|
| 14 |
temperature: 0.1
|
| 15 |
|
| 16 |
+
embeddings:
|
| 17 |
async_mode: threaded
|
| 18 |
llm:
|
| 19 |
+
api_key: "EMBEDDING_API_KEY"
|
| 20 |
type: openai_embedding
|
| 21 |
+
model: mixedbread-ai/mxbai-embed-large-v1
|
| 22 |
+
api_base: http://localhost:7997
|
| 23 |
+
|
| 24 |
+
chunks:
|
| 25 |
+
size: 1200
|
| 26 |
+
overlap: 100
|
| 27 |
+
group_by_columns: [id] # by default, we don't allow chunks to cross documents
|
| 28 |
+
|
| 29 |
+
input:
|
| 30 |
+
type: file # or blob
|
| 31 |
+
file_type: text # or csv
|
| 32 |
+
base_dir: "input"
|
| 33 |
+
file_encoding: utf-8
|
| 34 |
+
file_pattern: ".*\\.txt$"
|
| 35 |
+
|
| 36 |
+
cache:
|
| 37 |
+
type: file # or blob
|
| 38 |
+
base_dir: "cache"
|
| 39 |
+
# connection_string: <azure_blob_storage_connection_string>
|
| 40 |
+
# container_name: <azure_blob_storage_container_name>
|
| 41 |
+
|
| 42 |
+
storage:
|
| 43 |
+
type: file # or blob
|
| 44 |
+
base_dir: "output/${timestamp}/artifacts"
|
| 45 |
+
# connection_string: <azure_blob_storage_connection_string>
|
| 46 |
+
# container_name: <azure_blob_storage_container_name>
|
| 47 |
+
|
| 48 |
+
reporting:
|
| 49 |
+
type: file # or console, blob
|
| 50 |
+
base_dir: "output/${timestamp}/reports"
|
| 51 |
+
# connection_string: <azure_blob_storage_connection_string>
|
| 52 |
+
# container_name: <azure_blob_storage_container_name>
|
| 53 |
+
|
| 54 |
+
entity_extraction:
|
| 55 |
+
## llm: override the global llm settings for this task
|
| 56 |
+
## parallelization: override the global parallelization settings for this task
|
| 57 |
+
## async_mode: override the global async_mode settings for this task
|
| 58 |
+
prompt: "prompts/entity_extraction.txt"
|
| 59 |
+
entity_types: [organization,person,geo,event]
|
| 60 |
+
max_gleanings: 1
|
| 61 |
+
|
| 62 |
+
summarize_descriptions:
|
| 63 |
+
## llm: override the global llm settings for this task
|
| 64 |
+
## parallelization: override the global parallelization settings for this task
|
| 65 |
+
## async_mode: override the global async_mode settings for this task
|
| 66 |
+
prompt: "prompts/summarize_descriptions.txt"
|
| 67 |
+
max_length: 500
|
| 68 |
+
|
| 69 |
+
claim_extraction:
|
| 70 |
+
## llm: override the global llm settings for this task
|
| 71 |
+
## parallelization: override the global parallelization settings for this task
|
| 72 |
+
## async_mode: override the global async_mode settings for this task
|
| 73 |
+
# enabled: true
|
| 74 |
+
prompt: "prompts/claim_extraction.txt"
|
| 75 |
+
description: "Any claims or facts that could be relevant to information discovery."
|
| 76 |
+
max_gleanings: 1
|
| 77 |
+
|
| 78 |
+
community_reports:
|
| 79 |
+
## llm: override the global llm settings for this task
|
| 80 |
+
## parallelization: override the global parallelization settings for this task
|
| 81 |
+
## async_mode: override the global async_mode settings for this task
|
| 82 |
+
prompt: "prompts/community_report.txt"
|
| 83 |
+
max_length: 2000
|
| 84 |
+
max_input_length: 8000
|
| 85 |
+
|
| 86 |
+
cluster_graph:
|
| 87 |
+
max_cluster_size: 10
|
| 88 |
+
|
| 89 |
+
embed_graph:
|
| 90 |
+
enabled: false # if true, will generate node2vec embeddings for nodes
|
| 91 |
+
# num_walks: 10
|
| 92 |
+
# walk_length: 40
|
| 93 |
+
# window_size: 2
|
| 94 |
+
# iterations: 3
|
| 95 |
+
# random_seed: 597832
|
| 96 |
+
|
| 97 |
+
umap:
|
| 98 |
+
enabled: false # if true, will generate UMAP embeddings for nodes
|
| 99 |
+
|
| 100 |
+
snapshots:
|
| 101 |
+
graphml: false
|
| 102 |
+
raw_entities: false
|
| 103 |
+
top_level_nodes: false
|
| 104 |
+
|
| 105 |
+
local_search:
|
| 106 |
+
# text_unit_prop: 0.5
|
| 107 |
+
# community_prop: 0.1
|
| 108 |
+
# conversation_history_max_turns: 5
|
| 109 |
+
# top_k_mapped_entities: 10
|
| 110 |
+
# top_k_relationships: 10
|
| 111 |
+
# llm_temperature: 0 # temperature for sampling
|
| 112 |
+
# llm_top_p: 1 # top-p sampling
|
| 113 |
+
# llm_n: 1 # Number of completions to generate
|
| 114 |
+
# max_tokens: 12000
|
| 115 |
+
|
| 116 |
+
global_search:
|
| 117 |
+
# llm_temperature: 0 # temperature for sampling
|
| 118 |
+
# llm_top_p: 1 # top-p sampling
|
| 119 |
+
# llm_n: 1 # Number of completions to generate
|
| 120 |
+
# max_tokens: 12000
|
| 121 |
+
# data_max_tokens: 12000
|
| 122 |
+
# map_max_tokens: 1000
|
| 123 |
+
# reduce_max_tokens: 2000
|
| 124 |
+
# concurrency: 32
|