Spaces:
danilonovais
/
Running on CPU Upgrade

File size: 3,212 Bytes
a7a6ad0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
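// Node 20 script: sync selected directories of a knowledge repo into Supabase, with optional OpenAI embeddings.
// Requires: SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, KNOWLEDGE_REPO_URL
// Optional: OPENAI_API_KEY (embeddings are skipped when unset), KNOWLEDGE_DIRS (comma-separated repo-relative directories; has a default)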
import { execFileSync, execSync } from 'node:child_process';
import crypto from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';

import { createClient } from '@supabase/supabase-js';
import OpenAI from 'openai';

// Runtime configuration, read once from the environment.
// KNOWLEDGE_DIRS falls back to a built-in comma-separated list when unset.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const SUPABASE_URL = process.env.SUPABASE_URL;
const SUPABASE_SERVICE_ROLE_KEY = process.env.SUPABASE_SERVICE_ROLE_KEY;
const KNOWLEDGE_REPO_URL = process.env.KNOWLEDGE_REPO_URL;
const KNOWLEDGE_DIRS =
  process.env.KNOWLEDGE_DIRS ??
  'projects/n8n,projects/videos-e-animacoes,projects/midjorney-prompt';

// Abort with exit code 1 when any of the three mandatory settings is unset or empty.
if (![SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, KNOWLEDGE_REPO_URL].every(Boolean)) {
  console.error('Missing env SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY or KNOWLEDGE_REPO_URL');
  process.exit(1);
}

// Service clients. `openai` is null when no API key is configured, which
// callers use as the signal to skip embedding generation.
const openai = OPENAI_API_KEY ? new OpenAI({ apiKey: OPENAI_API_KEY }) : null;
const supabase = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY);

// Local working directory that holds the checkout of the knowledge repo.
// `recursive: true` makes this a no-op when the directory already exists.
const workdir = path.resolve('knowledge');
fs.mkdirSync(workdir, { recursive: true });

const repoDir = path.join(workdir, 'CHATGPT-knowledge-base');

// Git is invoked with an argument array (no shell), so a repo URL or path
// containing spaces or shell metacharacters is passed through verbatim.
// The presence of `.git` decides between clone and pull: running
// `git -C <dir> pull` in a directory that is not itself a checkout would act
// on whatever enclosing repository git discovers instead.
if (!fs.existsSync(path.join(repoDir, '.git'))) {
  console.log('Cloning KB repo...');
  execFileSync('git', ['clone', '--depth', '1', '--', KNOWLEDGE_REPO_URL, repoDir], {
    stdio: 'inherit',
  });
} else {
  console.log('Pulling KB repo...');
  execFileSync('git', ['-C', repoDir, 'pull'], { stdio: 'inherit' });
}

// Repo-relative directories to ingest. Empty entries (e.g. from a trailing
// comma) are dropped; an empty entry would otherwise resolve to the repo root.
const dirs = KNOWLEDGE_DIRS.split(',')
  .map((s) => s.trim())
  .filter(Boolean);

/**
 * Compute the SHA-256 digest of the given data.
 *
 * @param {string} s - Data to hash.
 * @returns {string} Digest encoded as a hexadecimal string.
 */
function sha256(s) {
  const hasher = crypto.createHash('sha256');
  hasher.update(s);
  return hasher.digest('hex');
}

/**
 * Upsert one knowledge document into Supabase and, when an OpenAI client is
 * configured and the document has content, store an embedding for it.
 *
 * @param {string} pth - Document path; used as the upsert conflict key and to derive the title.
 * @param {string} content - Full text of the document.
 * @returns {Promise<void>}
 * @throws Rethrows the error object reported by either Supabase call; a
 *   rejected OpenAI request propagates unchanged.
 */
async function upsertDoc(pth, content) {
  const embeddingModel = 'text-embedding-3-large';
  // Character-based truncation of the embedding input (not a token count).
  const maxEmbeddingChars = 12000;

  const title = path.basename(pth);
  const hash = sha256(content);

  // Insert the document, or overwrite the existing row with the same path.
  // NOTE(review): supabase-js normally selects a non-default schema with
  // `.schema('knowledge').from('documents')`; confirm that the dotted table
  // names used here resolve as intended.
  const { data: doc, error: docErr } = await supabase
    .from('knowledge.documents')
    .upsert({ path: pth, title, content, hash }, { onConflict: 'path' })
    .select()
    .single();
  if (docErr) throw docErr;

  if (!openai) {
    console.warn('OPENAI_API_KEY not set, skipping embeddings for', pth);
    return;
  }

  const input = content.slice(0, maxEmbeddingChars);
  if (input.length === 0) {
    // The embeddings endpoint rejects an empty input string; skipping here
    // keeps a single empty file from aborting the whole sync.
    console.warn('Empty content, skipping embeddings for', pth);
    return;
  }

  const emb = await openai.embeddings.create({ model: embeddingModel, input });
  const vector = emb.data[0].embedding;

  // NOTE(review): no `onConflict` is given, so conflict detection relies on
  // the table's primary key — verify that re-syncing a document replaces its
  // embedding row rather than adding another one.
  const { error: embErr } = await supabase
    .from('knowledge.embeddings')
    .upsert({ doc_id: doc.id, embedding: vector, model: embeddingModel });
  if (embErr) throw embErr;
}

/**
 * Walk every configured directory of the checked-out repo and ingest each
 * eligible file through `upsertDoc`, one file at a time.
 *
 * Only direct children of a directory are considered (subdirectories are
 * skipped), and only names ending in .md, .markdown, .json or .txt
 * (case-insensitive). Configured directories that do not exist are reported
 * and skipped.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const ingestibleName = /\.(md|markdown|json|txt)$/i;

  for (const relativeDir of dirs) {
    const dirPath = path.join(repoDir, relativeDir);

    if (fs.existsSync(dirPath)) {
      const listing = await fs.promises.readdir(dirPath, { withFileTypes: true });
      const candidates = listing.filter(
        (entry) => !entry.isDirectory() && ingestibleName.test(entry.name),
      );

      for (const { name } of candidates) {
        const filePath = path.join(dirPath, name);
        const text = await fs.promises.readFile(filePath, 'utf8');
        // Documents are keyed by their path relative to the repo root.
        const docPath = path.relative(repoDir, filePath);
        console.log('Ingest:', docPath);
        await upsertDoc(docPath, text);
      }
    } else {
      console.warn('Skip missing dir:', relativeDir);
    }
  }

  console.log('Sync complete');
}

// Entry point: run the sync; on any failure, log the error and exit with status 1.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});