codesearchBase / app.py
Forrest99's picture
Update app.py
11f129c verified
raw
history blame
16.5 kB
from flask import Flask, request, jsonify, render_template_string
from sentence_transformers import SentenceTransformer, util
import logging
import sys
import signal
# Configure root logging at INFO level.
logging.basicConfig(level=logging.INFO)

# Create the Flask application and make it log through a dedicated named logger.
app = Flask(__name__)
app.logger = logging.getLogger("CodeSearchAPI")
# Predefined code snippets that make up the search corpus. Most entries are
# PHP one-liners; a run of JavaScript/Node.js entries sits in the middle.
# A search hit is mapped back to its snippet by list index, so an entry's
# position in this list is its corpus id.
# NOTE(review): these are ordinary (non-raw) string literals, so Python
# interprets backslash escapes inside them. The `\r` in the `progress` snippet
# becomes a real carriage return, and sequences such as `\W`, `\d`, `\w` and
# `\+` in the regex snippets are not valid Python escapes (newer Python
# versions warn about them). Presumably the JavaScript escapes were meant to be
# kept literally -- confirm, and consider raw strings if so.
CODE_SNIPPETS = [
"echo 'Hello, World!';",
"function add($a, $b) { return $a + $b; }",
"$randomNumber = rand();",
"function isEven($num) { return $num % 2 == 0; }",
"strlen('example');",
"date('Y-m-d');",
"file_exists('example.txt');",
"file_get_contents('example.txt');",
"file_put_contents('example.txt', 'Hello, World!');",
"date('H:i:s');",
"strtoupper('example');",
"strtolower('EXAMPLE');",
"strrev('example');",
"count([1, 2, 3]);",
"max([1, 2, 3]);",
"min([1, 2, 3]);",
"sort([3, 1, 2]);",
"array_merge([1, 2], [3, 4]);",
"array_splice($array, $offset, $length);",
"empty([]);",
"substr_count('example', 'e');",
"strpos('example', 'amp') !== false;",
"strval(123);",
"intval('123');",
"is_numeric('123');",
"array_search('value', $array);",
"$array = [];",
"array_reverse([1, 2, 3]);",
"array_unique([1, 2, 2, 3]);",
"in_array('value', $array);",
"$array = ['key' => 'value'];",
"$array['new_key'] = 'new_value';",
"unset($array['key']);",
"array_keys($array);",
"array_values($array);",
"array_merge($array1, $array2);",
"empty($array);",
"$array['key'];",
"array_key_exists('key', $array);",
"$array = [];",
"count(file('example.txt'));",
"file_put_contents('example.txt', implode(PHP_EOL, $array));",
"file('example.txt', FILE_IGNORE_NEW_LINES);",
"str_word_count(file_get_contents('example.txt'));",
"function isLeapYear($year) { return ($year % 4 == 0 && ($year % 100 != 0 || $year % 400 == 0)); }",
"date('Y-m-d H:i:s');",
"(strtotime('2023-12-31') - strtotime('2023-01-01')) / (60 * 60 * 24);",
"getcwd();",
"scandir('.');",
"mkdir('new_directory');",
"rmdir('directory');",
"is_file('example.txt');",
"is_dir('directory');",
"filesize('example.txt');",
"rename('old.txt', 'new.txt');",
"copy('source.txt', 'destination.txt');",
"rename('source.txt', 'destination.txt');",
"unlink('example.txt');",
"getenv('PATH');",
"putenv('PATH=/new/path');",
"exec('start https://example.com');",
"file_get_contents('https://example.com');",
"json_decode('{\"key\":\"value\"}', true);",
"file_put_contents('example.json', json_encode($data));",
"json_decode(file_get_contents('example.json'), true);",
"implode(',', $array);",
"explode(',', 'a,b,c');",
"implode(PHP_EOL, $array);",
"explode(' ', 'a b c');",
"explode(',', 'a,b,c');",
"str_split('example');",
"str_replace('old', 'new', 'old text');",
"trim(' example ');",
"preg_replace('/[^a-zA-Z0-9]/', '', 'example!');",
"empty('');",
"strrev('example') == 'example';",
"fputcsv($file, $array);",
"array_map('str_getcsv', file('example.csv'));",
"count(file('example.csv'));",
"shuffle($array);",
"$array[array_rand($array)];",
"array_rand($array, $num);",
"rand(1, 6);",
"rand(0, 1);",
"substr(str_shuffle('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), 0, 8);",
"printf('#%06X', mt_rand(0, 0xFFFFFF));",
"uniqid();",
"class Example {}",
"$example = new Example();",
"class Example { function method() {} }",
"class Example { public $property; }",
"class Child extends Parent {}",
"class Child extends Parent { function method() {} }",
"Example::method();",
"Example::staticMethod();",
"is_object($example);",
"get_object_vars($example);",
"$example->property = 'value';",
"unset($example->property);",
"try{foo();}catch(e){}",
"throw new Error('CustomError')",
"""try{foo();}catch(e){const info=e.message;}""",
"console.error(err)",
"const timer={start(){this.s=Date.now()},stop(){return Date.now()-this.s}}",
"const runtime=(s)=>Date.now()-s",
"""const progress=(i,n)=>process.stdout.write(Math.floor(i/n100)+'%\r')""",
"const delay=(ms)=>new Promise(r=>setTimeout(r,ms))",
"const f=(x)=>x2",
"const m=arr.map(x=>x2)",
"const f2=arr.filter(x=>x>0)",
"const r=arr.reduce((a,x)=>a+x,0)",
"const a=[1,2,3].map(x=>x)",
"const o={a:1,b:2};const d={k:v for([k,v] of Object.entries(o))}",
"const s=new Set([1,2,3]);const p=new Set(x for(x of s))",
"const inter=new Set([...a].filter(x=>b.has(x)))",
"const uni=new Set([...a,...b])",
"const diff=new Set([...a].filter(x=>!b.has(x)))",
"const noNone=list.filter(x=>x!=null)",
"""try{fs.openSync(path)}catch{}""",
"typeof x==='string'",
"const b=!!str",
"if(cond)doSomething()",
"while(cond){}",
"for(const x of arr){}",
"for(const k in obj){}",
"for(const c of str){}",
"for(...){if(cond)break}",
"for(...){if(cond)continue}",
"function fn(){}",
"function fn(a=1){}",
"function fn(){return [1,2]}",
"function fn(...a){}",
"function fn(kwargs){const{a,b}=kwargs}",
"""function timed(fn){return(...a)=>{const s=Date.now();const r=fn(...a);console.log(Date.now()-s);return r}}""",
"""const deco=fn=>(...a)=>fn(...a)""",
"""const memo=fn=>{const c={};return x=>c[x]||=(fn(x))}""",
"functiongen(){yield 1;yield 2}",
"const g=gen();",
"const it={i:0,next(){return this.i<2?{value:this.i++,done:false}:{done:true}}}",
"for(const x of it){}",
"for(const [i,x] of arr.entries()){}",
"const z=arr1.map((v,i)=>[v,arr2[i]])",
"const dict=Object.fromEntries(arr1.map((v,i)=>[v,arr2[i]]))",
"JSON.stringify(arr1)===JSON.stringify(arr2)",
"JSON.stringify(obj1)===JSON.stringify(obj2)",
"JSON.stringify(new Set(a))===JSON.stringify(new Set(b))",
"const uniq=[...new Set(arr)]",
"set.clear()",
"set.size===0",
"set.add(x)",
"set.delete(x)",
"set.has(x)",
"set.size",
"const hasInt=([...a].some(x=>b.has(x)))",
"arr1.every(x=>arr2.includes(x))",
"str.includes(sub)",
"str[0]",
"str[str.length-1]",
"""const isText=path=>['.txt','.md'].includes(require('path').extname(path))""",
"""const isImage=path=>['.png','.jpg','.jpeg','.gif'].includes(require('path').extname(path))""",
"Math.round(n)",
"Math.ceil(n)",
"Math.floor(n)",
"n.toFixed(2)",
"""const randStr=(l)=>[...Array(l)].map(()=>Math.random().toString(36).charAt(2)).join('')""",
"const exists=require('fs').existsSync(path)",
"""const walk=(d)=>require('fs').readdirSync(d).flatMap(f=>{const p=require('path').join(d,f);return require('fs').statSync(p).isDirectory()?walk(p):p})""",
"""const ext=require('path').extname(fp)""",
"""const name=require('path').basename(fp)""",
"""const full=require('path').resolve(fp)""",
"process.version",
"process.platform",
"require('os').cpus().length",
"require('os').totalmem()",
"""const d=require('os').diskUsageSync?require('os').diskUsageSync('/'):null""",
"require('os').networkInterfaces()",
"""require('dns').resolve('www.google.com',e=>console.log(!e))""",
"""require('https').get(url,res=>res.pipe(require('fs').createWriteStream(dest)))""",
"""const upload=async f=>Promise.resolve('ok')""",
"""require('https').request({method:'POST',host,u:path},()=>{}).end(data)""",
"""require('https').get(url+'?'+new URLSearchParams(params),res=>{})""",
"""const req=()=>fetch(url,{headers})""",
"""const jsdom=require('jsdom');const d=new jsdom.JSDOM(html)""",
"""const title=jsdom.JSDOM(html).window.document.querySelector('title').textContent""",
"""const links=[...d.window.document.querySelectorAll('a')].map(a=>a.href)""",
"""Promise.all(links.map(u=>fetch(u).then(r=>r.blob()).then(b=>require('fs').writeFileSync(require('path').basename(u),Buffer.from(b)))))""",
"""const freq=html.split(/\W+/).reduce((c,w)=>{c[w]=(c[w]||0)+1;return c},{})""",
"""const login=()=>fetch(url,{method:'POST',body:creds})""",
"""const text=html.replace(/<[^>]+>/g,'')""",
"""const emails=html.match(/[\w.-]+@[\w.-]+/g)""",
"""const phones=html.match(/\+?\d[\d -]{7,}\d/g)""",
"""const nums=html.match(/\d+/g)""",
"""const newHtml=html.replace(/foo/g,'bar')""",
"""const ok=/^\d{3}$/.test(str)""",
"""const noTags=html.replace(/<[^>]*>/g,'')""",
"""const enc=html.replace(/./g,c=>'&#'+c.charCodeAt(0)+';')""",
"""const dec=enc.replace(/&#(\d+);/g,(m,n)=>String.fromCharCode(n))""",
"""const {app,BrowserWindow}=require('electron');app.on('ready',()=>new BrowserWindow().loadURL('about:blank'))""",
"$button = new GtkButton('Click Me'); $window->add($button);",
"$button->connect('clicked', function() { echo 'Button clicked!'; });",
"$dialog = new GtkMessageDialog($window, GtkDialogFlags::MODAL, GtkMessageType::INFO, GtkButtonsType::OK, 'Hello!'); $dialog->run();",
"$entry = new GtkEntry(); $input = $entry->get_text();",
"$window->set_title('New Title');",
"$window->set_default_size(800, 600);",
"$window->set_position(Gtk::WIN_POS_CENTER);",
"$menubar = new GtkMenuBar(); $menu = new GtkMenu(); $menuitem = new GtkMenuItem('File'); $menuitem->set_submenu($menu); $menubar->append($menuitem); $window->add($menubar);",
"$combobox = new GtkComboBoxText(); $combobox->append_text('Option 1'); $combobox->append_text('Option 2'); $window->add($combobox);",
"$radiobutton1 = new GtkRadioButton('Option 1'); $radiobutton2 = new GtkRadioButton($radiobutton1, 'Option 2'); $window->add($radiobutton1); $window->add($radiobutton2);",
"$checkbutton = new GtkCheckButton('Check Me'); $window->add($checkbutton);",
"$image = new GtkImage('image.png'); $window->add($image);",
"exec('play audio.mp3');",
"exec('play video.mp4');",
"$current_time = exec('get_current_time_command');",
"exec('screenshot_command');",
"exec('record_screen_command');",
"$mouse_position = exec('get_mouse_position_command');",
"exec('simulate_keyboard_input_command');",
"exec('simulate_mouse_click_command');",
"time();",
"date('Y-m-d H:i:s', $timestamp);",
"strtotime('2023-10-01 12:00:00');",
"date('l');",
"date('t');",
"date('Y-01-01');",
"date('Y-12-31');",
"date('Y-m-01', strtotime('2023-10-01'));",
"date('Y-m-t', strtotime('2023-10-01'));",
"date('N') < 6;",
"date('N') >= 6;",
"date('H');",
"date('i');",
"date('s');",
"sleep(1);",
"floor(microtime(true) * 1000);",
"date('Y-m-d H:i:s', $time);",
"strtotime($time_string);",
"$thread = new Thread(); $thread->start();",
"$thread->sleep(1);",
"$threads = []; for ($i = 0; $i < 5; $i++) { $threads[$i] = new Thread(); $threads[$i]->start(); }",
"$thread->getName();",
"$thread->setDaemon(true);",
"$lock = new Mutex(); $lock->lock(); $lock->unlock();",
"$pid = pcntl_fork();",
"getmypid();",
"posix_kill($pid, 0);",
"$pids = []; for ($i = 0; $i < 5; $i++) { $pids[$i] = pcntl_fork(); if ($pids[$i] == 0) { exit; } }",
"$queue = new Threaded(); $queue->push('value');",
"$pipe = fopen('php://stdin', 'r'); fwrite($pipe, 'value'); fclose($pipe);",
"set_time_limit(0);",
"exec('ls');",
"exec('ls', $output);",
"exec('ls', $output, $status);",
"$status === 0;",
"__FILE__;",
"$argv;",
"$parser = new ArgParser(); $parser->addArgument('arg1'); $parser->parse($argv);",
"$parser->printHelp();",
"print_r(get_loaded_extensions());",
"exec('pip install package_name');",
"exec('pip uninstall package_name');",
"exec('pip show package_name | grep Version');",
"exec('python -m venv venv');",
"exec('pip list');",
"exec('pip install --upgrade package_name');",
"$db = new SQLite3('database.db');",
"$result = $db->query('SELECT * FROM table');",
"$db->exec(\"INSERT INTO table (column) VALUES ('value')\");",
"$db->exec(\"DELETE FROM table WHERE id = 1\");",
"$db->exec(\"UPDATE table SET column = 'new_value' WHERE id = 1\");",
"$result = $db->query('SELECT * FROM table'); while ($row = $result->fetchArray()) { print_r($row); }",
"$stmt = $db->prepare('SELECT * FROM table WHERE id = :id'); $stmt->bindValue(':id', 1); $result = $stmt->execute();",
"$db->close();",
"$db->exec('CREATE TABLE table (id INTEGER PRIMARY KEY, column TEXT)');",
"$db->exec('DROP TABLE table');",
"$result = $db->query(\"SELECT name FROM sqlite_master WHERE type='table' AND name='table'\");",
"$result = $db->query(\"SELECT name FROM sqlite_master WHERE type='table'\");",
"$model = new Model(); $model->save();",
"$model = Model::find(1);",
"$model = Model::find(1); $model->delete();",
"$model = Model::find(1); $model->column = 'new_value'; $model->save();",
"class Model extends ORM { protected static $table = 'table'; }",
"class ChildModel extends ParentModel {}",
"protected static $primaryKey = 'id';",
"protected static $unique = ['column'];",
"protected static $defaults = ['column' => 'default_value'];",
"$file = fopen('data.csv', 'w'); fputcsv($file, $data); fclose($file);",
"$excel = new ExcelWriter('data.xlsx'); $excel->write($data); $excel->close();",
"$json = json_encode($data); file_put_contents('data.json', $json);",
"$excel = new ExcelReader('data.xlsx'); $data = $excel->read(); $excel->close();",
"$excel = new ExcelWriter('merged.xlsx'); foreach ($files as $file) { $data = (new ExcelReader($file))->read(); $excel->write($data); } $excel->close();",
"$excel = new ExcelWriter('data.xlsx'); $excel->addSheet('New Sheet'); $excel->close();",
"$excel = new ExcelWriter('data.xlsx'); $excel->copyStyle('Sheet1', 'Sheet2'); $excel->close();",
"$excel = new ExcelWriter('data.xlsx'); $excel->setCellColor('A1', 'FF0000'); $excel->close();",
"$excel = new ExcelWriter('data.xlsx'); $excel->setFontStyle('A1', 'bold'); $excel->close();",
"$excel = new ExcelReader('data.xlsx'); $value = $excel->getCellValue('A1'); $excel->close();",
"$excel = new ExcelWriter('data.xlsx'); $excel->setCellValue('A1', 'Hello'); $excel->close();",
"list($width, $height) = getimagesize('image.png');",
"$image = new Imagick('image.png'); $image->resizeImage(100, 100, Imagick::FILTER_LANCZOS, 1); $image->writeImage('resized_image.png');"
]
# Global readiness flag; set to True once model initialisation has succeeded.
service_ready = False


def handle_shutdown(signum, frame):
    """Log that a termination signal arrived, then exit with status 0.

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame that was active when the signal arrived (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    app.logger.info("收到终止信号,开始关闭...")
    raise SystemExit(0)


# Graceful shutdown: route both SIGINT and SIGTERM to the same handler.
signal.signal(signal.SIGINT, handle_shutdown)
signal.signal(signal.SIGTERM, handle_shutdown)
# Load the embedding model and pre-compute the snippet embeddings at import
# time. Any failure is logged and re-raised so the process does not start
# in a half-initialised state.
try:
    app.logger.info("开始加载模型...")
    model = SentenceTransformer(
        "flax-sentence-embeddings/st-codesearch-distilroberta-base",
        cache_folder="/model-cache",
    )
    # Encode every snippet once up front, forcing execution on the CPU.
    code_emb = model.encode(
        CODE_SNIPPETS,
        device="cpu",
        convert_to_tensor=True,
    )
    service_ready = True
    app.logger.info("服务初始化完成")
except Exception as init_error:
    app.logger.error("初始化失败: %s", str(init_error))
    raise
# Hugging Face health-check endpoint; it must answer on the root path.
@app.route('/')
def hf_health_check():
    """Report whether the service is ready.

    Clients that accept HTML get a small status page that also explains how
    to try the search endpoint. All other clients get a JSON body
    ``{"status": ...}`` with HTTP 200 when ready and 503 while initialising.
    """
    status = "ready" if service_ready else "initializing"

    # Non-HTML clients: plain JSON health check.
    if not request.accept_mimetypes.accept_html:
        return jsonify({"status": status}), (200 if service_ready else 503)

    # HTML clients: minimal page with the status and a usage hint.
    page = """
<h2>CodeSearch API</h2>
<p>服务状态:{{ status }}</p>
<p>你可以在地址栏输入 /search?query=你的查询 来测试接口</p>
"""
    return render_template_string(page, status=status)
# Search API endpoint; accepts both GET and POST requests.
@app.route('/search', methods=['GET', 'POST'])
def handle_search():
    """Return the stored snippet that is semantically closest to the query.

    The query is read from the ``query`` URL parameter on GET, or from the
    ``query`` field of a JSON object body on POST.

    Returns:
        A JSON response ``{"code": ..., "score": ...}`` (HTTP 200) on success.
        ``{"error": ...}`` is returned with 503 while the service is not
        ready, with 400 when the query is missing, empty, not a string, or
        the POST body is not a JSON object, and with 500 when the search
        itself fails unexpectedly.
    """
    if not service_ready:
        app.logger.info("服务未就绪")
        return jsonify({"error": "服务正在初始化"}), 503

    # Extract the raw query according to the request method.
    if request.method == 'GET':
        raw_query = request.args.get('query', '')
    else:
        # silent=True makes a wrong Content-Type or malformed JSON yield None
        # instead of raising an HTTP error, which the generic handler below
        # would otherwise have reported as a 500.
        payload = request.get_json(silent=True)
        raw_query = payload.get('query', '') if isinstance(payload, dict) else ''

    # A non-string query (e.g. {"query": 123} or null) is a client error,
    # not a server crash; treat it like a missing query.
    query = raw_query.strip() if isinstance(raw_query, str) else ''
    if not query:
        app.logger.info("收到空的查询请求")
        return jsonify({"error": "查询不能为空"}), 400

    # Log the received query.
    app.logger.info("收到查询请求: %s", query)

    try:
        # Encode the query on the CPU and look up the single best match.
        query_emb = model.encode(query, convert_to_tensor=True, device="cpu")
        hits = util.semantic_search(query_emb, code_emb, top_k=1)[0]
        best = hits[0]
        result = {
            "code": CODE_SNIPPETS[best['corpus_id']],
            "score": round(float(best['score']), 4)
        }
    except Exception as e:
        # logger.exception records the traceback along with the message.
        app.logger.exception("请求处理失败: %s", e)
        return jsonify({"error": "服务器内部错误"}), 500

    # Log the result being returned.
    app.logger.info("返回结果: %s", result)
    return jsonify(result)
if __name__ == "__main__":
    # For local testing only; on Hugging Face Spaces the app is usually
    # started through gunicorn instead.
    app.run(port=7860, host="0.0.0.0")