-- 迁移脚本:从PostgreSQL迁移到ParadeDB
-- 注意:在执行此脚本前,请确保已经备份了数据

-- 1. 导出数据(在PostgreSQL中执行)
-- pg_dump -U postgres -h localhost -p 5432 -d your_database > backup.sql

-- 2. 导入数据(在ParadeDB中执行)
-- psql -U postgres -h localhost -p 5432 -d your_database < backup.sql

-- 3. 验证数据


-- Insert some sample data
-- INSERT INTO tenants (id, name, description, status, api_key)
-- VALUES
-- (1, 'Demo Tenant', 'This is a demo tenant for testing', 'active', 'sk-00000001abcdefg123456')
-- ON CONFLICT DO NOTHING;

-- SELECT setval('tenants_id_seq', (SELECT MAX(id) FROM tenants));


-- -- Create knowledge base
-- INSERT INTO knowledge_bases (id, name, description, tenant_id, chunking_config, image_processing_config, embedding_model_id)
-- VALUES
-- ('kb-00000001', 'Default Knowledge Base', 'Default knowledge base for testing', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001'),
-- ('kb-00000002', 'Test Knowledge Base', 'Test knowledge base for development', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001'),
-- ('kb-00000003', 'Test Knowledge Base 2', 'Test knowledge base for development 2', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001')
-- ON CONFLICT DO NOTHING;


-- Post-import sanity check: report the row count of each migrated table.
-- Each statement returns its own single-row result.
SELECT COUNT(*) FROM tenants;
SELECT COUNT(*) FROM models;
SELECT COUNT(*) FROM knowledge_bases;
SELECT COUNT(*) FROM knowledges;


-- Chinese full-text search test

-- Create the documents table used by the search test.
-- IF NOT EXISTS keeps the script re-runnable when the table already exists.
CREATE TABLE IF NOT EXISTS chinese_documents (
    id SERIAL PRIMARY KEY,
    title TEXT,
    content TEXT,
    published_date DATE
);

-- Create a BM25 index on the table. The content column is tokenized with the
-- Lindera-based Chinese tokenizer (chinese_lindera) so Chinese text can be
-- searched; id is the index key field.
-- IF NOT EXISTS keeps the script re-runnable when the index already exists.
CREATE INDEX IF NOT EXISTS idx_documents_bm25 ON chinese_documents
USING bm25 (id, content)
WITH (
    key_field = 'id',
    text_fields = '{
        "content": {
            "tokenizer": {"type": "chinese_lindera"}
        }
    }'
);

-- Seed five Chinese-language sample documents for the full-text search test.
INSERT INTO chinese_documents (title, content, published_date)
VALUES
    ('人工智能的发展', '人工智能技术正在快速发展,影响了我们生活的方方面面。大语言模型是最近的一个重要突破。', '2023-01-15'),
    ('机器学习基础', '机器学习是人工智能的一个重要分支,包括监督学习、无监督学习和强化学习等方法。', '2023-02-20'),
    ('深度学习应用', '深度学习在图像识别、自然语言处理和语音识别等领域有广泛应用。', '2023-03-10'),
    ('自然语言处理技术', '自然语言处理允许计算机理解、解释和生成人类语言,是人工智能的核心技术之一。', '2023-04-05'),
    ('计算机视觉入门', '计算机视觉让机器能够"看到"并理解视觉世界,广泛应用于安防、医疗等领域。', '2023-05-12');

-- Add one English-language row alongside the Chinese sample documents.
INSERT INTO chinese_documents (title, content, published_date)
VALUES
    ('hello world', 'hello world', '2023-05-12');