70 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			SQL
		
	
	
	
			
		
		
	
	
			70 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			SQL
		
	
	
	
-- 迁移脚本:从PostgreSQL迁移到ParadeDB
 | 
						||
-- 注意:在执行此脚本前,请确保已经备份了数据
 | 
						||
 | 
						||
-- 1. 导出数据(在PostgreSQL中执行)
 | 
						||
-- pg_dump -U postgres -h localhost -p 5432 -d your_database > backup.sql
 | 
						||
 | 
						||
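-- 2. Import the data (run against ParadeDB; adjust -h/-p so they point at the ParadeDB instance, not the source PostgreSQL server)
-- psql -U postgres -h localhost -p 5432 -d your_database < backup.sql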
 | 
						||
 | 
						||
-- 3. 验证数据
 | 
						||
 | 
						||
 | 
						||
-- Insert some sample data
 | 
						||
-- INSERT INTO tenants (id, name, description, status, api_key)
 | 
						||
-- VALUES 
 | 
						||
--     (1, 'Demo Tenant', 'This is a demo tenant for testing', 'active', 'sk-00000001abcdefg123456')
 | 
						||
-- ON CONFLICT DO NOTHING;
 | 
						||
 | 
						||
-- SELECT setval('tenants_id_seq', (SELECT MAX(id) FROM tenants));
 | 
						||
 | 
						||
 | 
						||
-- Create knowledge base
 | 
						||
-- INSERT INTO knowledge_bases (id, name, description, tenant_id, chunking_config, image_processing_config, embedding_model_id)
 | 
						||
-- VALUES 
 | 
						||
--     ('kb-00000001', 'Default Knowledge Base', 'Default knowledge base for testing', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001'),
 | 
						||
--     ('kb-00000002', 'Test Knowledge Base', 'Test knowledge base for development', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001'),
 | 
						||
--     ('kb-00000003', 'Test Knowledge Base 2', 'Test knowledge base for development 2', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001')
 | 
						||
-- ON CONFLICT DO NOTHING;
 | 
						||
 | 
						||
 | 
						||
-- Row counts of the migrated tables, used as a quick sanity check after the
-- import. Each count is aliased so the results can be told apart when the
-- script is run as a batch (otherwise every result column is named "count").
SELECT COUNT(*) AS tenants_count FROM tenants;
SELECT COUNT(*) AS models_count FROM models;
SELECT COUNT(*) AS knowledge_bases_count FROM knowledge_bases;
SELECT COUNT(*) AS knowledges_count FROM knowledges;
 | 
						||
 | 
						||
 | 
						||
-- 测试中文全文搜索
 | 
						||
 | 
						||
-- Create the document table used by the Chinese full-text search test.
-- IF NOT EXISTS keeps this step re-runnable when the table is already present.
CREATE TABLE IF NOT EXISTS chinese_documents (
    id SERIAL PRIMARY KEY,   -- auto-generated key; referenced as key_field by the BM25 index on this table
    title TEXT,
    content TEXT,            -- the column covered by the BM25 full-text index
    published_date DATE
);
 | 
						||
 | 
						||
-- Create a BM25 index on the table so that `content` can be searched as
-- Chinese text. The tokenizer configured for `content` is "chinese_lindera".
-- NOTE(review): the previous comment said the jieba tokenizer was used, but the
-- configuration below selects chinese_lindera — confirm which one is intended.
-- IF NOT EXISTS keeps this step re-runnable when the index is already present.
CREATE INDEX IF NOT EXISTS idx_documents_bm25 ON chinese_documents
USING bm25 (id, content)
WITH (
    key_field = 'id',
    text_fields = '{
        "content": {
          "tokenizer": {"type": "chinese_lindera"}
        }
    }'
);
 | 
						||
 | 
						||
-- Seed the table with five Chinese sample documents for the full-text search
-- test. `id` is omitted so the SERIAL column assigns it.
INSERT INTO chinese_documents (title, content, published_date)
VALUES
    ('人工智能的发展', '人工智能技术正在快速发展,影响了我们生活的方方面面。大语言模型是最近的一个重要突破。', '2023-01-15'),
    ('机器学习基础', '机器学习是人工智能的一个重要分支,包括监督学习、无监督学习和强化学习等方法。', '2023-02-20'),
    ('深度学习应用', '深度学习在图像识别、自然语言处理和语音识别等领域有广泛应用。', '2023-03-10'),
    ('自然语言处理技术', '自然语言处理允许计算机理解、解释和生成人类语言,是人工智能的核心技术之一。', '2023-04-05'),
    ('计算机视觉入门', '计算机视觉让机器能够"看到"并理解视觉世界,广泛应用于安防、医疗等领域。', '2023-05-12');
 | 
						||
 | 
						||
-- Add one English-only document alongside the Chinese samples.
-- NOTE(review): presumably here to check how the index handles non-Chinese
-- text — confirm.
INSERT INTO chinese_documents (title, content, published_date)
VALUES
    ('hello world', 'hello world', '2023-05-12');
 |