-- Create extensions CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION IF NOT EXISTS pg_trgm; CREATE EXTENSION IF NOT EXISTS pg_search; -- Create tenant table CREATE TABLE IF NOT EXISTS tenants ( id SERIAL PRIMARY KEY, name VARCHAR(255) NOT NULL, description TEXT, api_key VARCHAR(64) NOT NULL, retriever_engines JSONB NOT NULL DEFAULT '[]', status VARCHAR(50) DEFAULT 'active', business VARCHAR(255) NOT NULL, storage_quota BIGINT NOT NULL DEFAULT 10737418240, -- 默认10GB配额(Bytes) storage_used BIGINT NOT NULL DEFAULT 0, -- 已使用的存储空间(Bytes) created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, deleted_at TIMESTAMP WITH TIME ZONE ); -- Set the starting value for tenants id sequence ALTER SEQUENCE tenants_id_seq RESTART WITH 10000; -- Add indexes CREATE INDEX IF NOT EXISTS idx_tenants_api_key ON tenants(api_key); CREATE INDEX IF NOT EXISTS idx_tenants_status ON tenants(status); -- Create model table CREATE TABLE IF NOT EXISTS models ( id VARCHAR(64) PRIMARY KEY DEFAULT uuid_generate_v4(), tenant_id INTEGER NOT NULL, name VARCHAR(255) NOT NULL, type VARCHAR(50) NOT NULL, source VARCHAR(50) NOT NULL, description TEXT, parameters JSONB NOT NULL, is_default BOOLEAN NOT NULL DEFAULT false, status VARCHAR(50) NOT NULL DEFAULT 'active', created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, deleted_at TIMESTAMP WITH TIME ZONE ); -- Add indexes for models CREATE INDEX IF NOT EXISTS idx_models_type ON models(type); CREATE INDEX IF NOT EXISTS idx_models_source ON models(source); -- Create knowledge_base table CREATE TABLE IF NOT EXISTS knowledge_bases ( id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), name VARCHAR(255) NOT NULL, description TEXT, tenant_id INTEGER NOT NULL, chunking_config JSONB NOT NULL DEFAULT '{"chunk_size": 512, "chunk_overlap": 50, "split_markers": ["\n\n", "\n", "。"], "keep_separator": true}', image_processing_config JSONB NOT NULL DEFAULT '{"enable_multimodal": false, "model_id": ""}', embedding_model_id VARCHAR(64) NOT NULL, summary_model_id VARCHAR(64) NOT NULL, rerank_model_id VARCHAR(64) NOT NULL, vlm_model_id VARCHAR(64) NOT NULL, cos_config JSONB NOT NULL DEFAULT '{}', vlm_config JSONB NOT NULL DEFAULT '{}', created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, deleted_at TIMESTAMP WITH TIME ZONE ); -- Add indexes for knowledge_bases CREATE INDEX IF NOT EXISTS idx_knowledge_bases_tenant_id ON knowledge_bases(tenant_id); -- Create knowledge table CREATE TABLE IF NOT EXISTS knowledges ( id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), tenant_id INTEGER NOT NULL, knowledge_base_id VARCHAR(36) NOT NULL, type VARCHAR(50) NOT NULL, title VARCHAR(255) NOT NULL, description TEXT, source VARCHAR(128) NOT NULL, parse_status VARCHAR(50) NOT NULL DEFAULT 'unprocessed', enable_status VARCHAR(50) NOT NULL DEFAULT 'enabled', embedding_model_id VARCHAR(64), file_name VARCHAR(255), file_type VARCHAR(50), file_size BIGINT, file_path TEXT, file_hash VARCHAR(64), storage_size BIGINT NOT NULL DEFAULT 0, -- 存储大小(Byte) metadata JSONB, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, processed_at TIMESTAMP WITH TIME ZONE, error_message TEXT, deleted_at TIMESTAMP WITH TIME ZONE ); -- Add indexes for knowledge CREATE INDEX IF NOT EXISTS idx_knowledges_tenant_id ON knowledges(tenant_id); CREATE INDEX IF NOT EXISTS idx_knowledges_base_id ON knowledges(knowledge_base_id); CREATE INDEX IF NOT EXISTS idx_knowledges_parse_status ON knowledges(parse_status); CREATE INDEX IF NOT EXISTS idx_knowledges_enable_status ON knowledges(enable_status); -- Create session table CREATE TABLE IF NOT EXISTS sessions ( id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), tenant_id INTEGER NOT NULL, title VARCHAR(255), description TEXT, knowledge_base_id VARCHAR(36), max_rounds INTEGER NOT NULL DEFAULT 5, enable_rewrite BOOLEAN NOT NULL DEFAULT true, fallback_strategy VARCHAR(255) NOT NULL DEFAULT 'fixed', fallback_response TEXT NOT NULL DEFAULT '很抱歉,我暂时无法回答这个问题。', keyword_threshold FLOAT NOT NULL DEFAULT 0.5, vector_threshold FLOAT NOT NULL DEFAULT 0.5, rerank_model_id VARCHAR(64), embedding_top_k INTEGER NOT NULL DEFAULT 10, rerank_top_k INTEGER NOT NULL DEFAULT 10, rerank_threshold FLOAT NOT NULL DEFAULT 0.65, summary_model_id VARCHAR(64), summary_parameters JSONB NOT NULL DEFAULT '{}', created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, deleted_at TIMESTAMP WITH TIME ZONE ); -- Create Index for sessions CREATE INDEX IF NOT EXISTS idx_sessions_tenant_id ON sessions(tenant_id); -- Create message table CREATE TABLE IF NOT EXISTS messages ( id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), request_id VARCHAR(36) NOT NULL, session_id VARCHAR(36) NOT NULL, role VARCHAR(50) NOT NULL, content TEXT NOT NULL, knowledge_references JSONB NOT NULL DEFAULT '[]', is_completed BOOLEAN NOT NULL DEFAULT false, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, deleted_at TIMESTAMP WITH TIME ZONE ); -- Create Index for messages CREATE INDEX IF NOT EXISTS idx_messages_session_id ON messages(session_id); CREATE TABLE IF NOT EXISTS chunks ( id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), tenant_id INTEGER NOT NULL, knowledge_base_id VARCHAR(36) NOT NULL, knowledge_id VARCHAR(36) NOT NULL, content TEXT NOT NULL, chunk_index INTEGER NOT NULL, is_enabled BOOLEAN NOT NULL DEFAULT true, start_at INTEGER NOT NULL, end_at INTEGER NOT NULL, pre_chunk_id VARCHAR(36), next_chunk_id VARCHAR(36), chunk_type VARCHAR(20) NOT NULL DEFAULT 'text', parent_chunk_id VARCHAR(36), image_info TEXT, relation_chunks JSONB, indirect_relation_chunks JSONB, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, deleted_at TIMESTAMP WITH TIME ZONE ); CREATE INDEX IF NOT EXISTS idx_chunks_tenant_kg ON chunks(tenant_id, knowledge_id); CREATE INDEX IF NOT EXISTS idx_chunks_parent_id ON chunks(parent_chunk_id); CREATE INDEX IF NOT EXISTS idx_chunks_chunk_type ON chunks(chunk_type); CREATE TABLE IF NOT EXISTS embeddings ( id SERIAL PRIMARY KEY, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, source_id VARCHAR(64) NOT NULL, source_type INTEGER NOT NULL, chunk_id VARCHAR(64), knowledge_id VARCHAR(64), knowledge_base_id VARCHAR(64), content TEXT, dimension INTEGER NOT NULL, embedding halfvec ); CREATE UNIQUE INDEX IF NOT EXISTS embeddings_unique_source ON embeddings(source_id, source_type); CREATE INDEX IF NOT EXISTS embeddings_search_idx ON embeddings USING bm25 (id, knowledge_base_id, content, knowledge_id, chunk_id) WITH ( key_field = 'id', text_fields = '{ "content": { "tokenizer": {"type": "chinese_lindera"} } }' ); CREATE INDEX ON embeddings USING hnsw ((embedding::halfvec(3584)) halfvec_cosine_ops) WITH (m = 16, ef_construction = 64) WHERE (dimension = 3584); CREATE INDEX ON embeddings USING hnsw ((embedding::halfvec(798)) halfvec_cosine_ops) WITH (m = 16, ef_construction = 64) WHERE (dimension = 798);