-- Source listing metadata: SQL, 204 lines, 7.8 KiB
-- Create extensions
-- uuid-ossp: supplies uuid_generate_v4(), the default for the string id columns below.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- vector (pgvector): supplies the halfvec type and hnsw index method used by embeddings.
CREATE EXTENSION IF NOT EXISTS vector;
-- pg_trgm: trigram matching; nothing in this script references it directly.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- pg_search: supplies the bm25 index method used by embeddings_search_idx.
CREATE EXTENSION IF NOT EXISTS pg_search;
-- Create tenant table
-- One row per tenant. Only name, api_key and business must be supplied on
-- insert; every other NOT NULL column has a default.
CREATE TABLE IF NOT EXISTS tenants (
    id SERIAL PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    description TEXT,
    api_key VARCHAR(64) NOT NULL,
    retriever_engines JSONB NOT NULL DEFAULT '[]',
    status VARCHAR(50) DEFAULT 'active',
    business VARCHAR(255) NOT NULL,
    storage_quota BIGINT NOT NULL DEFAULT 10737418240, -- Default quota of 10 GB (bytes)
    storage_used BIGINT NOT NULL DEFAULT 0, -- Storage space already used (bytes)
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP WITH TIME ZONE -- nullable; presumably a soft-delete marker (confirm with application code)
);
-- Set the starting value for tenants id sequence
-- Tenant ids start at 10000. The restart is guarded: running it unconditionally
-- on a re-executed script would rewind a sequence that has already moved past
-- 10000 and make new inserts collide with existing tenant ids.
DO $$
BEGIN
    IF (SELECT last_value FROM tenants_id_seq) < 10000 THEN
        ALTER SEQUENCE tenants_id_seq RESTART WITH 10000;
    END IF;
END
$$;
-- Add indexes
-- Non-unique lookup indexes on tenants.api_key and tenants.status.
CREATE INDEX IF NOT EXISTS idx_tenants_api_key ON tenants(api_key);
CREATE INDEX IF NOT EXISTS idx_tenants_status ON tenants(status);
-- Create model table
-- One row per model. id defaults to a generated UUID stored as text.
-- tenant_id has no foreign key declared in this script.
CREATE TABLE IF NOT EXISTS models (
    id VARCHAR(64) PRIMARY KEY DEFAULT uuid_generate_v4(),
    tenant_id INTEGER NOT NULL,
    name VARCHAR(255) NOT NULL,
    type VARCHAR(50) NOT NULL,
    source VARCHAR(50) NOT NULL,
    description TEXT,
    parameters JSONB NOT NULL, -- no default: callers must always supply it
    is_default BOOLEAN NOT NULL DEFAULT false,
    status VARCHAR(50) NOT NULL DEFAULT 'active',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP WITH TIME ZONE
);
-- Add indexes for models
-- Non-unique lookup indexes on models.type and models.source.
CREATE INDEX IF NOT EXISTS idx_models_type ON models(type);
CREATE INDEX IF NOT EXISTS idx_models_source ON models(source);
-- Create knowledge_base table
-- One row per knowledge base. The four *_model_id columns are NOT NULL with no
-- default, so they must be supplied on insert; the JSONB config columns all
-- have defaults. No foreign keys are declared in this script.
CREATE TABLE IF NOT EXISTS knowledge_bases (
    id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(),
    name VARCHAR(255) NOT NULL,
    description TEXT,
    tenant_id INTEGER NOT NULL,
    -- Default chunking: size 512, overlap 50, split on blank line, newline, or "。".
    chunking_config JSONB NOT NULL DEFAULT '{"chunk_size": 512, "chunk_overlap": 50, "split_markers": ["\n\n", "\n", "。"], "keep_separator": true}',
    image_processing_config JSONB NOT NULL DEFAULT '{"enable_multimodal": false, "model_id": ""}',
    embedding_model_id VARCHAR(64) NOT NULL,
    summary_model_id VARCHAR(64) NOT NULL,
    rerank_model_id VARCHAR(64) NOT NULL,
    vlm_model_id VARCHAR(64) NOT NULL,
    cos_config JSONB NOT NULL DEFAULT '{}',
    vlm_config JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP WITH TIME ZONE
);
-- Add indexes for knowledge_bases
-- Non-unique lookup index on knowledge_bases.tenant_id.
CREATE INDEX IF NOT EXISTS idx_knowledge_bases_tenant_id ON knowledge_bases(tenant_id);
-- Create knowledge table
-- One row per knowledge item inside a knowledge base. New rows start with
-- parse_status 'unprocessed' and enable_status 'enabled'. The file_* columns
-- are nullable. No foreign keys are declared in this script.
CREATE TABLE IF NOT EXISTS knowledges (
    id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(),
    tenant_id INTEGER NOT NULL,
    knowledge_base_id VARCHAR(36) NOT NULL,
    type VARCHAR(50) NOT NULL,
    title VARCHAR(255) NOT NULL,
    description TEXT,
    source VARCHAR(128) NOT NULL,
    parse_status VARCHAR(50) NOT NULL DEFAULT 'unprocessed',
    enable_status VARCHAR(50) NOT NULL DEFAULT 'enabled',
    embedding_model_id VARCHAR(64),
    file_name VARCHAR(255),
    file_type VARCHAR(50),
    file_size BIGINT,
    file_path TEXT,
    file_hash VARCHAR(64),
    storage_size BIGINT NOT NULL DEFAULT 0, -- Storage size (bytes)
    metadata JSONB,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    processed_at TIMESTAMP WITH TIME ZONE,
    error_message TEXT,
    deleted_at TIMESTAMP WITH TIME ZONE
);
-- Add indexes for knowledge
-- Non-unique lookup indexes on the tenant, the parent knowledge base, and both status columns.
CREATE INDEX IF NOT EXISTS idx_knowledges_tenant_id ON knowledges(tenant_id);
CREATE INDEX IF NOT EXISTS idx_knowledges_base_id ON knowledges(knowledge_base_id);
CREATE INDEX IF NOT EXISTS idx_knowledges_parse_status ON knowledges(parse_status);
CREATE INDEX IF NOT EXISTS idx_knowledges_enable_status ON knowledges(enable_status);
-- Create session table
-- One row per session. Only tenant_id must be supplied on insert; the
-- retrieval and fallback settings all have defaults. knowledge_base_id and
-- the two model id columns are nullable. No foreign keys are declared here.
CREATE TABLE IF NOT EXISTS sessions (
    id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(),
    tenant_id INTEGER NOT NULL,
    title VARCHAR(255),
    description TEXT,
    knowledge_base_id VARCHAR(36),
    max_rounds INTEGER NOT NULL DEFAULT 5,
    enable_rewrite BOOLEAN NOT NULL DEFAULT true,
    fallback_strategy VARCHAR(255) NOT NULL DEFAULT 'fixed',
    -- The default reply is a user-facing Chinese string; it is runtime data, keep it verbatim.
    fallback_response TEXT NOT NULL DEFAULT '很抱歉,我暂时无法回答这个问题。',
    keyword_threshold FLOAT NOT NULL DEFAULT 0.5,
    vector_threshold FLOAT NOT NULL DEFAULT 0.5,
    rerank_model_id VARCHAR(64),
    embedding_top_k INTEGER NOT NULL DEFAULT 10,
    rerank_top_k INTEGER NOT NULL DEFAULT 10,
    rerank_threshold FLOAT NOT NULL DEFAULT 0.65,
    summary_model_id VARCHAR(64),
    summary_parameters JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP WITH TIME ZONE
);
-- Create Index for sessions
-- Non-unique lookup index on sessions.tenant_id.
CREATE INDEX IF NOT EXISTS idx_sessions_tenant_id ON sessions(tenant_id);
-- Create message table
-- One row per message in a session. request_id, session_id, role and content
-- must be supplied on insert. No foreign key to sessions is declared here.
CREATE TABLE IF NOT EXISTS messages (
    id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(),
    request_id VARCHAR(36) NOT NULL,
    session_id VARCHAR(36) NOT NULL,
    role VARCHAR(50) NOT NULL,
    content TEXT NOT NULL,
    knowledge_references JSONB NOT NULL DEFAULT '[]',
    is_completed BOOLEAN NOT NULL DEFAULT false,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP WITH TIME ZONE
);
-- Create Index for messages
-- Non-unique lookup index on messages.session_id.
CREATE INDEX IF NOT EXISTS idx_messages_session_id ON messages(session_id);
-- Create chunks table
-- One row per chunk of a knowledge item. pre_chunk_id, next_chunk_id and
-- parent_chunk_id are nullable string ids with no foreign keys declared.
-- NOTE(review): start_at / end_at look like offsets into the source content — confirm the unit.
CREATE TABLE IF NOT EXISTS chunks (
    id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(),
    tenant_id INTEGER NOT NULL,
    knowledge_base_id VARCHAR(36) NOT NULL,
    knowledge_id VARCHAR(36) NOT NULL,
    content TEXT NOT NULL,
    chunk_index INTEGER NOT NULL,
    is_enabled BOOLEAN NOT NULL DEFAULT true,
    start_at INTEGER NOT NULL,
    end_at INTEGER NOT NULL,
    pre_chunk_id VARCHAR(36),
    next_chunk_id VARCHAR(36),
    chunk_type VARCHAR(20) NOT NULL DEFAULT 'text',
    parent_chunk_id VARCHAR(36),
    image_info TEXT,
    relation_chunks JSONB,
    indirect_relation_chunks JSONB,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP WITH TIME ZONE
);
-- Add indexes for chunks
-- Composite (tenant_id, knowledge_id) index plus single-column indexes on
-- parent_chunk_id and chunk_type.
CREATE INDEX IF NOT EXISTS idx_chunks_tenant_kg ON chunks(tenant_id, knowledge_id);
CREATE INDEX IF NOT EXISTS idx_chunks_parent_id ON chunks(parent_chunk_id);
CREATE INDEX IF NOT EXISTS idx_chunks_chunk_type ON chunks(chunk_type);
-- Create embeddings table
-- Stores one embedding per (source_id, source_type) together with the text it
-- was computed from. `embedding` is an unsized halfvec, so rows of different
-- widths can share the table; `dimension` records each row's width and is
-- what the partial HNSW indexes filter on.
CREATE TABLE IF NOT EXISTS embeddings (
    id SERIAL PRIMARY KEY,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    source_id VARCHAR(64) NOT NULL,
    source_type INTEGER NOT NULL,
    chunk_id VARCHAR(64),
    knowledge_id VARCHAR(64),
    knowledge_base_id VARCHAR(64),
    content TEXT,
    dimension INTEGER NOT NULL,
    embedding halfvec
);
-- At most one embedding row per (source_id, source_type).
CREATE UNIQUE INDEX IF NOT EXISTS embeddings_unique_source ON embeddings(source_id, source_type);

-- BM25 full-text index keyed on `id`. `content` is tokenized with the
-- chinese_lindera tokenizer; the other listed columns are indexed with the
-- extension's default settings.
CREATE INDEX IF NOT EXISTS embeddings_search_idx ON embeddings
USING bm25 (id, knowledge_base_id, content, knowledge_id, chunk_id)
WITH (
    key_field = 'id',
    text_fields = '{
        "content": {
            "tokenizer": {"type": "chinese_lindera"}
        }
    }'
);
-- Partial HNSW indexes for cosine-distance search, one per supported width.
-- `embedding` is an unsized halfvec, so each index casts it to a fixed
-- dimension and covers only the rows whose `dimension` matches.
-- The indexes are named and created IF NOT EXISTS: an unnamed CREATE INDEX
-- would build another auto-named duplicate every time this script is re-run.
-- Databases initialised with earlier unnamed statements may already hold
-- auto-named equivalents; drop those if the duplication is unwanted.
CREATE INDEX IF NOT EXISTS embeddings_embedding_3584_idx ON embeddings
    USING hnsw ((embedding::halfvec(3584)) halfvec_cosine_ops)
    WITH (m = 16, ef_construction = 64)
    WHERE (dimension = 3584);

-- NOTE(review): 798 is an unusual embedding width (768 is common) — confirm it is intended.
CREATE INDEX IF NOT EXISTS embeddings_embedding_798_idx ON embeddings
    USING hnsw ((embedding::halfvec(798)) halfvec_cosine_ops)
    WITH (m = 16, ef_construction = 64)
    WHERE (dimension = 798);