|
# Command-line tool for initialising and maintaining the knowledge-base
# database tables and vector stores.
import argparse
import sys

# NOTE(review): presumably makes the `server` and `configs` packages importable
# when the script is launched from the project root - it has to run before the
# project imports below, so the statement order here is intentional.
sys.path.append(".")
from server.knowledge_base.migrate import (create_tables, reset_tables, import_from_db,
                                           folder2db, prune_db_docs, prune_folder_files)
from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL
import nltk
# Put the configured NLTK data directory first on nltk's search path.
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
from datetime import datetime


def build_parser():
    """Build the argument parser describing every supported operation.

    The help texts are written as single-line strings; argparse's default
    help formatter collapses whitespace, so the rendered help is laid out the
    same as with multi-line literals.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(description="please specify only one operate method once time.")

    parser.add_argument(
        "-r",
        "--recreate-vs",
        action="store_true",
        help=(
            "recreate vector store. "
            "use this option if you have copied document files to the content folder, "
            "but vector store has not been populated or DEFAULT_VS_TYPE/EMBEDDING_MODEL changed."
        ),
    )
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help="create empty tables if not existed",
    )
    parser.add_argument(
        "--clear-tables",
        action="store_true",
        help="create empty tables, or drop the database tables before recreate vector stores",
    )
    parser.add_argument(
        "--import-db",
        help="import tables from specified sqlite database",
    )
    parser.add_argument(
        "-u",
        "--update-in-db",
        action="store_true",
        help=(
            "update vector store for files exist in database. "
            "use this option if you want to recreate vectors for files exist in db "
            "and skip files exist in local folder only."
        ),
    )
    parser.add_argument(
        "-i",
        "--increment",
        action="store_true",
        help=(
            "update vector store for files exist in local folder and not exist in database. "
            "use this option if you want to create vectors incrementally."
        ),
    )
    parser.add_argument(
        "--prune-db",
        action="store_true",
        help=(
            "delete docs in database that not existed in local folder. "
            "it is used to delete database docs after user deleted some doc files in file browser"
        ),
    )
    parser.add_argument(
        "--prune-folder",
        action="store_true",
        help=(
            "delete doc files in local folder that not existed in database. "
            "it is used to free local disk space by delete unused doc files."
        ),
    )
    parser.add_argument(
        "-n",
        "--kb-name",
        type=str,
        nargs="+",
        default=[],
        help="specify knowledge base names to operate on. default is all folders exist in KB_ROOT_PATH.",
    )
    parser.add_argument(
        "-e",
        "--embed-model",
        type=str,
        default=EMBEDDING_MODEL,
        help="specify embeddings model.",
    )
    return parser


def main(argv=None):
    """Parse the command line and run the requested maintenance operation.

    ``--create-tables`` and ``--clear-tables`` are applied first (in that
    order) and may be combined with one main operation. Of the main
    operations only one runs per invocation, with precedence
    recreate-vs > import-db > update-in-db > increment > prune-db >
    prune-folder.

    Args:
        argv: argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Main operations in the order in which they take precedence.
    requested = [
        flag
        for flag, value in (
            ("--recreate-vs", args.recreate_vs),
            ("--import-db", args.import_db),
            ("--update-in-db", args.update_in_db),
            ("--increment", args.increment),
            ("--prune-db", args.prune_db),
            ("--prune-folder", args.prune_folder),
        )
        if value
    ]

    if not (requested or args.create_tables or args.clear_tables):
        # Nothing was asked for: show the usage instead of silently doing nothing.
        parser.print_help()
        return

    if len(requested) > 1:
        # Keep the historical precedence, but tell the user what is being dropped.
        print(
            f"warning: multiple operations given ({', '.join(requested)}); "
            f"only {requested[0]} will run",
            file=sys.stderr,
        )

    start_time = datetime.now()

    if args.create_tables:
        create_tables()  # confirm tables exist

    if args.clear_tables:
        reset_tables()
        print("database tables reset")

    if args.recreate_vs:
        create_tables()
        print("recreating all vector stores")
        folder2db(kb_names=args.kb_name, mode="recreate_vs", embed_model=args.embed_model)
    elif args.import_db:
        import_from_db(args.import_db)
    elif args.update_in_db:
        folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
    elif args.increment:
        folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
    elif args.prune_db:
        prune_db_docs(args.kb_name)
    elif args.prune_folder:
        prune_folder_files(args.kb_name)

    end_time = datetime.now()
    # Report the total elapsed time (the message text is kept as in the original).
    print(f"总计用时: {end_time-start_time}")


if __name__ == "__main__":
    main()
|