微信扫码
添加专属顾问
我要投稿
from langchain.document_loaders import DirectoryLoader# Load HTML files already saved in a local directorypath = "../../RAG/rtdocs_new/"global_pattern = '*.html'loader = DirectoryLoader(path=path, glob=global_pattern)docs = loader.load()# Print num documents and a preview.print(f"loaded {len(docs)} documents")print(docs[0].page_content)pprint.pprint(docs[0].metadata)
import torchfrom sentence_transformers import SentenceTransformer# Initialize torch settings for device-agnostic code.N_GPU = torch.cuda.device_count()DEVICE = torch.device('cuda:N_GPU' if torch.cuda.is_available() else 'cpu')# Download the model from huggingface model hub.model_name = "BAAI/bge-large-en-v1.5"encoder = SentenceTransformer(model_name, device=DEVICE)# Get the model parameters and save for later.EMBEDDING_DIM = encoder.get_sentence_embedding_dimension()MAX_SEQ_LENGTH_IN_TOKENS = encoder.get_max_seq_length()# Inspect model parameters.print(f"model_name: {model_name}")print(f"EMBEDDING_DIM: {EMBEDDING_DIM}")print(f"MAX_SEQ_LENGTH: {MAX_SEQ_LENGTH}")
from langchain.text_splitter import RecursiveCharacterTextSplitterCHUNK_SIZE = 512chunk_overlap = np.round(CHUNK_SIZE * 0.10, 0)print(f"chunk_size: {CHUNK_SIZE}, chunk_overlap: {chunk_overlap}")# Define the splitter.child_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE,chunk_overlap=chunk_overlap)# Chunk the docs.chunks = child_splitter.split_documents(docs)print(f"{len(docs)} docs split into {len(chunks)} child documents.")# Encoder input is doc.page_content as strings.list_of_strings = [doc.page_content for doc in chunks if hasattr(doc, 'page_content')]# Embedding inference using HuggingFace encoder.embeddings = torch.tensor(encoder.encode(list_of_strings))# Normalize the embeddings.embeddings = np.array(embeddings / np.linalg.norm(embeddings))# Milvus expects a list of `numpy.ndarray` of `numpy.float32` numbers.converted_values = list(map(np.float32, embeddings))# Create dict_list for Milvus insertion.dict_list = []for chunk, vector in zip(chunks, converted_values):# Assemble embedding vector, original text chunk, metadata.chunk_dict = {'chunk': chunk.page_content,'source': chunk.metadata.get('source', ""),'vector': vector,}dict_list.append(chunk_dict)
# Connect a client to the Milvus Lite server.from pymilvus import MilvusClientmc = MilvusClient("milvus_demo.db")# Create a collection with flexible schema and AUTOINDEX.COLLECTION_NAME = "MilvusDocs"mc.create_collection(COLLECTION_NAME,EMBEDDING_DIM,consistency_level="Eventually",auto_id=True,overwrite=True)# Insert data into the Milvus collection.print("Start inserting entities")start_time = time.time()mc.insert(COLLECTION_NAME,data=dict_list,progress_bar=True)end_time = time.time()print(f"Milvus insert time for {len(dict_list)} vectors: ", end="")print(f"{round(end_time - start_time, 2)} seconds")
SAMPLE_QUESTION = "What do the parameters for HNSW mean?"# Embed the question using the same encoder.query_embeddings = torch.tensor(encoder.encode(SAMPLE_QUESTION))# Normalize embeddings to unit length.query_embeddings = F.normalize(query_embeddings, p=2, dim=1)# Convert the embeddings to list of list of np.float32.query_embeddings = list(map(np.float32, query_embeddings))# Define metadata fields you can filter on.OUTPUT_FIELDS = list(dict_list[0].keys())OUTPUT_FIELDS.remove('vector')# Define how many top-k results you want to retrieve.TOP_K = 2# Run semantic vector search using your query and the vector database.results = mc.search(COLLECTION_NAME,data=query_embeddings,output_fields=OUTPUT_FIELDS,limit=TOP_K,consistency_level="Eventually")
# (Recommended) Create a new conda environment.conda create -n myenv python=3.11 -yconda activate myenv# Install vLLM with CUDA 12.1.pip install -U vllm transformers torch
import vllm, torchfrom vllm import LLM, SamplingParams# Clear the GPU memory cache.torch.cuda.empty_cache()# Check the GPU.!nvidia-smi
# Login to HuggingFace using your new token.from huggingface_hub import loginfrom google.colab import userdatahf_token = userdata.get('HF_TOKEN')login(token = hf_token, add_to_git_credential=True)
# 1. Choose a modelMODELTORUN = "meta-llama/Meta-Llama-3.1-8B-Instruct"# 2. Clear the GPU memory cache, you're going to need it all!torch.cuda.empty_cache()# 3. Instantiate a vLLM model instance.llm = LLM(model=MODELTORUN,enforce_eager=True,dtype=torch.bfloat16,gpu_memory_utilization=0.5,max_model_len=1000,seed=415,max_num_batched_tokens=3000)
# Separate all the context together by space.contexts_combined = ' '.join(contexts)# Lance Martin, LangChain, says put the best contexts at the end.contexts_combined = ' '.join(reversed(contexts))# Separate all the unique sources together by comma.source_combined = ' '.join(reversed(list(dict.fromkeys(sources))))SYSTEM_PROMPT = f"""First, check if the provided Context is relevant tothe user's question.Second, only if the provided Context is strongly relevant, answer the question using the Context.Otherwise, if the Context is not strongly relevant, answer the question without using the Context.Be clear, concise, relevant.Answer clearly, in fewer than 2 sentences.Grounding sources: {source_combined}Context: {contexts_combined}question: {SAMPLE_QUESTION}"""prompts = [SYSTEM_PROMPT]
# Sampling parameterssampling_params = SamplingParams(temperature=0.2, top_p=0.95)# Invoke the vLLM model.outputs = llm.generate(prompts, sampling_params)# Print the outputs.for output in outputs:prompt = output.promptgenerated_text = output.outputs[0].text# !r calls repr(), which prints a string inside quotes.print()print(f"Question: {SAMPLE_QUESTION!r}")pprint.pprint(f"Generated text: {generated_text!r}")
53AI,企业落地大模型首选服务商
产品:场景落地咨询+大模型应用平台+行业解决方案
承诺:免费POC验证,效果达标后再合作。零风险落地应用大模型,已交付160+中大型企业
2026-05-18
别再错过啦,AI Agent记忆革命:95.2%检索率的持久记忆系统深度解析
2026-05-18
有多少人把Agent与RAG的检索策略,简化成了 if-else?
2026-05-18
RAG 全链路技术详解
2026-05-18
捅破个人AI天花板!YC总裁开源GBrain:8层架构打造AI第二大脑,解决记忆难题,狂揽 1.6w Star!网友:第六层才是护城河!
2026-05-16
RAG 在企业的落地,从来不是一个“大模型问题”
2026-05-14
2026年知识库幻觉根治指南:从 Naive RAG 到 Agentic RAG
2026-05-11
到底是谁会相信RAG已死啊?
2026-05-11
RAG又进化了!微软整了个企业级AgenticRAG
2026-03-23
2026-04-06
2026-02-22
2026-03-18
2026-03-20
2026-02-27
2026-02-21
2026-03-21
2026-03-31
2026-04-27
2026-05-18
2026-05-11
2026-05-07
2026-05-06
2026-04-27
2026-04-21
2026-03-17
2026-03-11