{"id":438,"date":"2026-03-06T13:21:24","date_gmt":"2026-03-06T05:21:24","guid":{"rendered":"https:\/\/www.liaoxinghui.com\/?p=438"},"modified":"2026-03-06T13:21:24","modified_gmt":"2026-03-06T05:21:24","slug":"build-rag-system-langchain-pinecone-document-qa","status":"publish","type":"post","link":"https:\/\/www.liaoxinghui.com\/?p=438","title":{"rendered":"\u6784\u5efaRAG\u7cfb\u7edf\uff1a\u4f7f\u7528LangChain\u548cPinecone\u5b9e\u73b0\u6587\u6863\u667a\u80fd\u95ee\u7b54"},"content":{"rendered":"<h2>1. \u4e1a\u52a1\u573a\u666f\u4e0e\u76ee\u6807<\/h2>\n<p>\u516c\u53f8\u5185\u90e8\u6709\u5927\u91cf\u6280\u672f\u6587\u6863\uff0c\u5982\u5f00\u53d1\u6307\u5357\u3001API\u6587\u6863\u548c\u6545\u969c\u6392\u9664\u624b\u518c\u3002\u5458\u5de5\u5728\u67e5\u627e\u4fe1\u606f\u65f6\u6548\u7387\u4f4e\u4e0b\uff0c\u7ecf\u5e38\u9700\u8981\u7ffb\u9605\u591a\u4e2a\u6587\u4ef6\u3002\u76ee\u6807\u662f\u6784\u5efa\u4e00\u4e2aRAG\u7cfb\u7edf\uff0c\u901a\u8fc7\u68c0\u7d22\u589e\u5f3a\u751f\u6210\u6280\u672f\uff0c\u5c06\u6587\u6863\u7d22\u5f15\u5230\u5411\u91cf\u6570\u636e\u5e93Pinecone\uff0c\u7ed3\u5408LangChain\u548c\u751f\u6210\u6a21\u578b\uff0c\u5b9e\u73b0\u5feb\u901f\u3001\u51c6\u786e\u7684\u667a\u80fd\u95ee\u7b54\u3002\u8fd9\u80fd\u63d0\u5347\u5458\u5de5\u67e5\u8be2\u6548\u7387\uff0c\u51cf\u5c11\u9519\u8bef\uff0c\u9002\u5408\u4f01\u4e1a\u5185\u90e8\u77e5\u8bc6\u7ba1\u7406\u573a\u666f\u3002<\/p>\n<h2>2. 
\u73af\u5883\u51c6\u5907<\/h2>\n<p>\u4f7f\u7528uv\u4f5c\u4e3aPython\u5305\u7ba1\u7406\u5668\u5feb\u901f\u5b89\u88c5\u4f9d\u8d56\u3002\u9996\u5148\uff0c\u786e\u4fdd\u5b89\u88c5\u4e86uv\uff08\u53ef\u901a\u8fc7<code>pip install uv<\/code>\u5b89\u88c5\uff09\uff0c\u7136\u540e\u521b\u5efa\u9879\u76ee\u76ee\u5f55\u5e76\u521d\u59cb\u5316\u4f9d\u8d56\u3002\u6838\u5fc3\u4f9d\u8d56\u5305\u62ecLangChain\u3001Pinecone\u5ba2\u6237\u7aef\u3001OpenAI\u5e93\u7b49\u3002\u8bf4\u660e\uff1a\u4e0b\u6587\u4ee3\u7801\u4f7f\u7528\u65e7\u7248\u63a5\u53e3\uff08\u5982<code>pinecone.init<\/code>\uff09\uff0c\u9700\u8981pinecone-client 2.x\u548cLangChain 0.0.x\uff1b\u65b0\u7248\u672c\u5df2\u53d8\u66f4\u8fd9\u4e9b\u63a5\u53e3\uff0c\u5b89\u88c5\u65f6\u9700\u6307\u5b9a\u7248\u672c\u3002<\/p>\n<pre><code class=\"lang-bash language-bash bash\"># \u521d\u59cb\u5316\u9879\u76ee\u5e76\u5b89\u88c5\u4f9d\u8d56\nuv init rag_project &amp;&amp; cd rag_project\nuv add langchain pinecone-client openai python-dotenv tiktoken<\/code><\/pre>\n<p>\u4f9d\u8d56\u8bf4\u660e\uff1a<\/p>\n<ul>\n<li><code>langchain<\/code>\uff1a\u6784\u5efaRAG\u94fe\u7684\u6838\u5fc3\u6846\u67b6\u3002<\/li>\n<li><code>pinecone-client<\/code>\uff1a\u8fde\u63a5Pinecone\u5411\u91cf\u6570\u636e\u5e93\u3002<\/li>\n<li><code>openai<\/code>\uff1a\u7528\u4e8e\u6587\u672c\u5d4c\u5165\u548c\u751f\u6210\u3002<\/li>\n<li><code>python-dotenv<\/code>\uff1a\u7ba1\u7406\u73af\u5883\u53d8\u91cf\u5982API\u5bc6\u94a5\u3002<\/li>\n<li><code>tiktoken<\/code>\uff1a\u7528\u4e8e\u6587\u672c\u5206\u5757\u548cToken\u8ba1\u6570\u3002<\/li>\n<\/ul>\n<h2>3. 
\u6570\u636e\u8bf4\u660e<\/h2>\n<p>\u5047\u8bbe\u6211\u4eec\u6709\u4f01\u4e1a\u5185\u90e8\u6280\u672f\u6587\u6863\uff0c\u4ee5Markdown\u683c\u5f0f\u5b58\u50a8\u3002\u5982\u679c\u6ca1\u6709\u771f\u5b9e\u6570\u636e\uff0c\u53ef\u4ee5\u6a21\u62df\u751f\u6210\u793a\u4f8b\u6587\u6863\u6765\u6f14\u793a\u903b\u8f91\u3002\u4ee5\u4e0b\u4ee3\u7801\u6a21\u62df\u751f\u621010\u4e2a\u7b80\u5355\u7684\u6280\u672f\u6587\u6863\u7247\u6bb5\u3002<\/p>\n<pre><code class=\"lang-python language-python python\"># \u6a21\u62df\u6570\u636e\u751f\u6210\u903b\u8f91\nimport os\nfrom langchain.schema import Document\n\n# \u521b\u5efa\u6a21\u62df\u6587\u6863\u76ee\u5f55\nos.makedirs(&#039;data&#039;, exist_ok=True)\n# \u751f\u6210\u793a\u4f8b\u6587\u6863\u5185\u5bb9\ntech_docs = [\n    &quot;\u5982\u4f55\u914d\u7f6eDocker\u5bb9\u5668\uff1a\u4f7f\u7528docker run\u547d\u4ee4\uff0c\u8bbe\u7f6e\u73af\u5883\u53d8\u91cfENV=prod\u3002&quot;,\n    &quot;API\u8ba4\u8bc1\u6307\u5357\uff1a\u8c03\u7528\u524d\u9700\u83b7\u53d6token\uff0c\u6709\u6548\u671f\u4e3a24\u5c0f\u65f6\u3002&quot;,\n    &quot;\u6545\u969c\u6392\u9664\uff1a\u670d\u52a1\u5668500\u9519\u8bef\uff0c\u68c0\u67e5\u65e5\u5fd7\u6587\u4ef6\/var\/log\/app.log\u3002&quot;,\n    &quot;\u6570\u636e\u5e93\u5907\u4efd\u6b65\u9aa4\uff1a\u4f7f\u7528pg_dump\u5de5\u5177\uff0c\u6bcf\u5929\u51cc\u6668\u6267\u884c\u3002&quot;,\n    &quot;\u524d\u7aef\u90e8\u7f72\uff1a\u8fd0\u884cnpm build\u540e\uff0c\u4e0a\u4f20\u5230CDN\u3002&quot;,\n    &quot;\u5b89\u5168\u6700\u4f73\u5b9e\u8df5\uff1a\u5b9a\u671f\u66f4\u65b0\u5bc6\u7801\uff0c\u542f\u7528\u53cc\u56e0\u7d20\u8ba4\u8bc1\u3002&quot;,\n    &quot;\u76d1\u63a7\u6307\u6807\uff1aCPU\u4f7f\u7528\u7387\u8d85\u8fc780%\u65f6\u53d1\u9001\u8b66\u62a5\u3002&quot;,\n    &quot;CI\/CD\u6d41\u6c34\u7ebf\uff1a\u4f7f\u7528GitHub Actions\uff0c\u96c6\u6210\u6d4b\u8bd5\u81ea\u52a8\u5316\u3002&quot;,\n    
&quot;\u4ee3\u7801\u5ba1\u67e5\u6d41\u7a0b\uff1a\u63d0\u4ea4PR\u540e\uff0c\u81f3\u5c11\u4e24\u540d\u540c\u4e8b\u5ba1\u6838\u3002&quot;,\n    &quot;\u6570\u636e\u8fc1\u79fb\u811a\u672c\uff1a\u4f7f\u7528Python pandas\uff0c\u5904\u7406CSV\u6587\u4ef6\u5bfc\u5165\u3002&quot;\n]\n# \u8f6c\u6362\u4e3aDocument\u5bf9\u8c61\ndocuments = [Document(page_content=doc, metadata={&quot;source&quot;: f&quot;doc_{i}&quot;}) for i, doc in enumerate(tech_docs)]\nprint(f&quot;\u751f\u6210{len(documents)}\u4e2a\u6587\u6863\u793a\u4f8b&quot;)<\/code><\/pre>\n<p>\u6570\u636e\u53e3\u5f84\uff1a\u6587\u6863\u4e3a\u7eaf\u6587\u672c\uff0c\u6bcf\u884c\u4e00\u4e2a\u77e5\u8bc6\u70b9\uff0c\u7528\u4e8e\u6f14\u793a\u7d22\u5f15\u548c\u68c0\u7d22\u3002<\/p>\n<h2>4. \u8bad\u7ec3\/\u5b9e\u73b0\u6b65\u9aa4<\/h2>\n<p>\u4ee5\u4e0b\u662f\u6784\u5efaRAG\u7cfb\u7edf\u7684\u5b8c\u6574\u4ee3\u7801\uff0c\u5305\u62ec\u6587\u6863\u7d22\u5f15\u548c\u95ee\u7b54\u94fe\u8bbe\u7f6e\u3002\u786e\u4fdd\u5df2\u8bbe\u7f6e\u73af\u5883\u53d8\u91cf<code>OPENAI_API_KEY<\/code>\u548c<code>PINECONE_API_KEY<\/code>\u3002<\/p>\n<pre><code class=\"lang-python language-python python\"># \u5b8c\u6574\u5b9e\u73b0\u6b65\u9aa4\uff1a\u52a0\u8f7d\u6587\u6863\u3001\u7d22\u5f15\u5230Pinecone\u3001\u8bbe\u7f6eRAG\u94fe\nimport os\nfrom langchain.text_splitter import RecursiveCharacterTextSplitter\nfrom langchain.embeddings.openai import OpenAIEmbeddings\nfrom langchain.vectorstores import Pinecone\nfrom langchain.chains import RetrievalQA\nfrom langchain.llms import OpenAI\nimport pinecone\n\n# \u52a0\u8f7d\u6a21\u62df\u6587\u6863\uff08\u4f7f\u7528\u4e0a\u4e00\u6b65\u751f\u6210\u7684documents\u53d8\u91cf\uff09\ntext_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)\ntexts = text_splitter.split_documents(documents)\n\n# \u521d\u59cb\u5316\u5d4c\u5165\u6a21\u578b\u548cPinecone\nembeddings = OpenAIEmbeddings()\npinecone.init(api_key=os.getenv(&#039;PINECONE_API_KEY&#039;), 
environment=&#039;us-east1-gcp&#039;)\nindex_name = &#039;rag-tech-docs&#039;\n# \u68c0\u67e5\u5e76\u521b\u5efaPinecone\u7d22\u5f15\nif index_name not in pinecone.list_indexes():\n    pinecone.create_index(index_name, dimension=1536, metric=&#039;cosine&#039;)\n# \u7d22\u5f15\u6587\u6863\u5230Pinecone\ndocsearch = Pinecone.from_documents(texts, embeddings, index_name=index_name)\nprint(&quot;\u6587\u6863\u7d22\u5f15\u5b8c\u6210\uff0c\u53ef\u7528\u4e8e\u68c0\u7d22&quot;)\n\n# \u8bbe\u7f6e\u68c0\u7d22\u548c\u751f\u6210\u94fe\nretriever = docsearch.as_retriever(search_kwargs={&quot;k&quot;: 3})\nllm = OpenAI(temperature=0)  # \u4f7f\u7528OpenAI GPT\u6a21\u578b\uff0c\u6e29\u5ea60\u964d\u4f4e\u968f\u673a\u6027\nqa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type=&quot;stuff&quot;, retriever=retriever)\nprint(&quot;RAG\u95ee\u7b54\u94fe\u51c6\u5907\u5c31\u7eea&quot;)<\/code><\/pre>\n<p>\u4efb\u52a1\u7c7b\u578b\uff1a\u8fd9\u662f\u4e00\u4e2aAI\u5de5\u7a0b\u5316\u90e8\u7f72\u4efb\u52a1\uff0c\u5177\u4f53\u662f\u6587\u6863\u68c0\u7d22\u4e0e\u751f\u6210\u7684\u7ba1\u9053\u6784\u5efa\u3002<\/p>\n<h2>5. 
\u8c03\u7528\u65b9\u5f0f<\/h2>\n<p>\u63d0\u4f9b\u79bb\u7ebf\u6279\u91cf\u7d22\u5f15\u548c\u5355\u6761\u95ee\u7b54\u793a\u4f8b\u3002\u79bb\u7ebf\u6279\u91cf\u7d22\u5f15\u5df2\u5728\u4e0a\u4e00\u6b65\u5b8c\u6210\uff0c\u8fd9\u91cc\u5c55\u793a\u5355\u6761\u67e5\u8be2\u7684\u8c03\u7528\u3002<\/p>\n<pre><code class=\"lang-python language-python python\"># \u5355\u6761\u95ee\u7b54\u793a\u4f8b\nquery = &quot;\u5982\u4f55\u914d\u7f6eDocker\u5bb9\u5668\uff1f&quot;\nanswer = qa_chain.run(query)\nprint(f&quot;\u95ee\u9898: {query}\\n\u56de\u7b54: {answer}&quot;)\n# \u79bb\u7ebf\u6279\u91cf\u5904\u7406\u793a\u4f8b\uff1a\u5047\u8bbe\u6709\u65b0\u6587\u6863\u9700\u8981\u7d22\u5f15\nnew_docs = [Document(page_content=&quot;\u65b0\u6587\u6863\u5185\u5bb9\uff1aKubernetes\u90e8\u7f72\u6307\u5357\u3002&quot;, metadata={&quot;source&quot;: &quot;new_doc&quot;})]\nnew_texts = text_splitter.split_documents(new_docs)\nPinecone.from_documents(new_texts, embeddings, index_name=index_name)\nprint(&quot;\u65b0\u6587\u6863\u6279\u91cf\u7d22\u5f15\u5b8c\u6210&quot;)<\/code><\/pre>\n<h2>6. 
\u6307\u6807\u8bf4\u660e<\/h2>\n<ul>\n<li><strong>RAG (Retrieval-Augmented Generation)<\/strong>\uff1a\u4e00\u79cdAI\u6280\u672f\uff0c\u5148\u68c0\u7d22\u76f8\u5173\u6587\u6863\uff0c\u518d\u7528\u751f\u6210\u6a21\u578b\u57fa\u4e8e\u68c0\u7d22\u7ed3\u679c\u56de\u7b54\u95ee\u9898\uff0c\u63d0\u9ad8\u51c6\u786e\u6027\u3002<\/li>\n<li><strong>\u5411\u91cf\u6570\u636e\u5e93 (\u5982Pinecone)<\/strong>\uff1a\u5b58\u50a8\u6587\u672c\u7684\u5411\u91cf\u5d4c\u5165\uff0c\u901a\u8fc7\u8ba1\u7b97\u76f8\u4f3c\u5ea6\u5feb\u901f\u68c0\u7d22\u76f8\u5173\u6587\u6863\u3002<\/li>\n<li><strong>\u5d4c\u5165 (Embedding)<\/strong>\uff1a\u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u6570\u5b57\u5411\u91cf\uff0c\u4f7f\u673a\u5668\u80fd\u7406\u89e3\u8bed\u4e49\u76f8\u4f3c\u6027\u3002<\/li>\n<li><strong>\u53ec\u56de\u7387 (Recall)<\/strong>\uff1a\u68c0\u7d22\u7cfb\u7edf\u627e\u5230\u6240\u6709\u76f8\u5173\u6587\u6863\u7684\u6bd4\u4f8b\uff0c\u8d8a\u9ad8\u8d8a\u597d\u3002<\/li>\n<li><strong>\u51c6\u786e\u7387 (Accuracy)<\/strong>\uff1a\u751f\u6210\u56de\u7b54\u7684\u6b63\u786e\u6027\uff0c\u53ef\u901a\u8fc7\u4eba\u5de5\u8bc4\u4f30\u8861\u91cf\u3002<\/li>\n<li><strong>\u5e7b\u89c9\u56de\u7b54 (Hallucination)<\/strong>\uff1a\u751f\u6210\u6a21\u578b\u4ea7\u751f\u4e0d\u57fa\u4e8e\u68c0\u7d22\u5185\u5bb9\u7684\u865a\u5047\u4fe1\u606f\u3002<\/li>\n<li><strong>\u68c0\u7d22\u94fe (RetrievalQA)<\/strong>\uff1aLangChain\u4e2d\u7684\u7ec4\u4ef6\uff0c\u7ec4\u5408\u68c0\u7d22\u5668\u548c\u751f\u6210\u6a21\u578b\u8fdb\u884c\u95ee\u7b54\u3002<\/li>\n<\/ul>\n<h2>7. 
\u4e0a\u7ebf\u540e\u8bc4\u4f30<\/h2>\n<p>\u7cfb\u7edf\u4e0a\u7ebf\u540e\uff0c\u76d1\u63a7\u4ee5\u4e0b\u8fd0\u884c\u6307\u6807\uff1a<\/p>\n<ul>\n<li><strong>\u67e5\u8be2\u5ef6\u8fdf<\/strong>\uff1a\u6bcf\u6b21\u95ee\u7b54\u7684\u54cd\u5e94\u65f6\u95f4\uff0c\u76ee\u6807\u4f4e\u4e8e2\u79d2\u3002<\/li>\n<li><strong>\u51c6\u786e\u7387<\/strong>\uff1a\u968f\u673a\u62bd\u6837\u95ee\u7b54\uff0c\u7531\u4e13\u5bb6\u8bc4\u4f30\u56de\u7b54\u662f\u5426\u6b63\u786e\uff0c\u76ee\u6807\u5927\u4e8e90%\u3002<\/li>\n<li><strong>\u53ec\u56de\u7387<\/strong>\uff1a\u6d4b\u8bd5\u67e5\u8be2\u65f6\uff0c\u68c0\u67e5\u662f\u5426\u68c0\u7d22\u5230\u6240\u6709\u76f8\u5173\u6587\u6863\u3002<\/li>\n<\/ul>\n<p>\u8bc4\u4f30\u65b9\u6cd5\uff1a\u4f7f\u7528\u6d4b\u8bd5\u96c6\u8fdb\u884c\u81ea\u52a8\u5316\u6d4b\u8bd5\uff0c\u5e76\u5b9a\u671f\u4eba\u5de5\u5ba1\u6838\uff0c\u8c03\u6574\u68c0\u7d22\u53c2\u6570\u6216\u6587\u6863\u5206\u5757\u7b56\u7565\u3002<\/p>\n<h2>8. \u5e38\u89c1\u5751\u4e0e\u6392\u67e5<\/h2>\n<ul>\n<li><strong>API\u5bc6\u94a5\u9519\u8bef<\/strong>\uff1a\u786e\u4fdd<code>OPENAI_API_KEY<\/code>\u548c<code>PINECONE_API_KEY<\/code>\u5df2\u6b63\u786e\u8bbe\u7f6e\u73af\u5883\u53d8\u91cf\uff0c\u53ef\u901a\u8fc7<code>os.getenv<\/code>\u68c0\u67e5\u3002<\/li>\n<li><strong>\u68c0\u7d22\u7cbe\u5ea6\u4f4e<\/strong>\uff1a\u6587\u6863\u8d28\u91cf\u5dee\u6216\u5206\u5757\u4e0d\u5f53\uff1b\u589e\u5927\u5206\u5757\u91cd\u53e0\u6216\u8c03\u6574\u5206\u5757\u5927\u5c0f\uff0c\u4f8b\u5982\u8bbe\u7f6e<code>chunk_size=200<\/code>\u548c<code>chunk_overlap=50<\/code>\u3002<\/li>\n<li><strong>\u751f\u6210\u5e7b\u89c9\u56de\u7b54<\/strong>\uff1a\u964d\u4f4e\u751f\u6210\u6a21\u578b\u6e29\u5ea6\uff08\u5982<code>temperature=0<\/code>\uff09\uff0c\u5e76\u786e\u4fdd\u68c0\u7d22\u6587\u6863\u76f8\u5173\uff1b\u53ef\u6dfb\u52a0\u540e\u5904\u7406\u8fc7\u6ee4\u3002<\/li>\n<li><strong>Pinecone\u7d22\u5f15\u4e0d\u5b58\u5728<\/strong>\uff1a\u8fd0\u884c\u524d\u68c0\u67e5\u7d22\u5f15\u540d\u79f0\uff0c\u4f7f\u7528<code>pinecone.list_indexe
s()<\/code>\u786e\u8ba4\u3002<\/li>\n<\/ul>","protected":false},"excerpt":{"rendered":"<p>\u672c\u6587\u624b\u628a\u624b\u6559\u4f60\u5982\u4f55\u7528LangChain\u548cPinecone\u642d\u5efa\u4e00\u4e2aRAG\u7cfb\u7edf\uff0c\u5b9e\u73b0\u4f01\u4e1a\u5185\u90e8\u6280\u672f\u6587\u6863\u7684\u667a\u80fd\u95ee\u7b54\uff0c\u5305\u542b\u5b8c\u6574\u4ee3\u7801\u548c\u5de5\u7a0b\u7ec6\u8282\uff0c\u5e2e\u52a9AI\u5de5\u7a0b\u5e08\u5feb\u901f\u843d\u5730\u5e94\u7528\u3002<\/p>","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[94,98],"tags":[128,136,135],"class_list":["post-438","post","type-post","status-publish","format-standard","hentry","category-ai","tag-rag","tag-135"],"views":166,"_links":{"self":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts\/438","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=438"}],"version-history":[{"count":1,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts\/438\/revisions"}],"predecessor-version":[{"id":445,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts\/438\/revisions\/445"}],"wp:attachment":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=438"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=438"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2F
tags&post=438"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}