Eine Herausforderung, während Sie versuchen, Azure Search Dokument hinzuzufügenPython

Python-Programme
Anonymous
 Eine Herausforderung, während Sie versuchen, Azure Search Dokument hinzuzufügen

Post by Anonymous »

Ich suche nach einer Möglichkeit, Azure AI Search mit Vektorindexierung mit Daten vom Confluence-Data-Center-Server abzufragen. Ich verwende den folgenden Python-Code.
Ich erhalte einen Fehler
Fehler – Fehler beim Hochladen von Dokumenten in Azure (die Fehlermeldung bezieht sich auf 'dict'), ausgelöst durch den Aufruf vector_store.add_documents(split_docs)
I review
https://python.langchain.com/docs/conce ... torstores/
https://python.langchain.com/docs/conce ... splitters/
Ich verstehe, dass ich die Struktur von split_docs in eine Liste konvertieren muss. Ist das der richtige Ansatz? manuelle Aktionen. Bereits automatisierte Aktionen werden von der folgenden Anleitung entfernt. Es ist auch möglich, dass einige Versionen "Y" -Teil übersprungen, also ist jede y-1-Version x-1. 'https://confluencewiki.xxx.com/display/ ... se+-+Rules+ und+Steps+For+10.5+line', ' wenn ': '2024-08-12t05: 06: 02.270-07: 00'} < /p>

Code: Select all

import logging
import os

from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SimpleField,
)
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ConfluenceLoader
from langchain_community.vectorstores import AzureSearch
from langchain_core.documents import Document
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

# Configure root logging once for the whole script: INFO level, every record
# mirrored both to a log file on disk and to the console.
_log_handlers = [
    logging.FileHandler("confluence_rag.log"),  # Log to a file
    logging.StreamHandler(),  # Also log to console
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)

# Configuration.
# Secrets and endpoints are read from the environment when available so real
# credentials never have to live in source control; the literals below are
# the original placeholders, kept as defaults for backward compatibility.
CONFLUENCE_URL = os.getenv("CONFLUENCE_URL", "https://confluencewiki.XXX.com/")  # Base URL of your Confluence Data Center
CONFLUENCE_USERNAME = os.getenv("CONFLUENCE_USERNAME", "XXZ")
CONFLUENCE_TOKEN = os.getenv("CONFLUENCE_TOKEN", "X+X")  # Use password or personal access token
CONFLUENCE_SPACE_KEY = os.getenv("CONFLUENCE_SPACE_KEY", "SCAN")  # Optional: Specify a space key to limit the scope

AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT", "https://XXX.search.windows.net")
AZURE_SEARCH_INDEX_NAME = os.getenv("AZURE_SEARCH_INDEX_NAME", "confluence-index")
AZURE_SEARCH_API_KEY = os.getenv("AZURE_SEARCH_API_KEY", "XXX")

AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "https://XXX.openai.azure.com/")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "XXX")
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "embeddingmodel")
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "embeddingmodel")  # Embedding deployment name

# --- Azure OpenAI embeddings -------------------------------------------------
# Build the embedding client and run a single smoke-test query so that a bad
# endpoint / key / deployment name fails fast at startup rather than later.
logger.info("Initializing Azure OpenAI Embeddings with deployment: %s", AZURE_OPENAI_EMBEDDING_DEPLOYMENT)
try:
    embeddings = AzureOpenAIEmbeddings(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_key=AZURE_OPENAI_API_KEY,
        deployment=AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
        api_version="2023-05-15",
    )
    test_embedding = embeddings.embed_query("Test sentence")
    logger.info("Embedding test successful: %s dimensions", len(test_embedding))
except Exception as err:
    logger.error(f"Failed to initialize Azure OpenAI Embeddings: {err}")
    raise

# --- Azure OpenAI chat model -------------------------------------------------
# The LLM used later by the RetrievalQA chain to synthesise answers.
logger.info("Initializing Azure OpenAI Chat Model with deployment: %s", AZURE_OPENAI_DEPLOYMENT_NAME)
try:
    llm = AzureChatOpenAI(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_key=AZURE_OPENAI_API_KEY,
        deployment_name=AZURE_OPENAI_DEPLOYMENT_NAME,
        api_version="2023-05-15",
        temperature=0.7,  # mildly creative answers; lower for more deterministic output
    )
except Exception as err:
    logger.error(f"Failed to initialize Azure OpenAI Chat Model: {err}")
    raise

# --- Step 1: load Confluence data --------------------------------------------
# cloud=False selects the Data Center / Server API flavour of the loader.
logger.info("Loading Confluence data from space: %s", CONFLUENCE_SPACE_KEY)
loader = ConfluenceLoader(
    url=CONFLUENCE_URL,
    token=CONFLUENCE_TOKEN,
    cloud=False,
    space_key=CONFLUENCE_SPACE_KEY,
    limit=100,  # page-size cap per request
)
try:
    documents = loader.load()
except Exception as err:
    logger.error(f"Failed to load Confluence data: {err}")
    raise
else:
    logger.info("Loaded %s documents from Confluence", len(documents))

# --- Step 2: split documents into overlapping chunks -------------------------
# 1000-char chunks with 200-char overlap so context is not lost at boundaries.
logger.info("Splitting documents into chunks")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(documents)
logger.info(f"Split into {len(split_docs)} chunks")
# Guard: when the loader returned nothing, split_docs is empty and
# split_docs[0] would raise IndexError.
if split_docs:
    logger.info(f"First split_doc: {split_docs[0].page_content}, Metadata: {split_docs[0].metadata}")

# Sanity-check connectivity and credentials against the Azure Search service
# before attempting to build or fill the index.
logger.info("Testing Azure Search connection")
try:
    search_credential = AzureKeyCredential(AZURE_SEARCH_API_KEY)
    index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=search_credential)
    index_client.get_service_statistics()  # cheap call that exercises auth + endpoint
    logger.info("Azure Search connection successful")
except Exception as err:
    logger.error(f"Azure Search connection failed: {err}")
    raise

# --- Step 3: initialize the Azure Search vector store ------------------------
# NOTE(review): the original passed `fields` as plain dicts plus an unsupported
# `vector_search_configuration` kwarg.  LangChain's AzureSearch expects
# azure.search.documents.indexes.models field objects, and add_documents()
# writes the fields id / content / content_vector / metadata — a custom schema
# must therefore contain exactly those names.  The plain dicts are what caused
# the 'dict' error on upload.
logger.info(f"Initializing Azure Search Vector Store with index: {AZURE_SEARCH_INDEX_NAME}")
try:
    index_fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
        SearchableField(name="content", type=SearchFieldDataType.String, searchable=True),
        SearchField(
            name="content_vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=1536,  # must match the embedding model's output size
            # Profile name must match the default profile AzureSearch creates
            # for the index ("myHnswProfile" in current langchain-community) —
            # TODO confirm against the installed version.
            vector_search_profile_name="myHnswProfile",
        ),
        SearchableField(name="metadata", type=SearchFieldDataType.String, searchable=True),
    ]
    vector_store = AzureSearch(
        azure_search_endpoint=AZURE_SEARCH_ENDPOINT,
        azure_search_key=AZURE_SEARCH_API_KEY,
        index_name=AZURE_SEARCH_INDEX_NAME,
        embedding_function=embeddings.embed_query,
        fields=index_fields,
    )
except Exception as e:
    logger.error(f"Failed to initialize Azure Search Vector Store: {e}")
    logger.debug(f"Endpoint: {AZURE_SEARCH_ENDPOINT}, Index: {AZURE_SEARCH_INDEX_NAME}")
    raise

# --- Step 4: add documents to Azure Search -----------------------------------
logger.info("Uploading documents to Azure Search")
# Log a sample of what is about to be uploaded.  Guarded: either list may be
# empty, and indexing [0] on an empty list raises IndexError.
if split_docs:
    logger.info(f"First document type: {type(split_docs[0])}")
    logger.info(f"First document content: {split_docs[0]}")
if documents:
    logger.info(f"First loaded document type: {type(documents[0])}")
    logger.info(f"First loaded document content: {documents[0]}")

try:
    vector_store.add_documents(split_docs)
    logger.info(f"Uploaded {len(split_docs)} documents to Azure Search")
except Exception as e:
    logger.error(f"Failed to upload documents to Azure Search: {e}")
    raise

# --- Step 5: retrieval-augmented QA chain ------------------------------------
# "stuff" chain type: all retrieved chunks are concatenated into one prompt.
logger.info("Setting up RetrievalQA chain")
retriever = vector_store.as_retriever(search_kwargs={"k": 5})  # top-5 chunks per query
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,  # keep sources so callers can cite them
)

# Step 6: Query the System
def query_confluence(user_query):
    """Run *user_query* through the RetrievalQA chain.

    Returns a ``(answer, source_documents)`` tuple.  Any error raised by the
    chain is logged and re-raised unchanged.
    """
    logger.info(f"Querying Confluence with: {user_query}")
    try:
        response = qa_chain({"query": user_query})
        logger.info("Query completed successfully")
        return response["result"], response["source_documents"]
    except Exception as e:
        logger.error(f"Query failed: {e}")
        raise

# Example Usage
if __name__ == "__main__":
    user_query = "Please point out the most important data from IT space?"
    logger.debug(f"Starting main execution with query: {user_query}")
    answer, sources = query_confluence(user_query)
    logger.info("Answer generated:")
    print("Answer:", answer)
    print("\nSources:")
    for doc in sources:
        # .get() guards: Confluence chunk metadata is not guaranteed to carry
        # both 'title' and 'id'; a missing key should not crash the report.
        title = doc.metadata.get("title", "<untitled>")
        doc_id = doc.metadata.get("id", "<no id>")
        logger.info(f"Source: {title} (ID: {doc_id})")
        print(f"- {title} (ID: {doc_id})")

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post