Unten finden Sie den Python-Codeausschnitt, der den Index in ACS erstellt und die Dokumente aus dem Azure Databricks-Notebook hochlädt.
Code: Select all
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
SimpleField,
SearchFieldDataType,
SearchableField,
SearchField,
VectorSearch,
HnswAlgorithmConfiguration,
VectorSearchProfile,
SemanticConfiguration,
SemanticPrioritizedFields,
SemanticField,
SemanticSearch,
SearchIndex,
AzureOpenAIVectorizer,
AzureOpenAIVectorizerParameters
)
# Azure Cognitive Search setup
service_endpoint = "https://yourserviceendpoint.search.windows.net"
admin_key = "ABC"
index_name = "courses-index"
# Wrap admin_key in AzureKeyCredential
credential = AzureKeyCredential(admin_key)
# Create the index client with AzureKeyCredential
index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)
# Define the index schema
fields = [
SimpleField(name="id", type="Edm.String", key=True),
SimpleField(name="content", type="Edm.String"),
SearchField(
name="embedding",
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
searchable=True,
vector_search_dimensions=384,
vector_search_profile_name="myHnswProfile"
)
# SearchField(name="embedding", type='Collection(Edm.Single)', searchable=True)
]
# Configure the vector search configuration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="myHnsw"
)
],
profiles=[
VectorSearchProfile(
name="myHnswProfile",
algorithm_configuration_name="myHnsw"
)
]
)
# Create the index
index = SearchIndex(
name=index_name,
fields=fields,
vector_search=vector_search
)
# Send the index creation request
index_client.create_index(index)
print(f"Index '{index_name}' created successfully.")
Code: Select all
from azure.search.documents import SearchClient
# Generate embeddings and upload data
search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)
documents = []
for i, row in courses_pd.iterrows():
document = {
"id": str(i),
"content": row["content"],
"embedding": row["embeddings"] # Ensure embeddings are a list of floats
}
documents.append(document)
# Upload documents to the index
search_client.upload_documents(documents=documents)
print(f"Uploaded {len(documents)} documents to Azure Cognitive Search.")
Code: Select all
from azure.search.documents.models import VectorQuery
# Generate embedding for the query
query = "machine learning"
query_embedding = model.encode(query).tolist() # Convert to list of floats
# Create a VectorQuery
vector_query = VectorQuery(
vector=query_embedding,
k=3, # Number of nearest neighbors
fields="embedding" # Name of the field where embeddings are stored
)
# Perform the search
results = search_client.search(
vector_queries=[vector_query],
select=["id", "content"]
)
# Print the results
for result in results:
print(f"ID: {result['id']}, Content: {result['content']}")
Code: Select all
vector is not a known attribute of class and will be ignored
k is not a known attribute of class and will be ignored
HttpResponseError: (InvalidRequestParameter) The vector query's 'kind' parameter is not set.
Dokumente werden hochgeladen und der Index wird erstellt, wie ich im Portal sehen kann.

Ich muss etwas tun Falsch, entweder die Art und Weise, wie ich den Suchindex eingerichtet habe oder die Art und Weise, wie ich den Index abfrage, die Dokumentation und die Github-Codebasis bieten diesbezüglich nichts, daher ist die Suche nach Hilfe in der Community neu in diesem Bereich.