Wenn ich den Proxy-Dienst direkt vom JavaScript-Frontend aus aufrufe
Code: Select all
const res = await fetch(API_URL ...
Relevanter Code – Web-App (.NET 4.7.2), der Aufrufer
Code: Select all
/// Controller
[RoutePrefix("LLMProxy")]
public class LLMProxyController : Controller
{
    /// <summary>
    /// Receives the chat request from the Razor front end, forwards it to the
    /// LLM proxy via <see cref="LLMProxyBusinessModel"/>, and relays the proxy's
    /// JSON reply back to the browser.
    /// NOTE(review): the forum paste stripped generic type arguments; this method
    /// returns ActionResults, so the signature must have been Task&lt;ActionResult&gt; — restored.
    /// </summary>
    [HttpPost]
    [Route("Chat")]
    public async Task<ActionResult> Query()
    {
        string prefix = ConfigurationManager.AppSettings["ParameterStorePrefix"];
        string parsedPref = TenantHelper.NormalizeParameterStorePrefix(prefix);

        // Rewind the body stream: MVC model binding / earlier readers may already
        // have consumed it, so read the raw JSON from position 0.
        Request.InputStream.Position = 0;
        using var reader = new System.IO.StreamReader(Request.InputStream);
        var body = await reader.ReadToEndAsync().ConfigureAwait(false);

        // The paste also lost the generic DTO type here; 'dynamic' keeps the
        // .Prompt/.JsonBlob/.ChatbotId accesses below compiling.
        // TODO(review): restore the real request DTO type.
        dynamic dto = Newtonsoft.Json.JsonConvert.DeserializeObject(body);
        if (dto == null || string.IsNullOrWhiteSpace((string)dto.Prompt))
            return new HttpStatusCodeResult(400, "Missing prompt");

        var raw = dto.JsonBlob?.ToString(Newtonsoft.Json.Formatting.None);

        var proxyBusinessModel = new LLMProxyBusinessModel();
        var result = await proxyBusinessModel.PostLLMChat(new Entity.ChatInputEntity
        {
            // ...
        }, (string)dto.ChatbotId).ConfigureAwait(false);

        var json = Newtonsoft.Json.JsonConvert.SerializeObject(result);
        return Content(json, "application/json");
    }
}
/// LLMProxyBusinessModel
public class LLMProxyBusinessModel
{
    // Single shared HttpClient for the process. Creating a new HttpClient (and
    // handler) per call on .NET Framework leaks sockets: closed connections sit
    // in TIME_WAIT and the pool can wedge, which matches the reported symptom —
    // the call reaches the proxy but the caller's connection hangs until timeout,
    // and a restart temporarily "fixes" it.
    private static readonly HttpClient Http = CreateClient();

    private static HttpClient CreateClient()
    {
        var handler = new HttpClientHandler { UseProxy = false };
        var client = new HttpClient(handler) { Timeout = TimeSpan.FromMinutes(3) };
        client.DefaultRequestHeaders.ExpectContinue = false;
        client.DefaultRequestHeaders.Accept.Add(
            new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json"));
        return client;
    }

    /// <summary>
    /// Base URL of the LLM proxy from appSettings, with any trailing '/' removed.
    /// Throws a clear error instead of an NRE when the setting is absent.
    /// </summary>
    public string LLMProxyUrl
    {
        get
        {
            var configured = System.Configuration.ConfigurationManager.AppSettings["LLMProxyBaseUrl"];
            if (string.IsNullOrWhiteSpace(configured))
                throw new InvalidOperationException("appSetting 'LLMProxyBaseUrl' is missing or empty.");
            return configured.TrimEnd('/');
        }
    }

    /// <summary>
    /// POSTs the chat input to the proxy's /llm/chat endpoint and returns the
    /// deserialized response.
    /// NOTE(review): the forum paste stripped generics; the method returns a value,
    /// so it must have been Task&lt;T&gt;. Task&lt;object&gt; keeps the existing
    /// 'var result = await ...' caller working — restore the real response type.
    /// </summary>
    public async Task<object> PostLLMChat(ChatInputEntity input, string chatbotId)
    {
        var url = string.IsNullOrEmpty(chatbotId)
            ? $"{LLMProxyUrl}/llm/chat"
            : $"{LLMProxyUrl}/llm/chat?chatbotId={Uri.EscapeDataString(chatbotId)}";

        var json = Newtonsoft.Json.JsonConvert.SerializeObject(input);
        using var content = new StringContent(json, Encoding.UTF8, "application/json");
        using var resp = await Http.PostAsync(url, content).ConfigureAwait(false);

        // Read the body before EnsureSuccessStatusCode so error payloads are
        // available to a debugger even when the status is non-2xx.
        var raw = await resp.Content.ReadAsStringAsync().ConfigureAwait(false);
        resp.EnsureSuccessStatusCode();
        return Newtonsoft.Json.JsonConvert.DeserializeObject(raw);
    }
}
Code: Select all
/// Controller
[ApiController]
[Route("llm")]
public class LLMController(RespondService respondService, ChatService chatService, ILogger logger) : Controller
{
    // NOTE(review): non-generic ILogger is not registered by default in ASP.NET Core
    // DI — presumably the app registers it explicitly, or this should be
    // ILogger<LLMController>; confirm against Program.cs.
    private readonly ILogger _logger = logger;
    private readonly RespondService _respondService = respondService;
    private readonly ChatService _chatService = chatService;

    /// <summary>
    /// Handles a stateful chat request and returns the model's reply.
    /// NOTE(review): the forum paste stripped the generic return type; Ok/BadRequest/
    /// StatusCode require Task&lt;IActionResult&gt; — restored here.
    /// </summary>
    [EndpointName("Chat")]
    [EndpointSummary("Handles a chat request that maintains conversation state.")]
    [EndpointDescription("Can receive a system prompt to set the context for the chat session.")]
    [HttpPost("chat")]
    public async Task<IActionResult> Chat([FromBody] ChatInput input, [FromQuery] string chatbotId = "", CancellationToken ct = default)
    {
        // Validate before allocating anything.
        if (input is null)
        {
            _logger.LogWarning("Chat input is null.");
            return BadRequest("Chat input cannot be null.");
        }

        // 'using' added: the original leaked the linked CTS (and its CancelAfter
        // timer) on every request.
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct);
        linked.CancelAfter(TimeSpan.FromMinutes(3)); // == This was added as a test, no change ==

        _logger.LogInformation("Processing chat input: {Input}, chatbotId: {ChatbotId}", input, chatbotId);
        try
        {
            var response = await _chatService.HandleChatInput(input, chatbotId, HttpContext, linked.Token);
            return Ok(response);
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            // Our own 3-minute timeout fired (the caller did not cancel) — surface
            // it as a gateway timeout instead of a generic 500.
            _logger.LogError("Chat request timed out.");
            return StatusCode(504, "Upstream model call timed out.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error processing chat request.");
            return StatusCode(500, "Internal server error.");
        }
    }
}
/// Business logic
/// <summary>
/// Entry point for one chat turn: performs session/message setup (elided in the
/// paste as "// ..."), then delegates the actual model call to GetChatResponseAsync.
/// NOTE(review): the forum paste stripped generic arguments — this method returns a
/// value, so the real signature was presumably Task&lt;LLMOutput&gt;; as written
/// (Task plus a return statement) it cannot compile. Confirm against the repo.
/// </summary>
public async Task HandleChatInput(
ChatInput input,
string chatbotId,
HttpContext http,
CancellationToken ct = default)
{
// setup code, no HTTP, both go beyond this
// ...
// 'session' and 'messageId' come from the elided setup above — not visible here.
var response = await GetChatResponseAsync(input, chatbotId, session, messageId, ct);
return response;
}
/// <summary>
/// Resolves the kernel for the requested model/chatbot and performs the LLM call,
/// packaging the reply into an LLMOutput.
/// NOTE(review): the forum paste stripped generic arguments; Task&lt;LLMOutput&gt;
/// (required by the return statement) and the GetRequiredService&lt;T&gt; argument
/// are restored below — confirm the service type against the repo.
/// </summary>
private async Task<LLMOutput> GetChatResponseAsync(
    ChatInput input,
    string chatbotId,
    ChatSession session,
    string messageId,
    CancellationToken ct = default)
{
    var chatKernel = _router.GetKernelForModel(input, chatbotId);

    // GetChatMessageContentAsync below is the Semantic Kernel
    // IChatCompletionService API, so that is almost certainly the lost generic
    // argument — TODO confirm.
    var chat = chatKernel.GetRequiredService<IChatCompletionService>();

    // ==== CRITICAL PART ====
    // == In both cases, we reach this part. But when calling from the web app, I timeout here.
    var reply = await chat.GetChatMessageContentAsync(
        session.History,
        executionSettings: ChatHelper.GetExecutionSettings(chatbotId),
        kernel: chatKernel,
        cancellationToken: ct);

    // == When calling from the web app, we never reach the return and just time out above ==
    return new LLMOutput
    {
        Response = reply.Content,
        ReturnValue = session.ReturnValue,
        MessageId = messageId
    };
}
Code: Select all
/// When I call via the web app, whose Razor page I am within:
// Razor resolves the MVC route for LLMProxyController.Chat at render time,
// so this POST goes through the .NET 4.7.2 web app (the failing path).
const llmPostUrl = '@Url.Action("Chat", "LLMProxy", new { area = "" })';
const res = await fetch(llmPostUrl, {
method: "POST",
// NOTE(review): property names here are PascalCase while the direct proxy call
// uses camelCase — presumably binding is case-insensitive on both ends; confirm.
headers: HEADERS,
body: JSON.stringify({
UserId: userId,
Prompt: txt,
OrganisationId: organisationId,
WorkGroupId: workGroupId,
ChatbotId: chatbotId,
NewSession: createNewSession,
JsonBlob: jsonBlob, })
});
/// When I call via the same JS file but call the proxy directly
// Direct call to the ASP.NET Core proxy, bypassing the .NET 4.7.2 web app
// entirely (the working path).
var API_URL = "http://localhost:5183/llm"
// NOTE(review): this payload omits UserId/OrganisationId/WorkGroupId/NewSession
// that the web-app path sends, and adds reasoning/model — the two paths are not
// exercising identical requests; confirm this is intentional.
const res = await fetch(API_URL + "/chat?chatbotId=" + chatbotId, {
method: "POST",
headers: HEADERS,
body: JSON.stringify({
prompt: txt,
jsonBlob: jsonBlob,
reasoning: true,
model: null
})
});
Timeouts sind großzügig (Web-App 3 Min., LLM Proxy 150 Sek.). Der Modellaufruf ist normalerweise schnell, wenn er funktioniert.
HTTP-Protokolle zeigen:
Code: Select all
Start processing HTTP request POST https://api.openai.com/v1/chat/completions
Sending HTTP request POST https://api.openai.com/v1/chat/completions
... and then nothing until timeout.
Ein Neustart des LLM-Proxys kann das Verhalten umkehren (funktioniert > Neustart > hängt).
- Auf einen benannten IHttpClientFactory-Client („LLMProvider“) umgestellt und in Semantic Kernel injiziert.
- HTTP/1.1 erzwungen:
Code: Select all
http.DefaultRequestVersion = HttpVersion.Version11;
http.DefaultVersionPolicy = HttpVersionPolicy.RequestVersionOrLower;
- Verbindungsrecycling + Schnellverbindungsfehler über SocketsHttpHandler hinzugefügt:
Code: Select all
ConnectTimeout = 10s
PooledConnectionLifetime = 5m
PooledConnectionIdleTimeout = 2m
DnsRefreshTimeout = 5m
MaxConnectionsPerServer = 256
ExpectContinue = false
UseProxy = false
- One-Shot-Anfragen mit „Connection: close“-Header (um gepoolte Sockets zu umgehen) – keine Änderung.
- Vorübergehend deaktivierte TLS-Sperrprüfungen / angeheftetes TLS 1.2 – Keine Änderung.
Mobile version