chat.py
- Path: /var/www/scripts/pipeline/chat.py
- Namespace: pipeline
- Lines: 222 | Size: 6,096 bytes
- Modified: 2025-12-27 15:34:45 | Scanned: 2025-12-31 10:22:15
Code Hygiene Score: 84 (weighted average of the components below)
- Dependencies: 50 (weight 25%)
- LOC: 92 (weight 20%)
- Methods: 100 (weight 20%)
- Secrets: 100 (weight 15%)
- Classes: 100 (weight 10%)
- Magic Numbers: 80 (weight 10%)
Issues (2)

| Line | Type | Description |
|------|------|-------------|
| 161 | magic_number | Magic number found: 60 |
| 163 | magic_number | Magic number found: 60 |
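Both findings flag the same literal. Assuming lines 161 and 163 are the `print("=" * 60)` banner lines in interactive_mode(), a minimal refactor sketch would hoist the width into a named constant; the name SEPARATOR_WIDTH is hypothetical and not part of chat.py or constants.py today:

```python
# Hypothetical refactor for the two magic_number findings.
SEPARATOR_WIDTH = 60  # width of the "=" banner printed by interactive_mode()


def print_banner():
    print("=" * SEPARATOR_WIDTH)
    print("RAG-Chat - Systemisches Teamcoaching")
    print("=" * SEPARATOR_WIDTH)
```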
Dependencies (10)
- use sys
- use requests
- use config.ANTHROPIC_API_KEY
- use config.ANTHROPIC_MODEL
- use config.EMBEDDING_MODEL
- use config.OLLAMA_HOST
- use config.QDRANT_HOST
- use config.QDRANT_PORT
- use constants.OLLAMA_TIMEOUT
- use anthropic
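chat.py only consumes the names above from config.py and constants.py. The sketch below shows what those modules are expected to provide; every value is a placeholder chosen for illustration, not the real configuration:

```python
# config.py -- hypothetical sketch; only the names come from chat.py's imports.
ANTHROPIC_API_KEY = ""                     # empty string makes chat() fall back to Ollama
ANTHROPIC_MODEL = "<anthropic-model-id>"
EMBEDDING_MODEL = "<ollama-embedding-model>"
OLLAMA_HOST = "http://localhost:11434"     # Ollama default port
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333                         # Qdrant default port

# constants.py -- hypothetical sketch
OLLAMA_TIMEOUT = 30                        # seconds
```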
Functions (9)
- get_embedding() (line 19)
- search_qdrant() (line 28)
- build_context() (line 39)
- get_sources() (line 58)
- ask_claude() (line 72)
- ask_ollama() (line 104)
- chat() (line 122)
- interactive_mode() (line 159)
- main() (line 198)
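Besides the CLI, chat() can be called as a library function from another script in the pipeline namespace. A minimal sketch, assuming chat.py and its config/constants modules are importable; the question string is only an example:

```python
# Hypothetical caller using the public chat() function.
from chat import chat

result = chat("Was ist systemisches Teamcoaching?", use_claude=False, top_k=3)
if "error" in result:
    print(result["error"])
else:
    print(result["answer"])
    for src in result["sources"]:
        print(f"- {src['title']} (Score: {src['score']})")
```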
Code
#!/usr/bin/env python3
"""
RAG-Chat für KI-System
Semantische Suche + LLM-Antwort mit Quellenangabe.
Usage:
    python chat.py "Deine Frage hier"
    python chat.py  # Interaktiver Modus
"""
import sys

import requests

from config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, EMBEDDING_MODEL, OLLAMA_HOST, QDRANT_HOST, QDRANT_PORT
from constants import OLLAMA_TIMEOUT


def get_embedding(text):
    """Get embedding from Ollama."""
    response = requests.post(
        f"{OLLAMA_HOST}/api/embeddings", json={"model": EMBEDDING_MODEL, "prompt": text}, timeout=OLLAMA_TIMEOUT
    )
    response.raise_for_status()
    return response.json().get("embedding")


def search_qdrant(query_embedding, limit=5):
    """Search for similar chunks in Qdrant."""
    response = requests.post(
        f"http://{QDRANT_HOST}:{QDRANT_PORT}/collections/documents/points/search",
        json={"vector": query_embedding, "limit": limit, "with_payload": True},
        timeout=30,
    )
    response.raise_for_status()
    return response.json().get("result", [])


def build_context(results, max_tokens=3000):
    """Build context from search results."""
    context_parts = []
    total_chars = 0
    max_chars = max_tokens * 4  # Rough estimate
    for i, result in enumerate(results):
        content = result["payload"].get("content", "")
        doc_title = result["payload"].get("document_title", "Unbekannt")
        if total_chars + len(content) > max_chars:
            break
        context_parts.append(f"[Quelle {i + 1}: {doc_title}]\n{content}")
        total_chars += len(content)
    return "\n\n---\n\n".join(context_parts)


def get_sources(results):
    """Extract source information from results."""
    sources = []
    seen = set()
    for result in results:
        doc_title = result["payload"].get("document_title", "")
        if doc_title and doc_title not in seen:
            sources.append({"title": doc_title, "score": round(result["score"], 3)})
            seen.add(doc_title)
    return sources


def ask_claude(question, context):
    """Get answer from Claude API."""
    import anthropic

    system_prompt = """Du bist ein hilfreicher Assistent für Fragen zu systemischem Teamcoaching und Teamentwicklung.
Beantworte die Frage des Nutzers basierend auf dem bereitgestellten Kontext.
- Antworte auf Deutsch
- Sei präzise und hilfreich
- Wenn der Kontext die Frage nicht beantwortet, sage das ehrlich
- Verweise auf die Quellen wenn passend"""
    user_prompt = f"""Kontext aus den Dokumenten:
{context}
---
Frage: {question}"""
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    message = client.messages.create(
        model=ANTHROPIC_MODEL,
        max_tokens=1500,
        system=system_prompt,
        messages=[{"role": "user", "content": user_prompt}],
    )
    return message.content[0].text


def ask_ollama(question, context, model="gemma3:27b-it-qat"):
    """Get answer from Ollama (local LLM)."""
    prompt = f"""Du bist ein hilfreicher Assistent. Beantworte die Frage basierend auf dem Kontext.
Kontext:
{context}
Frage: {question}
Antwort:"""
    response = requests.post(
        f"{OLLAMA_HOST}/api/generate", json={"model": model, "prompt": prompt, "stream": False}, timeout=120
    )
    response.raise_for_status()
    return response.json().get("response", "")


def chat(question, use_claude=True, top_k=5):
    """
    Main RAG chat function.

    Args:
        question: User's question
        use_claude: Use Claude API (True) or Ollama (False)
        top_k: Number of chunks to retrieve

    Returns:
        dict with answer and sources
    """
    # 1. Get embedding for question
    query_embedding = get_embedding(question)
    if not query_embedding:
        return {"error": "Embedding generation failed"}
    # 2. Search for relevant chunks
    results = search_qdrant(query_embedding, limit=top_k)
    if not results:
        return {"error": "No relevant documents found"}
    # 3. Build context
    context = build_context(results)
    # 4. Get answer from LLM
    if use_claude and ANTHROPIC_API_KEY:
        answer = ask_claude(question, context)
    else:
        answer = ask_ollama(question, context)
    # 5. Get sources
    sources = get_sources(results)
    return {"question": question, "answer": answer, "sources": sources, "chunks_used": len(results)}


def interactive_mode():
    """Run interactive chat session."""
    print("=" * 60)
    print("RAG-Chat - Systemisches Teamcoaching")
    print("=" * 60)
    print("Frage stellen oder 'quit' zum Beenden\n")
    while True:
        try:
            question = input("Du: ").strip()
            if not question:
                continue
            if question.lower() in ["quit", "exit", "q"]:
                print("Auf Wiedersehen!")
                break
            print("\nSuche relevante Dokumente...")
            result = chat(question)
            if "error" in result:
                print(f"Fehler: {result['error']}\n")
                continue
            print(f"\nAssistent: {result['answer']}\n")
            if result["sources"]:
                print("Quellen:")
                for src in result["sources"]:
                    print(f" - {src['title']} (Score: {src['score']})")
                print()
        except KeyboardInterrupt:
            print("\n\nAuf Wiedersehen!")
            break
        except Exception as e:
            print(f"Fehler: {e}\n")


def main():
    """CLI entry point for interactive chat or single question mode."""
    if len(sys.argv) > 1:
        # Single question mode
        question = " ".join(sys.argv[1:])
        result = chat(question)
        if "error" in result:
            print(f"Fehler: {result['error']}")
            return
        print(f"\nAntwort: {result['answer']}\n")
        if result["sources"]:
            print("Quellen:")
            for src in result["sources"]:
                print(f" - {src['title']} (Score: {src['score']})")
    else:
        # Interactive mode
        interactive_mode()


if __name__ == "__main__":
    main()
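A minimal smoke-test sketch for the error path in chat(), assuming pytest-style execution and that chat.py (plus its config and constants modules) imports cleanly in the test environment; the patched return values are made up:

```python
# Hypothetical test; patches the module-level helpers that chat() calls.
from unittest.mock import patch

import chat


def test_chat_reports_missing_results():
    # With an embedding but no Qdrant hits, chat() should return the error dict
    # instead of calling either LLM backend.
    with patch.object(chat, "get_embedding", return_value=[0.1, 0.2, 0.3]), \
         patch.object(chat, "search_qdrant", return_value=[]):
        result = chat.chat("Testfrage")
    assert result == {"error": "No relevant documents found"}
```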