Try

dfssdfsfdsf
2026-04-09 21:00:04 +02:00
parent 983b67e6fc
commit 89bc86b615
27 changed files with 2921 additions and 408 deletions
--- a/server/services/ai-sorter.mjs
+++ b/server/services/ai-sorter.mjs
@@ -7,6 +7,132 @@ import { Mistral } from '@mistralai/mistralai'
 import { config } from '../config/index.mjs'
 import { log } from '../middleware/logger.mjs'

+/**
+ * Wait for the given delay; used to back off between retry attempts.
+ * @param {number} ms - Delay in milliseconds.
+ * @returns {Promise<void>} Settles once the timer has fired.
+ */
+function sleep(ms) {
+  return new Promise((resolve) => {
+    setTimeout(resolve, ms)
+  })
+}
+
+/**
+ * Tell whether an error looks like an HTTP 503 (service unavailable).
+ * Looks at `status`, `statusCode` and `response.status` first, then falls
+ * back to scanning the lower-cased error message.
+ * @param {unknown} error - Error thrown by the API call (may be null/undefined).
+ * @returns {boolean} True when the error carries a 503 signal.
+ */
+function is503Error(error) {
+  const httpStatus = error?.status ?? error?.statusCode ?? error?.response?.status
+  if (httpStatus === 503) {
+    return true
+  }
+  const text = String(error?.message || '').toLowerCase()
+  return ['503', 'service unavailable'].some((needle) => text.includes(needle))
+}
+
+/**
+ * Decide whether a failed API call is worth retrying.
+ * Retryable means a 503 (as judged by is503Error) or a 429 / rate-limit
+ * signal found in the status fields or in the error message.
+ * @param {unknown} err - Error thrown by the API call (may be null/undefined).
+ * @returns {boolean} True when the caller should back off and retry.
+ */
+function isRetryableError(err) {
+  if (is503Error(err)) return true
+  const status = err?.status ?? err?.statusCode ?? err?.response?.status
+  if (status === 429) return true
+  // String() mirrors is503Error: this runs inside catch blocks, so a
+  // non-string `message` must not raise a TypeError of its own.
+  const msg = String(err?.message || '').toLowerCase()
+  return ['429', 'rate limit', 'too many requests'].some((hint) => msg.includes(hint))
+}
+
+/**
+ * Rule-based fallback when Mistral is unavailable or rate-limited.
+ *
+ * Rules are tried in order and the first match wins:
+ * security → newsletters → promotions → invoices → calendar → vip → review.
+ * Security is tested first because it is the narrowest rule (subject AND
+ * sender must match). When newsletters ran first it claimed every "noreply"
+ * sender, so a password-reset mail from noreply@… could never reach the
+ * security rule.
+ *
+ * Matching is case-insensitive substring matching on `from` and `subject`.
+ * `snippet` is part of the accepted shape but is not consulted by any rule.
+ *
+ * @param {{ from?: string, subject?: string, snippet?: string }} emailData
+ * @returns {'security'|'newsletters'|'promotions'|'invoices'|'calendar'|'vip'|'review'}
+ */
+export function ruleBasedCategory(emailData) {
+  // Coerce defensively: this is the last-resort path after the AI call has
+  // failed, so a missing email or non-string header must not throw here.
+  const from = String(emailData?.from || '').toLowerCase()
+  const subject = String(emailData?.subject || '').toLowerCase()
+
+  const hasAny = (text, needles) => needles.some((needle) => text.includes(needle))
+
+  // SECURITY — ONLY real security alerts (very specific): subject AND sender
+  const securitySubject =
+    hasAny(subject, [
+      'security alert',
+      'sign-in',
+      'new login',
+      'suspicious',
+      'verify your',
+      '2fa',
+      'two-factor',
+      'password reset',
+    ]) ||
+    (subject.includes('passwort') && subject.includes('zurücksetzen'))
+  const securitySender = hasAny(from, ['security', 'noreply', 'no-reply', 'accounts', 'alerts'])
+  if (securitySubject && securitySender) {
+    return 'security'
+  }
+
+  // NEWSLETTERS — automated / mass senders, unsubscribe wording in the subject
+  if (
+    hasAny(from, ['noreply', 'no-reply', 'newsletter', 'marketing']) ||
+    hasAny(subject, ['newsletter', 'unsubscribe', 'abbestellen'])
+  ) {
+    return 'newsletters'
+  }
+
+  // PROMOTIONS — sales, offers, discounts
+  if (
+    hasAny(subject, ['sale', 'offer', 'deal', 'discount', '% off', 'angebot', 'rabatt']) ||
+    hasAny(from, ['promo', 'deals', 'offers'])
+  ) {
+    return 'promotions'
+  }
+
+  // INVOICES — billing documents ('bill ' keeps its trailing space on purpose,
+  // so words such as "billing" or "billboard" do not match)
+  if (
+    hasAny(subject, ['invoice', 'rechnung', 'payment', 'zahlung', 'bill ', 'receipt', 'quittung'])
+  ) {
+    return 'invoices'
+  }
+
+  // CALENDAR — meetings and events
+  if (
+    hasAny(subject, ['meeting', 'invitation', 'calendar', 'appointment', 'termin', 'einladung']) ||
+    subject.endsWith('.ics')
+  ) {
+    return 'calendar'
+  }
+
+  // VIP — personal direct emails (not an automated sender, short subject)
+  const automatedSender = hasAny(from, ['noreply', 'no-reply', 'newsletter', 'info@'])
+  if (!automatedSender && subject.length > 3 && subject.length < 60) {
+    return 'vip'
+  }
+
+  // DEFAULT — review (not security!)
+  return 'review'
+}
+
+/**
+ * Pacing knobs for Mistral calls. They are only exported here; per the
+ * original note, the IMAP sort in email.mjs is the consumer.
+ */
+// Presumably the number of emails sent per chunk — confirm against email.mjs.
+export const AI_BATCH_CHUNK_SIZE = 5
+// Presumably the pause between chunks, in milliseconds — confirm against email.mjs.
+export const AI_BATCH_CHUNK_DELAY_MS = 2000
+
 /**
 * Email categories with metadata
 * Uses Gmail categories where available
@@ -67,7 +193,8 @@ const CATEGORIES = {
  },
  security: {
    name: 'Security',
-    description: 'Security codes and notifications',
+    description:
+      'ONLY real account-security mail: login alerts (new sign-in, suspicious activity), password reset/change, 2FA/MFA codes, device verification. NOT marketing, shipping alerts, price drops, social notifications, or generic “notification” subjects.',
    color: '#f44336',
    gmailCategory: null,
    action: 'inbox',      // Keep in inbox (important!)
@@ -396,7 +523,12 @@ export class AISorterService {
   */
  async categorize(email, preferences = {}) {
    if (!this.enabled) {
-      return { category: 'review', confidence: 0, reason: 'AI not configured' }
+      return {
+        category: ruleBasedCategory(email),
+        confidence: 0,
+        reason: 'AI not configured',
+        assignedTo: null,
+      }
    }

    const { from, subject, snippet } = email
@@ -409,6 +541,13 @@ export class AISorterService {
 AVAILABLE CATEGORIES:
 ${Object.entries(CATEGORIES).map(([key, cat]) => `- ${key}: ${cat.name} - ${cat.description}`).join('\n')}

+CLASSIFICATION RULES (important):
+- security: Use ONLY for genuine account safety: password reset/change, 2FA/MFA codes, new device login, suspicious sign-in warnings from the service itself. Do NOT use security for marketing, newsletters, order/shipping "alerts", price alerts, social network notifications, or anything that merely says "alert" or "notification".
+- social: Social networks, likes, follows, mentions, friend requests, activity digests.
+- newsletters: Recurring digests, blogs, Substack, product updates that are not personal.
+- promotions: Sales, discounts, ads, deals.
+- review: When unsure or mixed — prefer review over guessing security.
+
 ${preferenceContext}

 EMAIL:
@@ -422,36 +561,78 @@ If the email is clearly FOR a specific worker (e.g. "für Max", "an Anna", subje

 Respond ONLY with the JSON object.`

-    try {
-      const response = await this.client.chat.complete({
-        model: this.model,
-        messages: [{ role: 'user', content: prompt }],
-        temperature: 0.1,
-        maxTokens: 150,
-        responseFormat: { type: 'json_object' },
-      })
-
-      const content = response.choices[0]?.message?.content
+    const parseAndValidate = (content) => {
      const result = JSON.parse(content)
-
-      // Validate category
      if (!CATEGORIES[result.category]) {
        result.category = 'review'
      }
-
-      // Validate assignedTo against name labels (id or name)
      if (result.assignedTo && preferences.nameLabels?.length) {
        const match = preferences.nameLabels.find(
-          l => l.enabled && (l.id === result.assignedTo || l.name === result.assignedTo)
+          (l) => l.enabled && (l.id === result.assignedTo || l.name === result.assignedTo)
        )
        if (!match) result.assignedTo = null
        else result.assignedTo = match.id || match.name
      }
-
      return result
-    } catch (error) {
-      log.error('AI categorization failed', { error: error.message })
-      return { category: 'review', confidence: 0, reason: 'Categorization error' }
+    }
+
+    let attempt = 0
+    let used503Backoff = false
+    while (true) {
+      try {
+        const response = await this.client.chat.complete({
+          model: this.model,
+          messages: [{ role: 'user', content: prompt }],
+          temperature: 0.1,
+          maxTokens: 150,
+          responseFormat: { type: 'json_object' },
+        })
+
+        const content = response.choices[0]?.message?.content
+        return parseAndValidate(content)
+      } catch (error) {
+        if (!isRetryableError(error)) {
+          log.error('AI categorization failed', { error: error.message })
+          return {
+            category: ruleBasedCategory(email),
+            confidence: 0,
+            reason: 'Categorization error',
+            assignedTo: null,
+          }
+        }
+        if (is503Error(error)) {
+          if (!used503Backoff) {
+            used503Backoff = true
+            log.warn('Mistral 503 (service unavailable), retry in 5s', { attempt: attempt + 1 })
+            await sleep(5000)
+            continue
+          }
+          log.warn('Mistral 503 after retry, using rule-based fallback')
+          return {
+            category: ruleBasedCategory(email),
+            confidence: 0,
+            reason: '503 — rule-based fallback',
+            assignedTo: null,
+          }
+        }
+        if (attempt >= 2) {
+          log.warn('Mistral rate limit after retries, using rule-based fallback')
+          return {
+            category: ruleBasedCategory(email),
+            confidence: 0,
+            reason: 'Rate limit — rule-based fallback',
+            assignedTo: null,
+          }
+        }
+        if (attempt === 0) {
+          log.warn('Mistral rate limit (429), retry in 2s', { attempt: attempt + 1 })
+          await sleep(2000)
+        } else {
+          log.warn('Mistral rate limit (429), retry in 5s', { attempt: attempt + 1 })
+          await sleep(5000)
+        }
+        attempt++
+      }
    }
  }

@@ -460,9 +641,14 @@ Respond ONLY with the JSON object.`
   */
  async batchCategorize(emails, preferences = {}) {
    if (!this.enabled || emails.length === 0) {
-      return emails.map(e => ({
+      return emails.map((e) => ({
        email: e,
-        classification: { category: 'review', confidence: 0, reason: 'AI not available' },
+        classification: {
+          category: ruleBasedCategory(e),
+          confidence: 0,
+          reason: 'AI not available',
+          assignedTo: null,
+        },
      }))
    }

@@ -486,7 +672,9 @@ Respond ONLY with the JSON object.`
    const prompt = `You are an email sorting assistant. Categorize the following ${emails.length} emails.

 CATEGORIES:
-${Object.entries(CATEGORIES).map(([key, cat]) => `${key}: ${cat.name}`).join(' | ')}
+${Object.entries(CATEGORIES).map(([key, cat]) => `${key}: ${cat.name} — ${cat.description}`).join('\n')}
+
+RULES: Use "security" ONLY for real account safety (password/2FA/login alerts). NOT for marketing alerts, shipping updates, or social notifications — use promotions, newsletters, social, or review instead.

 ${preferenceContext}

@@ -499,7 +687,7 @@ If an email is clearly FOR a specific worker, set assignedTo to that worker's id

 Respond ONLY with the JSON array.`

-    try {
+    const runBatchRequest = async () => {
      const response = await this.client.chat.complete({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
@@ -511,7 +699,6 @@ Respond ONLY with the JSON array.`
      const content = response.choices[0]?.message?.content
      let parsed

-      // Handle both array and object responses
      try {
        parsed = JSON.parse(content)
        if (parsed.results) parsed = parsed.results
@@ -519,17 +706,16 @@ Respond ONLY with the JSON array.`
          throw new Error('Not an array')
        }
      } catch {
-        // Fallback to individual processing
        return this._fallbackBatch(emails, preferences)
      }

      return emails.map((email, i) => {
-        const result = parsed.find(r => r.index === i)
+        const result = parsed.find((r) => r.index === i)
        const category = result?.category && CATEGORIES[result.category] ? result.category : 'review'
        let assignedTo = result?.assignedTo || null
        if (assignedTo && preferences.nameLabels?.length) {
          const match = preferences.nameLabels.find(
-            l => l.enabled && (l.id === assignedTo || l.name === assignedTo)
+            (l) => l.enabled && (l.id === assignedTo || l.name === assignedTo)
          )
          assignedTo = match ? (match.id || match.name) : null
        }
@@ -538,10 +724,68 @@ Respond ONLY with the JSON array.`
          classification: { category, confidence: 0.8, reason: 'Batch', assignedTo },
        }
      })
-    } catch (error) {
-      log.error('Batch categorization failed', { error: error.message })
-      return this._fallbackBatch(emails, preferences)
    }
+
+    let attempt = 0
+    let used503Backoff = false
+    while (true) {
+      try {
+        return await runBatchRequest()
+      } catch (error) {
+        if (!isRetryableError(error)) {
+          log.error('Batch categorization failed', { error: error.message })
+          return this._fallbackBatch(emails, preferences)
+        }
+        if (is503Error(error)) {
+          if (!used503Backoff) {
+            used503Backoff = true
+            log.warn('Mistral batch 503 (service unavailable), retry in 5s', { attempt: attempt + 1 })
+            await sleep(5000)
+            continue
+          }
+          log.warn('Mistral batch 503 after retry, rule-based per email')
+          return emails.map((email) => ({
+            email,
+            classification: {
+              category: ruleBasedCategory(email),
+              confidence: 0,
+              reason: '503 — rule-based fallback',
+              assignedTo: null,
+            },
+          }))
+        }
+        if (attempt >= 2) {
+          log.warn('Mistral batch rate limit after retries, rule-based per email')
+          return emails.map((email) => ({
+            email,
+            classification: {
+              category: ruleBasedCategory(email),
+              confidence: 0,
+              reason: 'Rate limit — rule-based fallback',
+              assignedTo: null,
+            },
+          }))
+        }
+        if (attempt === 0) {
+          log.warn('Mistral batch rate limit (429), retry in 2s', { attempt: attempt + 1 })
+          await sleep(2000)
+        } else {
+          log.warn('Mistral batch rate limit (429), retry in 5s', { attempt: attempt + 1 })
+          await sleep(5000)
+        }
+        attempt++
+      }
+    }
+
+    return emails.map((email) => ({
+      email,
+      classification: {
+        category: ruleBasedCategory(email),
+        confidence: 0,
+        reason: 'Rate limit — rule-based fallback',
+        assignedTo: null,
+      },
+    }))
  }

  /**