Files
Emailsorter/server/services/ai-sorter.mjs
ANDJ cbb225c001 feat: Gitea Webhook, IMAP, Settings & Deployment docs
- Webhook route and Gitea integration
- IMAP service and Nextcloud/Porkbun setup docs
- Settings UI improvements and API updates
- SSH/Webhook fix prompt for emailsorter.webklar.com
- Bootstrap, config and AI sorter updates
2026-01-31 15:00:00 +01:00

644 lines
19 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* AI Email Sorter Service
* Uses Mistral AI for intelligent email categorization
*/
import { Mistral } from '@mistralai/mistralai'
import { config } from '../config/index.mjs'
import { log } from '../middleware/logger.mjs'
/**
 * Catalogue of the categories the sorter can assign to an email.
 *
 * Each entry holds a display name, a description, a hex color, the Gmail
 * system category it maps to (null when there is none), a default action and
 * a numeric priority.
 *
 * Default actions:
 * - 'star': keep in inbox and add a star (VIP)
 * - 'inbox': keep in inbox
 * - 'archive_read': archive and mark as read (cleans the inbox)
 */
const CATEGORIES = Object.fromEntries(
  [
    // key, name, description, color, gmailCategory, action, priority
    ['vip', 'Important', 'Important emails from known contacts', '#ff0000', null, 'star', 1],
    ['customers', 'Clients', 'Emails from clients and projects', '#4285f4', null, 'inbox', 2],
    ['invoices', 'Invoices', 'Invoices, receipts and financial documents', '#0f9d58', null, 'inbox', 3],
    ['newsletters', 'Newsletter', 'Regular newsletters and updates', '#9c27b0', 'CATEGORY_UPDATES', 'archive_read', 4],
    ['promotions', 'Promotions', 'Marketing emails and promotions', '#ff9800', 'CATEGORY_PROMOTIONS', 'archive_read', 5],
    ['social', 'Social', 'Social media and platform notifications', '#00bcd4', 'CATEGORY_SOCIAL', 'archive_read', 6],
    // Security mail stays in the inbox on purpose (important!)
    ['security', 'Security', 'Security codes and notifications', '#f44336', null, 'inbox', 1],
    ['calendar', 'Calendar', 'Calendar invites and events', '#673ab7', null, 'inbox', 3],
    // Fallback bucket: stays in the inbox so a human can look at it
    ['review', 'Review', 'Emails that need manual review', '#607d8b', null, 'inbox', 10],
  ].map(([key, name, description, color, gmailCategory, action, priority]) => [
    key,
    { name, description, color, gmailCategory, action, priority },
  ])
)
/**
 * Lookup table from sender domain to company name, used for automatic
 * company detection. Declared per company and flattened into one
 * domain -> name map.
 */
const KNOWN_COMPANIES = Object.fromEntries(
  [
    ['Amazon', ['amazon.com', 'amazon.de', 'amazon.co.uk', 'amazon.fr']],
    ['Google', ['google.com', 'gmail.com']],
    ['Microsoft', ['microsoft.com', 'outlook.com', 'hotmail.com']],
    ['Apple', ['apple.com', 'icloud.com']],
    ['Facebook', ['facebook.com']],
    ['Meta', ['meta.com']],
    ['Twitter', ['twitter.com', 'x.com']],
    ['LinkedIn', ['linkedin.com']],
    ['GitHub', ['github.com']],
    ['Netflix', ['netflix.com']],
    ['Spotify', ['spotify.com']],
    ['PayPal', ['paypal.com']],
    ['Stripe', ['stripe.com']],
    ['Shopify', ['shopify.com']],
    ['Uber', ['uber.com']],
    ['Airbnb', ['airbnb.com']],
    ['Dropbox', ['dropbox.com']],
    ['Slack', ['slack.com']],
    ['Zoom', ['zoom.us']],
  ].flatMap(([company, domains]) => domains.map(domain => [domain, company]))
)
/**
 * Read `key` from `table` as an OWN property only.
 * Plain `table[key]` also finds inherited members, so a key such as
 * "constructor" or "toString" would otherwise look like a real entry.
 */
function ownValue(table, key) {
  if (table === null || table === undefined) return undefined
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined
}

/** Return `value` when it is an array, otherwise an empty array. */
function asList(value) {
  return Array.isArray(value) ? value : []
}

/**
 * Extract the lower-cased address part of a From header.
 * `Name <user@host>` yields `user@host`; a header without angle brackets is
 * returned as a whole. Non-string input yields an empty string.
 */
function senderAddress(from) {
  if (typeof from !== 'string') return ''
  const bracketed = from.match(/<([^<>]*)>/)
  return ((bracketed && bracketed[1]) || from).trim().toLowerCase()
}

/** Extract the lower-cased domain of a From header, or null when there is none. */
function senderDomain(from) {
  const match = senderAddress(from).match(/@([^\s>]+)/)
  return match ? match[1] : null
}

/**
 * Evaluate a company-label condition against an already lower-cased sender
 * and subject.
 *
 * Grammar: terms joined by ` OR ` / ` AND ` (AND binds tighter than OR).
 * A term may contain `from:<text>`, `subject:<text>` and/or an `@domain`;
 * it matches when any of those is contained in the sender / subject.
 */
function evaluateLabelCondition(condition, from, subject) {
  const expr = condition.trim()
  if (!expr) return false

  // Operators must be resolved BEFORE looking at from:/subject:, otherwise the
  // first matching term short-circuits the whole expression and AND is ignored.
  if (expr.includes(' OR ')) {
    return expr.split(' OR ').some(part => evaluateLabelCondition(part, from, subject))
  }
  if (expr.includes(' AND ')) {
    return expr.split(' AND ').every(part => evaluateLabelCondition(part, from, subject))
  }

  // Sender and subject are lower-cased, so the condition text has to be as well.
  const term = expr.toLowerCase()
  if (term.includes('from:')) {
    const needle = term.split('from:')[1]?.trim().split(' ')[0]
    if (needle && from.includes(needle)) return true
  }
  if (term.includes('subject:')) {
    const keyword = term.split('subject:')[1]?.trim().split(' ')[0]
    if (keyword && subject.includes(keyword)) return true
  }
  // Simple domain match ("@example.com")
  if (term.includes('@')) {
    const domain = term.split('@')[1]?.trim()
    if (domain && from.includes(domain)) return true
  }
  return false
}

/**
 * AI Sorter Service
 *
 * Categorizes emails with Mistral AI and offers helpers around the category
 * catalogue (labels, colors, actions), company detection, company-label
 * conditions and rule suggestions. When no API key is configured the service
 * stays disabled and every classification falls back to "review".
 */
export class AISorterService {
  constructor() {
    this.client = null
    this.model = 'mistral-small-latest'
    this.enabled = Boolean(config.mistral.apiKey)
    if (this.enabled) {
      this.client = new Mistral({ apiKey: config.mistral.apiKey })
      log.info('AI Sorter Service initialized with Mistral AI')
    } else {
      log.warn('AI Sorter Service disabled - no MISTRAL_API_KEY')
    }
  }

  /**
   * Get all categories
   * @returns {object} the category catalogue, keyed by category key
   */
  getCategories() {
    return CATEGORIES
  }

  /**
   * Get category names as array
   * @returns {string[]} display names in catalogue order
   */
  getCategoryNames() {
    return Object.values(CATEGORIES).map(category => category.name)
  }

  /**
   * Get label name for a category key
   * @param {string} key - category key
   * @returns {string} display name; the "review" name for unknown keys
   */
  getLabelName(key) {
    return ownValue(CATEGORIES, key)?.name || CATEGORIES.review.name
  }

  /**
   * Get Gmail category ID if available
   * @param {string} key - category key
   * @returns {string|null} Gmail category id, or null when there is none
   */
  getGmailCategory(key) {
    return ownValue(CATEGORIES, key)?.gmailCategory || null
  }

  /**
   * Get action for category (respects user preferences)
   * @param {string} key - category key
   * @param {object} [preferences] - may carry a `categoryActions` override map
   * @returns {string} the user's override, else the default action, else 'inbox'
   */
  getCategoryAction(key, preferences = {}) {
    // User override wins over the catalogue default
    const override = ownValue(preferences?.categoryActions, key)
    if (override) return override
    return ownValue(CATEGORIES, key)?.action || 'inbox'
  }

  /**
   * Get color for category
   * @param {string} key - category key
   * @returns {string} hex color; '#607d8b' for unknown keys
   */
  getCategoryColor(key) {
    return ownValue(CATEGORIES, key)?.color || '#607d8b'
  }

  /**
   * Get enabled categories based on user preferences
   * @param {object} [preferences] - may carry an `enabledCategories` array
   * @returns {string[]} valid category keys; all keys when nothing is configured
   */
  getEnabledCategories(preferences = {}) {
    const requested = Array.isArray(preferences?.enabledCategories)
      ? preferences.enabledCategories
      : Object.keys(CATEGORIES)
    // Drop anything that is not a real catalogue key
    return requested.filter(key => Boolean(ownValue(CATEGORIES, key)))
  }

  /**
   * Detect company from email address
   * @param {{from?: string}} email
   * @returns {{name: string, domain: string, label: string}|null} the matched
   *   company and the known domain that matched, or null
   */
  detectCompany(email) {
    const domain = senderDomain(email?.from)
    if (!domain) return null

    // Try the full domain first, then every parent domain down to two labels
    // (mail.amazon.co.uk -> amazon.co.uk -> co.uk). Checking only the last two
    // labels would miss known domains under multi-part suffixes like co.uk.
    const labels = domain.split('.')
    for (let start = 0; start < labels.length - 1; start++) {
      const candidate = labels.slice(start).join('.')
      const company = ownValue(KNOWN_COMPANIES, candidate)
      if (company) {
        return { name: company, domain: candidate, label: company }
      }
    }
    return null
  }

  /**
   * Generate suggested rules based on email patterns
   * Counts senders, domains and categories in the samples and proposes VIP
   * senders, company labels and auto-archive rules for the frequent ones.
   * @param {string} userId - not used by the current implementation
   * @param {Array<{from?: string, category?: string}>} emailSamples
   * @returns {Promise<object[]>} at most 5 suggestions, highest confidence first
   */
  async generateSuggestedRules(userId, emailSamples) {
    if (!Array.isArray(emailSamples) || emailSamples.length === 0) {
      return []
    }

    // Maps instead of plain objects: sender/domain strings come from email
    // headers and must not collide with inherited object members.
    const senderCounts = new Map()
    const domainCounts = new Map()
    const categoryCounts = new Map()
    const bump = (counts, key) => counts.set(key, (counts.get(key) || 0) + 1)

    for (const email of emailSamples) {
      const sender = senderAddress(email?.from)
      // Emails without a sender must not add up to a bogus "" sender
      if (sender) {
        bump(senderCounts, sender)
        const domain = senderDomain(sender)
        if (domain) bump(domainCounts, domain)
      }
      bump(categoryCounts, email?.category || 'review')
    }

    const totalEmails = emailSamples.length
    const threshold = Math.max(3, Math.ceil(totalEmails * 0.1)) // At least 3 emails or 10% of total
    const frequent = counts =>
      [...counts].filter(([, count]) => count >= threshold).sort(([, a], [, b]) => b - a)

    const suggestions = []

    // Suggest VIP senders (frequent senders)
    for (const [sender, count] of frequent(senderCounts).slice(0, 3)) {
      suggestions.push({
        type: 'vip_sender',
        name: `Mark ${sender.split('@')[0]} as VIP`,
        description: `${count} emails from this sender`,
        confidence: Math.min(0.9, count / totalEmails),
        action: {
          type: 'add_vip',
          email: sender,
        },
      })
    }

    // Suggest company labels (frequent domains that are not already known)
    const unknownDomains = frequent(domainCounts)
      .filter(([domain]) => !ownValue(KNOWN_COMPANIES, domain))
      .slice(0, 3)
    for (const [domain, count] of unknownDomains) {
      const [firstLabel] = domain.split('.')
      const companyName = firstLabel.charAt(0).toUpperCase() + firstLabel.slice(1)
      suggestions.push({
        type: 'company_label',
        name: `Label ${companyName} emails`,
        description: `${count} emails from ${domain}`,
        confidence: Math.min(0.85, count / totalEmails),
        action: {
          type: 'add_company_label',
          name: companyName,
          condition: `from:${domain}`,
          category: 'promotions', // Default, user can change
        },
      })
    }

    // Suggest category-specific rules based on patterns
    const newsletterCount = categoryCounts.get('newsletters') || 0
    if (newsletterCount >= threshold) {
      suggestions.push({
        type: 'category_rule',
        name: 'Archive newsletters automatically',
        description: `${newsletterCount} newsletter emails detected`,
        confidence: 0.8,
        action: {
          type: 'enable_category',
          category: 'newsletters',
          action: 'archive_read',
        },
      })
    }
    const promotionCount = categoryCounts.get('promotions') || 0
    if (promotionCount >= threshold) {
      suggestions.push({
        type: 'category_rule',
        name: 'Archive promotions automatically',
        description: `${promotionCount} promotion emails detected`,
        confidence: 0.75,
        action: {
          type: 'enable_category',
          category: 'promotions',
          action: 'archive_read',
        },
      })
    }

    // Sort by confidence and return top 5
    return suggestions
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, 5)
  }

  /**
   * Check if email matches a company label condition
   * Supports "from:domain.com", "subject:keyword", "@domain.com" and
   * combinations with " OR " / " AND " (AND binds tighter). Matching is
   * case-insensitive.
   * @param {{from?: string, subject?: string}} email
   * @param {{enabled?: boolean, condition?: string}} companyLabel
   * @returns {boolean}
   */
  matchesCompanyLabel(email, companyLabel) {
    if (!companyLabel?.enabled || typeof companyLabel.condition !== 'string') return false
    const from = typeof email?.from === 'string' ? email.from.toLowerCase() : ''
    const subject = typeof email?.subject === 'string' ? email.subject.toLowerCase() : ''
    return evaluateLabelCondition(companyLabel.condition, from, subject)
  }

  /**
   * Map the model's `assignedTo` value onto a configured, enabled name label.
   * Ids are compared as strings, names case-insensitively.
   * @param {*} candidate - id or name returned by the model
   * @param {object} preferences - may carry a `nameLabels` array
   * @returns {*} the label's id (or name when it has no id), or null when the
   *   value does not correspond to an enabled label
   */
  _resolveAssignee(candidate, preferences) {
    if (candidate === null || candidate === undefined || candidate === '') return null
    const wanted = String(candidate).trim()
    const wantedLower = wanted.toLowerCase()
    const match = asList(preferences?.nameLabels).find(label => {
      if (!label?.enabled) return false
      if (label.id !== undefined && label.id !== null && String(label.id) === wanted) return true
      return typeof label.name === 'string' && label.name.toLowerCase() === wantedLower
    })
    return match ? (match.id || match.name) : null
  }

  /**
   * Categorize a single email
   * @param {{from?: string, subject?: string, snippet?: string}} email
   * @param {object} [preferences] - user preferences fed into the prompt
   * @returns {Promise<object>} `{category, confidence, reason, assignedTo}`;
   *   falls back to category "review" with confidence 0 when the service is
   *   disabled or the request / response parsing fails (never rejects for
   *   those cases)
   */
  async categorize(email, preferences = {}) {
    if (!this.enabled) {
      return { category: 'review', confidence: 0, reason: 'AI not configured' }
    }
    const { from, subject, snippet } = email ?? {}
    const preview = typeof snippet === 'string' ? snippet.substring(0, 500) : ''
    // Build context from preferences
    const preferenceContext = this._buildPreferenceContext(preferences)
    const categoryList = Object.entries(CATEGORIES)
      .map(([key, cat]) => `- ${key}: ${cat.name} - ${cat.description}`)
      .join('\n')
    const prompt = [
      'You are an intelligent email sorting assistant. Analyze the following email and categorize it.',
      'AVAILABLE CATEGORIES:',
      categoryList,
      preferenceContext,
      'EMAIL:',
      `From: ${from}`,
      `Subject: ${subject}`,
      `Preview: ${preview || 'No preview'}`,
      'RESPONSE FORMAT (JSON ONLY):',
      '{"category": "category_key", "confidence": 0.0-1.0, "reason": "brief explanation", "assignedTo": "name_label_id_or_name_or_null"}',
      'If the email is clearly FOR a specific worker (e.g. "für Max", "an Anna", subject/body mentions them), set assignedTo to that worker\'s id or name. Otherwise set assignedTo to null or omit it.',
      'Respond ONLY with the JSON object.',
    ].join('\n')

    try {
      const response = await this.client.chat.complete({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.1,
        maxTokens: 150,
        responseFormat: { type: 'json_object' },
      })
      const content = response?.choices?.[0]?.message?.content
      const result = JSON.parse(content)
      if (result === null || typeof result !== 'object' || Array.isArray(result)) {
        throw new Error('Model response is not a JSON object')
      }
      // Validate category (own keys only, so "constructor" etc. are rejected)
      if (!ownValue(CATEGORIES, result.category)) {
        result.category = 'review'
      }
      // Keep confidence a number within [0, 1]
      const confidence = Number(result.confidence)
      result.confidence = Number.isFinite(confidence) ? Math.min(1, Math.max(0, confidence)) : 0
      // Validate assignedTo against name labels; anything that is not a
      // configured, enabled label is discarded
      result.assignedTo = this._resolveAssignee(result.assignedTo, preferences)
      return result
    } catch (error) {
      log.error('AI categorization failed', { error: error.message })
      return { category: 'review', confidence: 0, reason: 'Categorization error' }
    }
  }

  /**
   * Extract the list of per-email results from a batch response.
   * Accepts `{"results": [...]}`, a bare array, or an object whose first
   * array-valued property holds the list.
   * @param {*} content - raw message content returned by the model
   * @returns {Array|null} the result entries, or null when none can be found
   */
  _parseBatchResponse(content) {
    if (typeof content !== 'string') return null
    let parsed
    try {
      parsed = JSON.parse(content)
    } catch {
      return null
    }
    if (Array.isArray(parsed)) return parsed
    if (parsed === null || typeof parsed !== 'object') return null
    if (Array.isArray(parsed.results)) return parsed.results
    return Object.values(parsed).find(Array.isArray) || null
  }

  /**
   * Batch categorize multiple emails
   * Up to 5 emails are categorized one by one; larger batches are sent in a
   * single request and fall back to one-by-one processing when the request
   * fails or its response cannot be parsed.
   * @param {object[]} emails
   * @param {object} [preferences]
   * @returns {Promise<Array<{email: object, classification: object}>>} one
   *   entry per input email, in input order
   */
  async batchCategorize(emails, preferences = {}) {
    const batch = asList(emails)
    if (!this.enabled || batch.length === 0) {
      return batch.map(email => ({
        email,
        classification: { category: 'review', confidence: 0, reason: 'AI not available' },
      }))
    }
    // For small batches, process individually
    if (batch.length <= 5) {
      const results = []
      for (const email of batch) {
        const classification = await this.categorize(email, preferences)
        results.push({ email, classification })
      }
      return results
    }

    // For larger batches, use batch request
    const preferenceContext = this._buildPreferenceContext(preferences)
    const emailList = batch
      .map((e, i) => {
        const preview = typeof e?.snippet === 'string' ? e.snippet.substring(0, 200) : ''
        return `[${i}] From: ${e?.from} | Subject: ${e?.subject} | Preview: ${preview || '-'}`
      })
      .join('\n')
    const categoryList = Object.entries(CATEGORIES)
      .map(([key, cat]) => `${key}: ${cat.name}`)
      .join(' | ')
    // The request uses the json_object response format, so the prompt asks for
    // an object wrapping the list rather than a bare array.
    const prompt = [
      `You are an email sorting assistant. Categorize the following ${batch.length} emails.`,
      'CATEGORIES:',
      categoryList,
      preferenceContext,
      'EMAILS:',
      emailList,
      'RESPONSE FORMAT (JSON OBJECT ONLY):',
      '{"results": [{"index": 0, "category": "key", "assignedTo": "id_or_name_or_null"}, ...]}',
      'If an email is clearly FOR a specific worker, set assignedTo to that worker\'s id or name. Otherwise omit or null.',
      'Respond ONLY with the JSON object.',
    ].join('\n')

    try {
      const response = await this.client.chat.complete({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.1,
        maxTokens: batch.length * 50,
        responseFormat: { type: 'json_object' },
      })
      const entries = this._parseBatchResponse(response?.choices?.[0]?.message?.content)
      if (!entries) {
        // Fallback to individual processing
        return this._fallbackBatch(batch, preferences)
      }

      // Index the answers once; the model may send the index as a string.
      // The first entry for an index wins.
      const byIndex = new Map()
      for (const entry of entries) {
        const index = Number(entry?.index)
        if (Number.isInteger(index) && !byIndex.has(index)) byIndex.set(index, entry)
      }

      return batch.map((email, i) => {
        const entry = byIndex.get(i)
        const category = ownValue(CATEGORIES, entry?.category) ? entry.category : 'review'
        const assignedTo = this._resolveAssignee(entry?.assignedTo, preferences)
        return {
          email,
          classification: { category, confidence: 0.8, reason: 'Batch', assignedTo },
        }
      })
    } catch (error) {
      log.error('Batch categorization failed', { error: error.message })
      return this._fallbackBatch(batch, preferences)
    }
  }

  /**
   * Fallback to individual categorization
   * Categorizes the emails one after another with a 100 ms pause between
   * requests.
   * @param {object[]} emails
   * @param {object} preferences
   * @returns {Promise<Array<{email: object, classification: object}>>}
   */
  async _fallbackBatch(emails, preferences) {
    const results = []
    for (const [position, email] of emails.entries()) {
      // Rate limiting pause (between requests only, not after the last one)
      if (position > 0) {
        await new Promise(resolve => setTimeout(resolve, 100))
      }
      const classification = await this.categorize(email, preferences)
      results.push({ email, classification })
    }
    return results
  }

  /**
   * Build preference context for prompt
   * @param {object} [preferences] - user preferences; missing or null means none
   * @returns {string} a "USER PREFERENCES:" section ending in a newline, or an
   *   empty string when no preference applies
   */
  _buildPreferenceContext(preferences = {}) {
    const prefs = preferences ?? {}
    const parts = []

    // Get enabled categories
    const enabledCategories = this.getEnabledCategories(prefs)
    if (enabledCategories.length < Object.keys(CATEGORIES).length) {
      const disabled = Object.keys(CATEGORIES).filter(k => !enabledCategories.includes(k))
      parts.push(`DISABLED CATEGORIES (do not use): ${disabled.map(k => CATEGORIES[k].name).join(', ')}`)
      parts.push(`ONLY USE THESE CATEGORIES: ${enabledCategories.map(k => `${k} (${CATEGORIES[k].name})`).join(', ')}`)
    }

    const vipSenders = asList(prefs.vipSenders)
    if (vipSenders.length) {
      parts.push(`VIP Senders (always categorize as "vip"): ${vipSenders.join(', ')}`)
    }
    const blockedSenders = asList(prefs.blockedSenders)
    if (blockedSenders.length) {
      parts.push(`Blocked Senders (categorize as "promotions"): ${blockedSenders.join(', ')}`)
    }
    const customRules = asList(prefs.customRules).filter(Boolean)
    if (customRules.length) {
      parts.push(`Custom Rules:\n${customRules.map(r => `- ${r.condition}: ${r.category}`).join('\n')}`)
    }
    const priorityTopics = asList(prefs.priorityTopics)
    if (priorityTopics.length) {
      parts.push(`Priority Topics (higher priority): ${priorityTopics.join(', ')}`)
    }

    // Company labels context
    const activeLabels = asList(prefs.companyLabels).filter(l => l?.enabled)
    if (activeLabels.length > 0) {
      // " -> " keeps the condition and the target category apart
      parts.push(`Company Labels (apply these labels when conditions match):\n${activeLabels.map(l => `- ${l.name}: ${l.condition} -> ${l.category || 'promotions'}`).join('\n')}`)
    }

    // Name labels (workers): assign email to a person when clearly for them
    const activeNameLabels = asList(prefs.nameLabels).filter(l => l?.enabled)
    if (activeNameLabels.length > 0) {
      parts.push(`NAME LABELS (workers) assign email to ONE person when the email is clearly FOR that person (e.g. "für Max", "an Anna", "Max bitte prüfen", subject/body mentions them):\n${activeNameLabels.map(l => `- id: "${l.id}", name: "${l.name}"${l.keywords?.length ? `, keywords: ${JSON.stringify(l.keywords)}` : ''}`).join('\n')}\nIf the email is for a specific worker, set "assignedTo" to that label's id or name. Otherwise omit assignedTo.`)
    }

    return parts.length > 0 ? `USER PREFERENCES:\n${parts.join('\n')}\n` : ''
  }

  /**
   * Learn from user corrections
   * Currently only logs the correction; nothing is stored.
   * @returns {Promise<{learned: boolean}>}
   */
  async learnFromCorrection(email, originalCategory, correctedCategory, userId) {
    log.info('Learning correction received', {
      from: email?.from,
      original: originalCategory,
      corrected: correctedCategory,
      userId,
    })
    // In production, this would:
    // 1. Store correction in database
    // 2. Update user preferences
    // 3. Potentially fine-tune the model
    return { learned: true }
  }

  /**
   * Get category statistics
   * @param {Array<{classification?: {category?: string}}>} classifications
   * @returns {object} number of classifications per category key; entries
   *   without a category are skipped
   */
  getCategoryStats(classifications) {
    // Counted in a Map so category strings cannot collide with inherited
    // object members, then converted to a plain object for callers.
    const counts = new Map()
    for (const entry of asList(classifications)) {
      const category = entry?.classification?.category
      if (category === null || category === undefined) continue
      counts.set(category, (counts.get(category) || 0) + 1)
    }
    return Object.fromEntries(counts)
  }
}
// Default export: the AISorterService class (it is also exported by name at its declaration)
export default AISorterService