Files
Emailsorter/server/services/ai-sorter.mjs
ANDJ 6da8ce1cbd huhuih
hzgjuigik
2026-01-27 21:06:48 +01:00

618 lines
17 KiB
JavaScript

/**
* AI Email Sorter Service
* Uses Mistral AI for intelligent email categorization
*/
import { Mistral } from '@mistralai/mistralai'
import { config } from '../config/index.mjs'
import { log } from '../middleware/logger.mjs'
/**
* Email categories with metadata
* Uses Gmail categories where available
*
* Actions:
* - 'star': Keep in inbox + add star (VIP)
* - 'inbox': Keep in inbox
* - 'archive_read': Archive + mark as read (cleans inbox)
*/
// Category table, one row per category key. Each row is expanded into a
// metadata object with the properties (in this order):
//   name, description, color, gmailCategory, action, priority
// `gmailCategory` is null when the category has no Gmail category id.
const CATEGORIES = Object.fromEntries(
  [
    // key, name, description, color, gmailCategory, action, priority
    ['vip', 'Important', 'Important emails from known contacts', '#ff0000', null, 'star', 1],
    ['customers', 'Clients', 'Emails from clients and projects', '#4285f4', null, 'inbox', 2],
    ['invoices', 'Invoices', 'Invoices, receipts and financial documents', '#0f9d58', null, 'inbox', 3],
    ['newsletters', 'Newsletter', 'Regular newsletters and updates', '#9c27b0', 'CATEGORY_UPDATES', 'archive_read', 4],
    ['promotions', 'Promotions', 'Marketing emails and promotions', '#ff9800', 'CATEGORY_PROMOTIONS', 'archive_read', 5],
    ['social', 'Social', 'Social media and platform notifications', '#00bcd4', 'CATEGORY_SOCIAL', 'archive_read', 6],
    // Security mail stays in the inbox and shares the top priority with vip
    ['security', 'Security', 'Security codes and notifications', '#f44336', null, 'inbox', 1],
    ['calendar', 'Calendar', 'Calendar invites and events', '#673ab7', null, 'inbox', 3],
    // Catch-all used whenever a classification is missing or invalid
    ['review', 'Review', 'Emails that need manual review', '#607d8b', null, 'inbox', 10],
  ].map(([key, name, description, color, gmailCategory, action, priority]) => [
    key,
    { name, description, color, gmailCategory, action, priority },
  ])
)
/**
* Known companies for automatic detection
* Maps domain patterns to company names
*/
// Domain -> company name lookup, declared per company and flattened into a
// single object keyed by domain. Several domains may share one company name.
const KNOWN_COMPANIES = Object.fromEntries(
  [
    ['Amazon', ['amazon.com', 'amazon.de', 'amazon.co.uk', 'amazon.fr']],
    ['Google', ['google.com', 'gmail.com']],
    ['Microsoft', ['microsoft.com', 'outlook.com', 'hotmail.com']],
    ['Apple', ['apple.com', 'icloud.com']],
    ['Facebook', ['facebook.com']],
    ['Meta', ['meta.com']],
    ['Twitter', ['twitter.com', 'x.com']],
    ['LinkedIn', ['linkedin.com']],
    ['GitHub', ['github.com']],
    ['Netflix', ['netflix.com']],
    ['Spotify', ['spotify.com']],
    ['PayPal', ['paypal.com']],
    ['Stripe', ['stripe.com']],
    ['Shopify', ['shopify.com']],
    ['Uber', ['uber.com']],
    ['Airbnb', ['airbnb.com']],
    ['Dropbox', ['dropbox.com']],
    ['Slack', ['slack.com']],
    ['Zoom', ['zoom.us']],
  ].flatMap(([company, domains]) => domains.map(domain => [domain, company]))
)
/**
* AI Sorter Service Class
*/
export class AISorterService {
  /**
   * Create the service. A Mistral client is only constructed when
   * `config.mistral.apiKey` is set; otherwise the service is disabled and the
   * categorize methods return the "review" fallback.
   */
  constructor() {
    this.client = null
    this.model = 'mistral-small-latest'
    this.enabled = Boolean(config.mistral.apiKey)
    if (this.enabled) {
      this.client = new Mistral({ apiKey: config.mistral.apiKey })
      log.info('AI Sorter Service initialized with Mistral AI')
    } else {
      log.warn('AI Sorter Service disabled - no MISTRAL_API_KEY')
    }
  }

  /**
   * Own-property test that ignores inherited members, so names such as
   * "constructor" or "toString" are never mistaken for real entries.
   * @param {object} obj - Object to inspect (must not be null/undefined)
   * @param {string} key - Property name
   * @returns {boolean}
   */
  _owns(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key)
  }

  /**
   * Whether `key` is one of the category keys defined in CATEGORIES.
   * @param {*} key
   * @returns {boolean}
   */
  _isCategory(key) {
    return typeof key === 'string' && this._owns(CATEGORIES, key)
  }

  /**
   * Get all categories
   * @returns {object} The shared CATEGORIES object (not a copy)
   */
  getCategories() {
    return CATEGORIES
  }

  /**
   * Get category names as array
   * @returns {string[]} Display names in declaration order
   */
  getCategoryNames() {
    return Object.values(CATEGORIES).map(c => c.name)
  }

  /**
   * Get label name for a category key
   * @param {string} key - Category key
   * @returns {string} The category's display name, or the "review" name for unknown keys
   */
  getLabelName(key) {
    return (this._isCategory(key) && CATEGORIES[key].name) || CATEGORIES.review.name
  }

  /**
   * Get Gmail category ID if available
   * @param {string} key - Category key
   * @returns {string|null} Gmail category id, or null when none is defined / key is unknown
   */
  getGmailCategory(key) {
    return (this._isCategory(key) && CATEGORIES[key].gmailCategory) || null
  }

  /**
   * Get action for category (respects user preferences)
   * @param {string} key - Category key
   * @param {object} [preferences] - May carry `categoryActions: { [key]: action }`
   * @returns {string} The user override if present, else the default action, else 'inbox'
   */
  getCategoryAction(key, preferences = {}) {
    // Check for user override first (own properties only)
    const overrides = preferences?.categoryActions
    if (overrides != null && this._owns(overrides, key) && overrides[key]) {
      return overrides[key]
    }
    // Return default action
    return (this._isCategory(key) && CATEGORIES[key].action) || 'inbox'
  }

  /**
   * Get color for category
   * @param {string} key - Category key
   * @returns {string} Hex color; '#607d8b' for unknown keys
   */
  getCategoryColor(key) {
    return (this._isCategory(key) && CATEGORIES[key].color) || '#607d8b'
  }

  /**
   * Get enabled categories based on user preferences
   * @param {object} [preferences] - May carry `enabledCategories: string[]`
   * @returns {string[]} Valid category keys; all keys when no list is given.
   *   An explicit empty list means "nothing enabled".
   */
  getEnabledCategories(preferences = {}) {
    const requested = Array.isArray(preferences?.enabledCategories)
      ? preferences.enabledCategories
      : Object.keys(CATEGORIES)
    return requested.filter(key => this._isCategory(key)) // Only return valid categories
  }

  /**
   * Detect company from email address
   * @param {{from?: string}} email
   * @returns {{name: string, domain: string, label: string}|null}
   *   `domain` is the known domain that matched (which may be a parent of the
   *   sender's actual domain); null when nothing matches.
   */
  detectCompany(email) {
    if (typeof email?.from !== 'string') return null
    // Extract domain from email
    const emailMatch = email.from.match(/@([^\s>]+)/)
    if (!emailMatch) return null
    const domain = emailMatch[1].toLowerCase()
    const labels = domain.split('.')
    // Candidates: the full domain, then each shorter parent domain with at
    // least two labels (mail.amazon.co.uk -> amazon.co.uk -> co.uk).
    const candidates = [domain]
    for (let start = 1; start <= labels.length - 2; start++) {
      candidates.push(labels.slice(start).join('.'))
    }
    for (const candidate of candidates) {
      if (this._owns(KNOWN_COMPANIES, candidate)) {
        return {
          name: KNOWN_COMPANIES[candidate],
          domain: candidate,
          label: KNOWN_COMPANIES[candidate],
        }
      }
    }
    return null
  }

  /**
   * Generate suggested rules based on email patterns
   * Analyzes email samples to detect patterns and suggest rules
   * @param {*} userId - Currently unused; kept for interface compatibility
   * @param {Array<{from?: string, category?: string}>} emailSamples
   * @returns {Promise<object[]>} At most 5 suggestions, highest confidence first
   */
  async generateSuggestedRules(userId, emailSamples) {
    if (!emailSamples || emailSamples.length === 0) {
      return []
    }
    const suggestions = []
    // Maps instead of plain objects: keys come from email headers
    const senderCounts = new Map()
    const domainCounts = new Map()
    const categoryCounts = new Map()
    const bump = (counts, key) => counts.set(key, (counts.get(key) || 0) + 1)
    // Analyze patterns
    for (const email of emailSamples) {
      const from = typeof email?.from === 'string' ? email.from.toLowerCase() : ''
      // Extract domain
      const emailMatch = from.match(/@([^\s>]+)/)
      if (emailMatch) {
        bump(domainCounts, emailMatch[1])
      }
      // Count senders; emails without a sender must not produce a suggestion
      const senderEmail = (from.split('<')[1]?.split('>')[0] || from).trim()
      if (senderEmail) {
        bump(senderCounts, senderEmail)
      }
      // Detect category patterns
      bump(categoryCounts, email?.category || 'review')
    }
    const totalEmails = emailSamples.length
    const threshold = Math.max(3, Math.ceil(totalEmails * 0.1)) // At least 3 emails or 10% of total
    const byCountDesc = ([, a], [, b]) => b - a
    // Suggest VIP senders (frequent senders)
    const frequentSenders = [...senderCounts]
      .filter(([, count]) => count >= threshold)
      .sort(byCountDesc)
      .slice(0, 3)
    for (const [sender, count] of frequentSenders) {
      suggestions.push({
        type: 'vip_sender',
        name: `Mark ${sender.split('@')[0]} as VIP`,
        description: `${count} emails from this sender`,
        confidence: Math.min(0.9, count / totalEmails),
        action: {
          type: 'add_vip',
          email: sender,
        },
      })
    }
    // Suggest company labels (frequent domains that are not already known)
    const frequentDomains = [...domainCounts]
      .filter(([domain, count]) => count >= threshold && !this._owns(KNOWN_COMPANIES, domain))
      .sort(byCountDesc)
      .slice(0, 3)
    for (const [domain, count] of frequentDomains) {
      const firstLabel = domain.split('.')[0]
      const companyName = firstLabel.charAt(0).toUpperCase() + firstLabel.slice(1)
      suggestions.push({
        type: 'company_label',
        name: `Label ${companyName} emails`,
        description: `${count} emails from ${domain}`,
        confidence: Math.min(0.85, count / totalEmails),
        action: {
          type: 'add_company_label',
          name: companyName,
          condition: `from:${domain}`,
          category: 'promotions', // Default, user can change
        },
      })
    }
    // Suggest category-specific rules based on patterns
    const newsletterCount = categoryCounts.get('newsletters') || 0
    if (newsletterCount >= threshold) {
      suggestions.push({
        type: 'category_rule',
        name: 'Archive newsletters automatically',
        description: `${newsletterCount} newsletter emails detected`,
        confidence: 0.8,
        action: {
          type: 'enable_category',
          category: 'newsletters',
          action: 'archive_read',
        },
      })
    }
    const promotionCount = categoryCounts.get('promotions') || 0
    if (promotionCount >= threshold) {
      suggestions.push({
        type: 'category_rule',
        name: 'Archive promotions automatically',
        description: `${promotionCount} promotion emails detected`,
        confidence: 0.75,
        action: {
          type: 'enable_category',
          category: 'promotions',
          action: 'archive_read',
        },
      })
    }
    // Sort by confidence and return top 5
    return suggestions
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, 5)
  }

  /**
   * Check if email matches a company label condition
   *
   * Supported syntax: "from:domain.com", "subject:keyword", a bare
   * "user@domain.com", combined with " OR " / " AND " (AND binds tighter).
   * Matching is case-insensitive substring matching.
   * @param {{from?: string, subject?: string}} email
   * @param {{enabled?: boolean, condition?: string}} companyLabel
   * @returns {boolean}
   */
  matchesCompanyLabel(email, companyLabel) {
    if (!companyLabel?.enabled || !companyLabel?.condition) return false
    if (typeof companyLabel.condition !== 'string') return false
    const from = typeof email?.from === 'string' ? email.from.toLowerCase() : ''
    const subject = typeof email?.subject === 'string' ? email.subject.toLowerCase() : ''
    return this._evaluateCondition(companyLabel.condition, from, subject)
  }

  /**
   * Evaluate a condition expression. Operators are resolved BEFORE the
   * individual terms are tested; otherwise the first matching term of an AND
   * expression would short-circuit the whole condition to true.
   * @param {string} condition - Expression, possibly containing " OR " / " AND "
   * @param {string} from - Lowercased sender
   * @param {string} subject - Lowercased subject
   * @returns {boolean}
   */
  _evaluateCondition(condition, from, subject) {
    // OR is split first so that AND binds tighter: "a AND b OR c" = (a AND b) OR c
    if (condition.includes(' OR ')) {
      return condition.split(' OR ').some(part => this._evaluateCondition(part.trim(), from, subject))
    }
    if (condition.includes(' AND ')) {
      return condition.split(' AND ').every(part => this._evaluateCondition(part.trim(), from, subject))
    }
    return this._matchesConditionTerm(condition, from, subject)
  }

  /**
   * Test a single operator-free term against the (lowercased) sender/subject.
   * @param {string} term
   * @param {string} from - Lowercased sender
   * @param {string} subject - Lowercased subject
   * @returns {boolean}
   */
  _matchesConditionTerm(term, from, subject) {
    // Sender and subject are lowercased, so the term has to be as well
    const needle = term.toLowerCase()
    if (needle.includes('from:')) {
      const domain = needle.split('from:')[1]?.trim().split(' ')[0]
      if (domain && from.includes(domain)) return true
    }
    if (needle.includes('subject:')) {
      const keyword = needle.split('subject:')[1]?.trim().split(' ')[0]
      if (keyword && subject.includes(keyword)) return true
    }
    // Simple domain match for "user@domain.com" style terms
    if (needle.includes('@')) {
      const domain = needle.split('@')[1]?.trim()
      if (domain && from.includes(domain)) return true
    }
    return false
  }

  /**
   * Categorize a single email
   * @param {{from?: string, subject?: string, snippet?: string}} email
   * @param {object} [preferences] - User preferences folded into the prompt
   * @returns {Promise<object>} The parsed model response with a validated
   *   `category`; `{ category: 'review', confidence: 0, reason }` when the
   *   service is disabled or the request/parse fails. Never rejects on API errors.
   */
  async categorize(email, preferences = {}) {
    if (!this.enabled) {
      return { category: 'review', confidence: 0, reason: 'AI not configured' }
    }
    const { from, subject, snippet } = email ?? {}
    // Build context from preferences
    const preferenceContext = this._buildPreferenceContext(preferences)
    const categoryLines = Object.entries(CATEGORIES)
      .map(([key, cat]) => `- ${key}: ${cat.name} - ${cat.description}`)
      .join('\n')
    const preview = (typeof snippet === 'string' && snippet.substring(0, 500)) || 'No preview'
    // NOTE(review): sender/subject/preview are untrusted text placed in the prompt
    const prompt = [
      'You are an intelligent email sorting assistant. Analyze the following email and categorize it.',
      'AVAILABLE CATEGORIES:',
      categoryLines,
      preferenceContext,
      'EMAIL:',
      `From: ${from}`,
      `Subject: ${subject}`,
      `Preview: ${preview}`,
      'RESPONSE FORMAT (JSON ONLY):',
      '{"category": "category_key", "confidence": 0.0-1.0, "reason": "brief explanation"}',
      'Respond ONLY with the JSON object.',
    ].join('\n')
    try {
      const response = await this.client.chat.complete({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.1,
        maxTokens: 150,
        responseFormat: { type: 'json_object' },
      })
      const content = response.choices[0]?.message?.content
      const result = JSON.parse(content)
      if (result === null || typeof result !== 'object' || Array.isArray(result)) {
        throw new Error('Response is not a JSON object')
      }
      // Validate category
      if (!this._isCategory(result.category)) {
        result.category = 'review'
      }
      return result
    } catch (error) {
      log.error('AI categorization failed', { error: error.message })
      return { category: 'review', confidence: 0, reason: 'Categorization error' }
    }
  }

  /**
   * Batch categorize multiple emails
   *
   * Up to 5 emails are categorized one by one; larger batches use a single
   * request and fall back to per-email categorization when that request or
   * its response cannot be used.
   * @param {object[]} emails
   * @param {object} [preferences]
   * @returns {Promise<Array<{email: object, classification: object}>>} One entry per input email, in order
   */
  async batchCategorize(emails, preferences = {}) {
    const list = Array.isArray(emails) ? emails : []
    if (!this.enabled || list.length === 0) {
      return list.map(e => ({
        email: e,
        classification: { category: 'review', confidence: 0, reason: 'AI not available' },
      }))
    }
    // For small batches, process individually
    if (list.length <= 5) {
      const results = []
      for (const email of list) {
        const classification = await this.categorize(email, preferences)
        results.push({ email, classification })
      }
      return results
    }
    // For larger batches, use batch request
    const preferenceContext = this._buildPreferenceContext(preferences)
    const emailList = list
      .map((e, i) => {
        const preview = (typeof e?.snippet === 'string' && e.snippet.substring(0, 200)) || '-'
        return `[${i}] From: ${e?.from} | Subject: ${e?.subject} | Preview: ${preview}`
      })
      .join('\n')
    const categoryList = Object.entries(CATEGORIES)
      .map(([key, cat]) => `${key}: ${cat.name}`)
      .join(' | ')
    // The request uses JSON-object mode, so the prompt asks for an object
    // wrapping the array rather than a bare array.
    const prompt = [
      `You are an email sorting assistant. Categorize the following ${list.length} emails.`,
      'CATEGORIES:',
      categoryList,
      preferenceContext,
      'EMAILS:',
      emailList,
      'RESPONSE FORMAT (JSON OBJECT ONLY):',
      '{"results": [{"index": 0, "category": "key"}, {"index": 1, "category": "key"}, ...]}',
      'Respond ONLY with the JSON object.',
    ].join('\n')
    try {
      const response = await this.client.chat.complete({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.1,
        maxTokens: list.length * 50,
        responseFormat: { type: 'json_object' },
      })
      const content = response.choices[0]?.message?.content
      const parsed = this._extractBatchResults(content)
      if (!parsed) {
        // Fallback to individual processing
        return this._fallbackBatch(list, preferences)
      }
      // Index the answers once instead of scanning the array per email
      const categoryByIndex = new Map()
      for (const entry of parsed) {
        const rawIndex = entry?.index
        // Models sometimes return the index as a numeric string
        const index = typeof rawIndex === 'string' && rawIndex.trim() !== '' ? Number(rawIndex) : rawIndex
        if (Number.isInteger(index) && !categoryByIndex.has(index)) {
          categoryByIndex.set(index, entry.category)
        }
      }
      return list.map((email, i) => {
        const candidate = categoryByIndex.get(i)
        const category = this._isCategory(candidate) ? candidate : 'review'
        return {
          email,
          classification: { category, confidence: 0.8, reason: 'Batch' },
        }
      })
    } catch (error) {
      log.error('Batch categorization failed', { error: error.message })
      return this._fallbackBatch(list, preferences)
    }
  }

  /**
   * Pull the result array out of a batch response.
   * Accepts a bare array, `{ results: [...] }`, or an object whose first
   * array-valued property holds the results.
   * @param {*} content - Raw message content returned by the model
   * @returns {Array|null} The result array, or null when none can be found
   */
  _extractBatchResults(content) {
    let parsed
    try {
      parsed = JSON.parse(content)
    } catch {
      return null
    }
    if (Array.isArray(parsed)) return parsed
    if (parsed === null || typeof parsed !== 'object') return null
    if (Array.isArray(parsed.results)) return parsed.results
    return Object.values(parsed).find(value => Array.isArray(value)) || null
  }

  /**
   * Fallback to individual categorization
   * @param {object[]} emails
   * @param {object} preferences
   * @returns {Promise<Array<{email: object, classification: object}>>}
   */
  async _fallbackBatch(emails, preferences) {
    const results = []
    for (let i = 0; i < emails.length; i++) {
      const email = emails[i]
      const classification = await this.categorize(email, preferences)
      results.push({ email, classification })
      // Rate limiting pause between requests (not needed after the last one)
      if (i < emails.length - 1) {
        await new Promise(r => setTimeout(r, 100))
      }
    }
    return results
  }

  /**
   * Build preference context for prompt
   * @param {object} preferences - User preferences (null/undefined is treated as empty)
   * @returns {string} 'USER PREFERENCES:\n...\n', or '' when there is nothing to add
   */
  _buildPreferenceContext(preferences) {
    const prefs = preferences ?? {}
    const hasItems = value => Array.isArray(value) && value.length > 0
    const parts = []
    // Get enabled categories
    const enabledCategories = this.getEnabledCategories(prefs)
    if (enabledCategories.length < Object.keys(CATEGORIES).length) {
      const disabled = Object.keys(CATEGORIES).filter(k => !enabledCategories.includes(k))
      parts.push(`DISABLED CATEGORIES (do not use): ${disabled.map(k => CATEGORIES[k].name).join(', ')}`)
      parts.push(`ONLY USE THESE CATEGORIES: ${enabledCategories.map(k => `${k} (${CATEGORIES[k].name})`).join(', ')}`)
    }
    if (hasItems(prefs.vipSenders)) {
      parts.push(`VIP Senders (always categorize as "vip"): ${prefs.vipSenders.join(', ')}`)
    }
    if (hasItems(prefs.blockedSenders)) {
      parts.push(`Blocked Senders (categorize as "promotions"): ${prefs.blockedSenders.join(', ')}`)
    }
    if (hasItems(prefs.customRules)) {
      parts.push(`Custom Rules:\n${prefs.customRules.map(r => `- ${r.condition}: ${r.category}`).join('\n')}`)
    }
    if (hasItems(prefs.priorityTopics)) {
      parts.push(`Priority Topics (higher priority): ${prefs.priorityTopics.join(', ')}`)
    }
    // Company labels context
    if (hasItems(prefs.companyLabels)) {
      const activeLabels = prefs.companyLabels.filter(l => l?.enabled)
      if (activeLabels.length > 0) {
        // " -> " separates the condition from the target category; without it
        // the two run together (e.g. "from:acme.comvip").
        const lines = activeLabels.map(l => `- ${l.name}: ${l.condition} -> ${l.category || 'promotions'}`)
        parts.push(`Company Labels (apply these labels when conditions match):\n${lines.join('\n')}`)
      }
    }
    return parts.length > 0 ? `USER PREFERENCES:\n${parts.join('\n')}\n` : ''
  }

  /**
   * Learn from user corrections
   *
   * Currently only logs the correction; nothing is persisted.
   * @param {{from?: string}} email
   * @param {string} originalCategory
   * @param {string} correctedCategory
   * @param {*} userId
   * @returns {Promise<{learned: boolean}>}
   */
  async learnFromCorrection(email, originalCategory, correctedCategory, userId) {
    log.info('Learning correction received', {
      from: email?.from,
      original: originalCategory,
      corrected: correctedCategory,
      userId,
    })
    // In production, this would:
    // 1. Store correction in database
    // 2. Update user preferences
    // 3. Potentially fine-tune the model
    return { learned: true }
  }

  /**
   * Get category statistics
   * @param {Array<{classification?: {category?: string}}>} classifications
   * @returns {Object<string, number>} Number of classifications per category;
   *   entries without a category are skipped
   */
  getCategoryStats(classifications) {
    const stats = {}
    for (const entry of classifications ?? []) {
      const cat = entry?.classification?.category
      if (cat == null) continue
      // Own-property read so a category named like an Object.prototype member still counts from 0
      const current = this._owns(stats, cat) ? stats[cat] : 0
      stats[cat] = current + 1
    }
    return stats
  }
}
// Default export: the AISorterService class itself (the same class that is
// also exported by name), not an instance
export default AISorterService