// Emailsorter/server/services/ai-sorter.mjs

/**
 * AI Email Sorter Service
 * Uses Mistral AI for intelligent email categorization
 */

import { Mistral } from '@mistralai/mistralai'
import { config } from '../config/index.mjs'
import { log } from '../middleware/logger.mjs'

/**
 * Email categories with metadata
 * Uses Gmail categories where available
 *
 * Actions:
 * - 'star': Keep in inbox + add star (VIP)
 * - 'inbox': Keep in inbox
 * - 'archive_read': Archive + mark as read (cleans inbox)
 */
const CATEGORIES = (() => {
  // The three actions a category can trigger
  const STAR = 'star'                 // keep in inbox + star
  const INBOX = 'inbox'               // keep in inbox
  const ARCHIVE_READ = 'archive_read' // archive + mark as read

  // Assembles one category record; gmailCategory stays null unless a Gmail
  // system category is passed explicitly.
  const define = (name, description, color, action, priority, gmailCategory = null) => ({
    name,
    description,
    color,
    gmailCategory,
    action,
    priority,
  })

  return {
    vip: define('Important', 'Important emails from known contacts', '#ff0000', STAR, 1),
    customers: define('Clients', 'Emails from clients and projects', '#4285f4', INBOX, 2),
    invoices: define('Invoices', 'Invoices, receipts and financial documents', '#0f9d58', INBOX, 3),
    newsletters: define('Newsletter', 'Regular newsletters and updates', '#9c27b0', ARCHIVE_READ, 4, 'CATEGORY_UPDATES'),
    promotions: define('Promotions', 'Marketing emails and promotions', '#ff9800', ARCHIVE_READ, 5, 'CATEGORY_PROMOTIONS'),
    social: define('Social', 'Social media and platform notifications', '#00bcd4', ARCHIVE_READ, 6, 'CATEGORY_SOCIAL'),
    // Security mail shares the top priority with VIP but is not starred
    security: define('Security', 'Security codes and notifications', '#f44336', INBOX, 1),
    calendar: define('Calendar', 'Calendar invites and events', '#673ab7', INBOX, 3),
    // Catch-all bucket for anything that needs a human decision
    review: define('Review', 'Emails that need manual review', '#607d8b', INBOX, 10),
  }
})()

/**
 * Known companies for automatic detection
 * Maps domain patterns to company names
 */
const KNOWN_COMPANIES = Object.fromEntries(
  // Each row lists a company name followed by every sender domain that
  // belongs to it; the rows are flattened into domain -> company pairs.
  [
    ['Amazon', ['amazon.com', 'amazon.de', 'amazon.co.uk', 'amazon.fr']],
    ['Google', ['google.com', 'gmail.com']],
    ['Microsoft', ['microsoft.com', 'outlook.com', 'hotmail.com']],
    ['Apple', ['apple.com', 'icloud.com']],
    ['Facebook', ['facebook.com']],
    ['Meta', ['meta.com']],
    ['Twitter', ['twitter.com', 'x.com']],
    ['LinkedIn', ['linkedin.com']],
    ['GitHub', ['github.com']],
    ['Netflix', ['netflix.com']],
    ['Spotify', ['spotify.com']],
    ['PayPal', ['paypal.com']],
    ['Stripe', ['stripe.com']],
    ['Shopify', ['shopify.com']],
    ['Uber', ['uber.com']],
    ['Airbnb', ['airbnb.com']],
    ['Dropbox', ['dropbox.com']],
    ['Slack', ['slack.com']],
    ['Zoom', ['zoom.us']],
  ].flatMap(([company, domains]) => domains.map(domain => [domain, company]))
)

/**
 * AI Sorter Service Class
 */
export class AISorterService {
  /**
   * Reads the Mistral API key from `config` and creates the client when a
   * key is present. Without a key the service stays disabled and every
   * categorization call returns the "review" fallback.
   */
  constructor() {
    this.client = null
    this.model = 'mistral-small-latest'
    this.enabled = Boolean(config.mistral.apiKey)

    if (this.enabled) {
      this.client = new Mistral({ apiKey: config.mistral.apiKey })
      log.info('AI Sorter Service initialized with Mistral AI')
    } else {
      log.warn('AI Sorter Service disabled - no MISTRAL_API_KEY')
    }
  }

  /**
   * Get all categories.
   * @returns {object} The shared CATEGORIES table (not a copy).
   */
  getCategories() {
    return CATEGORIES
  }

  /**
   * Get the display names of all categories.
   * @returns {string[]}
   */
  getCategoryNames() {
    return Object.values(CATEGORIES).map(c => c.name)
  }

  /**
   * Get the label name for a category key.
   * @param {string} key - Category key (e.g. "vip").
   * @returns {string} The category name, or the "review" name for unknown keys.
   */
  getLabelName(key) {
    return ownEntry(CATEGORIES, key)?.name || CATEGORIES.review.name
  }

  /**
   * Get the Gmail category ID for a category key.
   * @param {string} key - Category key.
   * @returns {string|null} Gmail category ID, or null when none is defined.
   */
  getGmailCategory(key) {
    return ownEntry(CATEGORIES, key)?.gmailCategory || null
  }

  /**
   * Get the action for a category, honoring a user override when present.
   * @param {string} key - Category key.
   * @param {object} [preferences] - User preferences; `categoryActions[key]` overrides the default.
   * @returns {string} The action, defaulting to 'inbox' for unknown keys.
   */
  getCategoryAction(key, preferences = {}) {
    // A user override always wins over the built-in default
    const override = preferences?.categoryActions?.[key]
    if (override) {
      return override
    }
    return ownEntry(CATEGORIES, key)?.action || 'inbox'
  }

  /**
   * Get the color for a category.
   * @param {string} key - Category key.
   * @returns {string} Hex color, or '#607d8b' for unknown keys.
   */
  getCategoryColor(key) {
    return ownEntry(CATEGORIES, key)?.color || '#607d8b'
  }

  /**
   * Get the enabled category keys based on user preferences.
   * @param {object} [preferences] - `enabledCategories` (array of keys) restricts the set.
   * @returns {string[]} Valid category keys; all keys when no array is supplied.
   */
  getEnabledCategories(preferences = {}) {
    const requested = Array.isArray(preferences?.enabledCategories)
      ? preferences.enabledCategories
      : Object.keys(CATEGORIES)
    // Drop anything that is not a real category key
    return requested.filter(key => ownEntry(CATEGORIES, key) !== undefined)
  }

  /**
   * Detect a known company from the sender address.
   * @param {{from?: string}} email - Email whose `from` header is inspected.
   * @returns {{name: string, domain: string, label: string}|null}
   *   The matched company and the domain that matched, or null.
   */
  detectCompany(email) {
    const from = email?.from
    if (typeof from !== 'string' || !from) return null

    // Domain = everything after the first "@" up to whitespace or ">"
    const emailMatch = from.match(/@([^\s>]+)/)
    if (!emailMatch) return null

    const domain = emailMatch[1].toLowerCase()
    const labels = domain.split('.')

    // Try the full domain first, then strip one leading label at a time so
    // that both "mail.amazon.com" and "mail.amazon.co.uk" resolve. At least
    // two labels are always kept, so a bare TLD is never looked up.
    const lastStart = Math.max(0, labels.length - 2)
    for (let start = 0; start <= lastStart; start += 1) {
      const candidate = labels.slice(start).join('.')
      const company = ownEntry(KNOWN_COMPANIES, candidate)
      if (company) {
        return { name: company, domain: candidate, label: company }
      }
    }

    return null
  }

  /**
   * Generate suggested rules from patterns in a sample of emails.
   *
   * Suggests up to three frequent senders as VIPs, up to three frequent
   * unknown domains as company labels, and auto-archive rules for
   * newsletters/promotions. "Frequent" means at least 3 emails or 10% of
   * the sample, whichever is larger.
   *
   * @param {string} userId - Not used by the current implementation.
   * @param {Array<{from?: string, category?: string}>} emailSamples
   * @returns {Promise<object[]>} At most five suggestions, highest confidence first.
   */
  async generateSuggestedRules(userId, emailSamples) {
    if (!Array.isArray(emailSamples) || emailSamples.length === 0) {
      return []
    }

    // Maps (not plain objects) so that sender-controlled keys such as
    // "constructor" cannot collide with inherited properties.
    const senderCounts = new Map()
    const domainCounts = new Map()
    const categoryCounts = new Map()
    const bump = (counts, key) => counts.set(key, (counts.get(key) || 0) + 1)

    for (const email of emailSamples) {
      const from = typeof email?.from === 'string' ? email.from.toLowerCase() : ''

      const emailMatch = from.match(/@([^\s>]+)/)
      if (emailMatch) {
        bump(domainCounts, emailMatch[1])
      }

      // Prefer the address inside "<...>", otherwise use the raw header.
      // Only real addresses are counted, so missing senders never become
      // an empty VIP suggestion.
      const senderEmail = (from.split('<')[1]?.split('>')[0] || from).trim()
      if (senderEmail.includes('@')) {
        bump(senderCounts, senderEmail)
      }

      bump(categoryCounts, email?.category || 'review')
    }

    const totalEmails = emailSamples.length
    const threshold = Math.max(3, Math.ceil(totalEmails * 0.1)) // At least 3 emails or 10% of total

    // Top three entries that reach the threshold, most frequent first
    const rankFrequent = (counts, isEligible) => [...counts.entries()]
      .filter(([key, count]) => count >= threshold && isEligible(key))
      .sort(([, a], [, b]) => b - a)
      .slice(0, 3)

    const suggestions = []

    // Suggest VIP senders (frequent senders)
    for (const [sender, count] of rankFrequent(senderCounts, () => true)) {
      suggestions.push({
        type: 'vip_sender',
        name: `Mark ${sender.split('@')[0]} as VIP`,
        description: `${count} emails from this sender`,
        confidence: Math.min(0.9, count / totalEmails),
        action: {
          type: 'add_vip',
          email: sender,
        },
      })
    }

    // Suggest company labels for frequent domains that are not already
    // recognised (including subdomains of known companies).
    const isUnknownDomain = domain => !this.detectCompany({ from: `x@${domain}` })
    for (const [domain, count] of rankFrequent(domainCounts, isUnknownDomain)) {
      const [firstLabel] = domain.split('.')
      const companyName = `${firstLabel.charAt(0).toUpperCase()}${firstLabel.slice(1)}`
      suggestions.push({
        type: 'company_label',
        name: `Label ${companyName} emails`,
        description: `${count} emails from ${domain}`,
        confidence: Math.min(0.85, count / totalEmails),
        action: {
          type: 'add_company_label',
          name: companyName,
          condition: `from:${domain}`,
          category: 'promotions', // Default, user can change
        },
      })
    }

    // Suggest category-specific rules based on patterns
    const newsletterCount = categoryCounts.get('newsletters') || 0
    if (newsletterCount >= threshold) {
      suggestions.push({
        type: 'category_rule',
        name: 'Archive newsletters automatically',
        description: `${newsletterCount} newsletter emails detected`,
        confidence: 0.8,
        action: {
          type: 'enable_category',
          category: 'newsletters',
          action: 'archive_read',
        },
      })
    }

    const promotionCount = categoryCounts.get('promotions') || 0
    if (promotionCount >= threshold) {
      suggestions.push({
        type: 'category_rule',
        name: 'Archive promotions automatically',
        description: `${promotionCount} promotion emails detected`,
        confidence: 0.75,
        action: {
          type: 'enable_category',
          category: 'promotions',
          action: 'archive_read',
        },
      })
    }

    // Sort by confidence and return top 5
    return suggestions
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, 5)
  }

  /**
   * Check whether an email matches a company label condition.
   *
   * Supported syntax: `from:domain.com`, `subject:keyword`, a bare
   * `user@domain.com`, and combinations joined with upper-case ` OR ` /
   * ` AND ` (AND binds tighter than OR). Matching is case-insensitive.
   *
   * @param {{from?: string, subject?: string}} email
   * @param {{enabled?: boolean, condition?: string}} companyLabel
   * @returns {boolean}
   */
  matchesCompanyLabel(email, companyLabel) {
    if (!companyLabel?.enabled || !companyLabel?.condition) return false

    const from = typeof email?.from === 'string' ? email.from.toLowerCase() : ''
    const subject = typeof email?.subject === 'string' ? email.subject.toLowerCase() : ''

    // First whitespace-delimited token following `prefix`
    const firstToken = (term, prefix) => term.split(prefix)[1]?.trim().split(' ')[0]

    // Evaluates a single term that contains no OR/AND operator
    const matchesTerm = (rawTerm) => {
      const term = rawTerm.trim().toLowerCase()
      if (!term) return false

      if (term.includes('from:')) {
        const domain = firstToken(term, 'from:')
        if (domain && from.includes(domain)) return true
      }

      if (term.includes('subject:')) {
        const keyword = firstToken(term, 'subject:')
        if (keyword && subject.includes(keyword)) return true
      }

      // Simple domain match
      if (term.includes('@')) {
        const domain = term.split('@')[1]?.trim()
        if (domain && from.includes(domain)) return true
      }

      return false
    }

    // Operators are resolved BEFORE individual terms so that a matching
    // first term cannot short-circuit an AND expression.
    const evaluate = (expression) => {
      if (expression.includes(' OR ')) {
        return expression.split(' OR ').some(part => evaluate(part))
      }
      if (expression.includes(' AND ')) {
        return expression.split(' AND ').every(part => evaluate(part))
      }
      return matchesTerm(expression)
    }

    return evaluate(String(companyLabel.condition))
  }

  /**
   * Categorize a single email with the AI model.
   *
   * @param {{from?: string, subject?: string, snippet?: string}} email
   * @param {object} [preferences] - User preferences added to the prompt.
   * @returns {Promise<{category: string, confidence: number, reason: string}>}
   *   Never rejects for API/parse errors: those are logged and reported as
   *   category "review" with confidence 0.
   */
  async categorize(email, preferences = {}) {
    if (!this.enabled) {
      return { category: 'review', confidence: 0, reason: 'AI not configured' }
    }

    const { from, subject, snippet } = email ?? {}

    // Build context from preferences
    const preferenceContext = this._buildPreferenceContext(preferences)

    const prompt = `You are an intelligent email sorting assistant. Analyze the following email and categorize it.

AVAILABLE CATEGORIES:
${Object.entries(CATEGORIES).map(([key, cat]) => `- ${key}: ${cat.name} - ${cat.description}`).join('\n')}

${preferenceContext}

EMAIL:
From: ${from}
Subject: ${subject}
Preview: ${snippet?.substring(0, 500) || 'No preview'}

RESPONSE FORMAT (JSON ONLY):
{"category": "category_key", "confidence": 0.0-1.0, "reason": "brief explanation"}

Respond ONLY with the JSON object.`

    try {
      const response = await this.client.chat.complete({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.1,
        maxTokens: 150,
        responseFormat: { type: 'json_object' },
      })

      const content = response.choices[0]?.message?.content
      // Model output is untrusted: validate the category and coerce the
      // remaining fields before handing the result to callers.
      return normalizeClassification(JSON.parse(content))
    } catch (error) {
      log.error('AI categorization failed', { error: error.message })
      return { category: 'review', confidence: 0, reason: 'Categorization error' }
    }
  }

  /**
   * Categorize multiple emails.
   *
   * Up to five emails are categorized one by one; larger lists are sent in
   * a single request, falling back to one-by-one processing when that
   * request fails or returns nothing usable.
   *
   * @param {object[]} emails
   * @param {object} [preferences]
   * @returns {Promise<Array<{email: object, classification: object}>>}
   *   One entry per input email, in input order.
   */
  async batchCategorize(emails, preferences = {}) {
    const list = Array.isArray(emails) ? emails : []

    if (!this.enabled || list.length === 0) {
      return list.map(e => ({
        email: e,
        classification: { category: 'review', confidence: 0, reason: 'AI not available' },
      }))
    }

    // For small batches, process individually
    if (list.length <= 5) {
      const results = []
      for (const email of list) {
        const classification = await this.categorize(email, preferences)
        results.push({ email, classification })
      }
      return results
    }

    // For larger batches, use batch request
    const preferenceContext = this._buildPreferenceContext(preferences)

    const emailList = list.map((e, i) =>
      `[${i}] From: ${e.from} | Subject: ${e.subject} | Preview: ${e.snippet?.substring(0, 200) || '-'}`
    ).join('\n')

    const prompt = `You are an email sorting assistant. Categorize the following ${list.length} emails.

CATEGORIES:
${Object.entries(CATEGORIES).map(([key, cat]) => `${key}: ${cat.name}`).join(' | ')}

${preferenceContext}

EMAILS:
${emailList}

RESPONSE FORMAT (JSON ARRAY ONLY):
[{"index": 0, "category": "key"}, {"index": 1, "category": "key"}, ...]

Respond ONLY with the JSON array.`

    try {
      const response = await this.client.chat.complete({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.1,
        maxTokens: list.length * 50,
        responseFormat: { type: 'json_object' },
      })

      const content = response.choices[0]?.message?.content

      // The reply may be a bare array or an object wrapping the array
      // under some key; accept both.
      let parsed = null
      try {
        parsed = extractBatchResults(JSON.parse(content))
      } catch {
        parsed = null
      }
      if (!parsed) {
        log.warn('Batch categorization returned no usable result list, falling back to individual processing')
        return this._fallbackBatch(list, preferences)
      }

      // Index the results once instead of scanning the array per email.
      // The first entry for an index wins; indices may arrive as strings.
      const categoryByIndex = new Map()
      for (const entry of parsed) {
        const rawIndex = entry?.index
        const index = typeof rawIndex === 'string' && /^\d+$/.test(rawIndex.trim())
          ? Number(rawIndex)
          : rawIndex
        if (Number.isInteger(index) && !categoryByIndex.has(index)) {
          categoryByIndex.set(index, entry.category)
        }
      }

      return list.map((email, i) => {
        const category = categoryByIndex.get(i)
        if (ownEntry(CATEGORIES, category)) {
          return {
            email,
            classification: { category, confidence: 0.8, reason: 'Batch' },
          }
        }
        // The model skipped this email or named an unknown category:
        // report it as an unconfident "review", not a confident one.
        return {
          email,
          classification: { category: 'review', confidence: 0, reason: 'No valid batch result' },
        }
      })
    } catch (error) {
      log.error('Batch categorization failed', { error: error.message })
      return this._fallbackBatch(list, preferences)
    }
  }

  /**
   * Fallback: categorize emails one at a time with a short pause between
   * requests.
   * @param {object[]} emails
   * @param {object} preferences
   * @returns {Promise<Array<{email: object, classification: object}>>}
   */
  async _fallbackBatch(emails, preferences) {
    const results = []
    for (const [position, email] of emails.entries()) {
      // Rate limiting pause between requests (none needed before the first)
      if (position > 0) {
        await new Promise(r => setTimeout(r, 100))
      }
      const classification = await this.categorize(email, preferences)
      results.push({ email, classification })
    }
    return results
  }

  /**
   * Build the "USER PREFERENCES" section of the prompt.
   * @param {object} [preferences]
   * @returns {string} The section text, or '' when no preference applies.
   */
  _buildPreferenceContext(preferences) {
    const prefs = preferences ?? {}
    const parts = []

    // Only mention categories when the user has disabled at least one
    const enabledCategories = this.getEnabledCategories(prefs)
    if (enabledCategories.length < Object.keys(CATEGORIES).length) {
      const disabled = Object.keys(CATEGORIES).filter(k => !enabledCategories.includes(k))
      parts.push(`DISABLED CATEGORIES (do not use): ${disabled.map(k => CATEGORIES[k].name).join(', ')}`)
      parts.push(`ONLY USE THESE CATEGORIES: ${enabledCategories.map(k => `${k} (${CATEGORIES[k].name})`).join(', ')}`)
    }

    if (prefs.vipSenders?.length) {
      parts.push(`VIP Senders (always categorize as "vip"): ${prefs.vipSenders.join(', ')}`)
    }

    if (prefs.blockedSenders?.length) {
      parts.push(`Blocked Senders (categorize as "promotions"): ${prefs.blockedSenders.join(', ')}`)
    }

    if (prefs.customRules?.length) {
      parts.push(`Custom Rules:\n${prefs.customRules.map(r => `- ${r.condition}: ${r.category}`).join('\n')}`)
    }

    if (prefs.priorityTopics?.length) {
      parts.push(`Priority Topics (higher priority): ${prefs.priorityTopics.join(', ')}`)
    }

    // Company labels context
    if (prefs.companyLabels?.length) {
      const activeLabels = prefs.companyLabels.filter(l => l?.enabled)
      if (activeLabels.length > 0) {
        parts.push(`Company Labels (apply these labels when conditions match):\n${activeLabels.map(l => `- ${l.name}: ${l.condition} → ${l.category || 'promotions'}`).join('\n')}`)
      }
    }

    return parts.length > 0 ? `USER PREFERENCES:\n${parts.join('\n')}\n` : ''
  }

  /**
   * Record a user correction. Currently this only logs the correction;
   * nothing is stored or learned yet.
   * @param {{from?: string}} email
   * @param {string} originalCategory
   * @param {string} correctedCategory
   * @param {string} userId
   * @returns {Promise<{learned: boolean}>}
   */
  async learnFromCorrection(email, originalCategory, correctedCategory, userId) {
    log.info('Learning correction received', {
      from: email?.from,
      original: originalCategory,
      corrected: correctedCategory,
      userId,
    })

    // In production, this would:
    // 1. Store correction in database
    // 2. Update user preferences
    // 3. Potentially fine-tune the model

    return { learned: true }
  }

  /**
   * Count classifications per category.
   * @param {Array<{classification?: {category?: string}}>} classifications
   * @returns {Object<string, number>} Category key -> number of occurrences.
   *   Entries without a category are counted as "review".
   */
  getCategoryStats(classifications) {
    const stats = {}
    for (const entry of classifications ?? []) {
      const cat = entry?.classification?.category || 'review'
      // Own-property check so a category named like an inherited member
      // (e.g. "constructor") is still counted numerically.
      const current = Object.prototype.hasOwnProperty.call(stats, cat) ? stats[cat] : 0
      stats[cat] = current + 1
    }
    return stats
  }
}

/**
 * Read `table[key]` only when `key` is a string and an own property of
 * `table`; inherited members such as "constructor" yield undefined.
 */
function ownEntry(table, key) {
  if (table == null || typeof key !== 'string') return undefined
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined
}

/**
 * Validate and coerce a parsed single-email AI response.
 * Throws when the response is not a JSON object so the caller can apply its
 * error fallback.
 */
function normalizeClassification(result) {
  if (result === null || typeof result !== 'object' || Array.isArray(result)) {
    throw new Error('AI response is not a JSON object')
  }

  const category = ownEntry(CATEGORIES, result.category) ? result.category : 'review'

  const numeric = typeof result.confidence === 'string'
    ? Number.parseFloat(result.confidence)
    : result.confidence
  const confidence = typeof numeric === 'number' && Number.isFinite(numeric)
    ? Math.min(1, Math.max(0, numeric))
    : 0

  const reason = typeof result.reason === 'string' ? result.reason : ''

  return { ...result, category, confidence, reason }
}

/**
 * Find the result list in a parsed batch response: the value itself when it
 * is an array, else `results`, else the first array-valued property.
 * Returns null when no array is present.
 */
function extractBatchResults(parsed) {
  if (Array.isArray(parsed)) return parsed
  if (parsed === null || typeof parsed !== 'object') return null
  if (Array.isArray(parsed.results)) return parsed.results
  return Object.values(parsed).find(value => Array.isArray(value)) ?? null
}

// Default export: the AISorterService class (also exported by name above)
export default AISorterService