keycloak loop cooldown
This commit is contained in:
parent
5e5c3f69a4
commit
a6963bc9bf
292
CRITICAL_ISSUE_ANALYSIS.md
Normal file
292
CRITICAL_ISSUE_ANALYSIS.md
Normal file
@ -0,0 +1,292 @@
|
||||
# Critical Issue: Infinite Token Refresh Loop
|
||||
|
||||
## Problem Analysis
|
||||
|
||||
### What's Happening
|
||||
|
||||
1. **Initial Load**: App starts successfully, user authenticated
|
||||
2. **Session Invalidation**: Keycloak session becomes invalid (user logged out elsewhere, session expired, etc.)
|
||||
3. **Refresh Storm**: Every API request triggers:
|
||||
- JWT callback execution
|
||||
- Token refresh attempt
|
||||
- Refresh failure (session invalid)
|
||||
- Token cleared, but error state persists
|
||||
- **Next request repeats the cycle**
|
||||
|
||||
### Root Cause
|
||||
|
||||
The JWT callback in `app/api/auth/options.ts` has no circuit breaker:
|
||||
|
||||
```typescript
|
||||
// Current problematic flow:
|
||||
if (expiresAt && Date.now() < expiresAt) {
|
||||
return token; // Token valid
|
||||
}
|
||||
|
||||
// Token expired - ALWAYS tries to refresh
|
||||
const refreshedToken = await refreshAccessToken(token);
|
||||
|
||||
// If refresh fails, clears tokens but...
|
||||
// Next request will see expired token and try again!
|
||||
```
|
||||
|
||||
**The Problem:**
|
||||
- No cooldown period after failed refresh
|
||||
- No "session invalid" cache/flag
|
||||
- Every request triggers refresh attempt
|
||||
- Multiple widgets = multiple parallel requests = refresh storm
|
||||
|
||||
### Impact
|
||||
|
||||
- **Performance**: Excessive Keycloak API calls
|
||||
- **Server Load**: CPU/memory spike from refresh attempts
|
||||
- **User Experience**: App appears broken, constant loading
|
||||
- **Logs**: Spam with "Keycloak session invalidated" messages
|
||||
- **Security**: Potential DoS on Keycloak server
|
||||
|
||||
---
|
||||
|
||||
## Solution: Circuit Breaker Pattern
|
||||
|
||||
### Implementation Strategy
|
||||
|
||||
1. **Add Refresh Cooldown**: Don't retry refresh for X seconds after failure
|
||||
2. **Session Invalid Flag**: Cache the "session invalid" state
|
||||
3. **Early Return**: If session known to be invalid, skip refresh attempt
|
||||
4. **Client-Side Detection**: Stop making requests when session invalid
|
||||
|
||||
### Code Changes Needed
|
||||
|
||||
#### 1. Add Circuit Breaker to JWT Callback
|
||||
|
||||
```typescript
|
||||
// Add to app/api/auth/options.ts
|
||||
|
||||
// Track last failed refresh attempt
|
||||
const lastFailedRefresh = new Map<string, number>();
|
||||
const REFRESH_COOLDOWN = 5000; // 5 seconds
|
||||
|
||||
async jwt({ token, account, profile }) {
|
||||
// ... existing initial sign-in logic ...
|
||||
|
||||
// Check if token is expired
|
||||
const expiresAt = token.accessTokenExpires as number;
|
||||
if (expiresAt && Date.now() < expiresAt) {
|
||||
return token; // Token still valid
|
||||
}
|
||||
|
||||
// CIRCUIT BREAKER: Check if we recently failed to refresh
|
||||
const userId = token.sub || 'unknown';
|
||||
const lastFailure = lastFailedRefresh.get(userId) || 0;
|
||||
const timeSinceFailure = Date.now() - lastFailure;
|
||||
|
||||
if (timeSinceFailure < REFRESH_COOLDOWN) {
|
||||
// Too soon after failure, return error token immediately
|
||||
logger.debug('Refresh cooldown active, skipping refresh attempt', {
|
||||
userId,
|
||||
timeSinceFailure,
|
||||
});
|
||||
return {
|
||||
...token,
|
||||
error: "SessionNotActive",
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
// Try to refresh
|
||||
if (!token.refreshToken) {
|
||||
return {
|
||||
...token,
|
||||
error: "NoRefreshToken",
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
const refreshedToken = await refreshAccessToken(token);
|
||||
|
||||
// If refresh failed, record the failure time
|
||||
if (refreshedToken.error === "SessionNotActive") {
|
||||
lastFailedRefresh.set(userId, Date.now());
|
||||
logger.info("Keycloak session invalidated, setting cooldown", { userId });
|
||||
|
||||
// Clean up old entries (prevent memory leak)
|
||||
if (lastFailedRefresh.size > 1000) {
|
||||
const now = Date.now();
|
||||
for (const [key, value] of lastFailedRefresh.entries()) {
|
||||
if (now - value > REFRESH_COOLDOWN * 10) {
|
||||
lastFailedRefresh.delete(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...refreshedToken,
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
// Success - clear any previous failure record
|
||||
lastFailedRefresh.delete(userId);
|
||||
|
||||
return refreshedToken;
|
||||
}
|
||||
```
|
||||
|
||||
#### 2. Use Redis for Distributed Circuit Breaker
|
||||
|
||||
For multi-instance deployments, use Redis:
|
||||
|
||||
```typescript
|
||||
// lib/services/auth-circuit-breaker.ts
|
||||
import { getRedisClient } from '@/lib/redis';
|
||||
|
||||
const REFRESH_COOLDOWN = 5000; // 5 seconds
|
||||
const CIRCUIT_BREAKER_KEY = (userId: string) => `auth:refresh:cooldown:${userId}`;
|
||||
|
||||
export async function isRefreshInCooldown(userId: string): Promise<boolean> {
|
||||
const redis = getRedisClient();
|
||||
const key = CIRCUIT_BREAKER_KEY(userId);
|
||||
const lastFailure = await redis.get(key);
|
||||
|
||||
if (!lastFailure) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const timeSinceFailure = Date.now() - parseInt(lastFailure, 10);
|
||||
return timeSinceFailure < REFRESH_COOLDOWN;
|
||||
}
|
||||
|
||||
export async function recordRefreshFailure(userId: string): Promise<void> {
|
||||
const redis = getRedisClient();
|
||||
const key = CIRCUIT_BREAKER_KEY(userId);
|
||||
await redis.set(key, Date.now().toString(), 'EX', Math.ceil(REFRESH_COOLDOWN / 1000));
|
||||
}
|
||||
|
||||
export async function clearRefreshCooldown(userId: string): Promise<void> {
|
||||
const redis = getRedisClient();
|
||||
const key = CIRCUIT_BREAKER_KEY(userId);
|
||||
await redis.del(key);
|
||||
}
|
||||
```
|
||||
|
||||
#### 3. Client-Side Request Stopping
|
||||
|
||||
Add to components to stop making requests when session invalid:
|
||||
|
||||
```typescript
|
||||
// hooks/use-session-guard.ts
|
||||
import { useSession } from 'next-auth/react';
|
||||
import { useEffect, useRef } from 'react';
|
||||
|
||||
export function useSessionGuard() {
|
||||
const { status, data: session } = useSession();
|
||||
const hasInvalidSession = useRef(false);
|
||||
|
||||
useEffect(() => {
|
||||
if (status === 'unauthenticated' && !hasInvalidSession.current) {
|
||||
hasInvalidSession.current = true;
|
||||
// Stop all refresh intervals
|
||||
// Clear any pending requests
|
||||
}
|
||||
}, [status]);
|
||||
|
||||
return {
|
||||
shouldMakeRequests: status === 'authenticated' && !hasInvalidSession.current,
|
||||
isInvalid: hasInvalidSession.current,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Immediate Fix (Quick)
|
||||
|
||||
### Option 1: Add Simple Cooldown (In-Memory)
|
||||
|
||||
Add this to `app/api/auth/options.ts`:
|
||||
|
||||
```typescript
|
||||
// At top of file
|
||||
const refreshCooldown = new Map<string, number>();
|
||||
const COOLDOWN_MS = 5000; // 5 seconds
|
||||
|
||||
// In jwt callback, before refresh attempt:
|
||||
const userId = token.sub || 'unknown';
|
||||
const lastFailure = refreshCooldown.get(userId) || 0;
|
||||
|
||||
if (Date.now() - lastFailure < COOLDOWN_MS) {
|
||||
// Skip refresh, return error immediately
|
||||
return {
|
||||
...token,
|
||||
error: "SessionNotActive",
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
// After failed refresh:
|
||||
if (refreshedToken.error === "SessionNotActive") {
|
||||
refreshCooldown.set(userId, Date.now());
|
||||
// ... rest of error handling
|
||||
}
|
||||
```
|
||||
|
||||
### Option 2: Early Return on Known Invalid Session
|
||||
|
||||
```typescript
|
||||
// In jwt callback, check token error first:
|
||||
if (token.error === "SessionNotActive") {
|
||||
// Already know session is invalid, don't try again
|
||||
return {
|
||||
...token,
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
error: "SessionNotActive",
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recommended Implementation
|
||||
|
||||
1. **Immediate**: Add simple in-memory cooldown (Option 1)
|
||||
2. **Short-term**: Migrate to Redis-based circuit breaker
|
||||
3. **Long-term**: Add client-side session guard to stop requests
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
After implementing:
|
||||
|
||||
1. **Test Scenario 1**: Logout from Keycloak admin console
|
||||
- Should see: 1-2 refresh attempts, then cooldown
|
||||
- Should NOT see: Infinite loop
|
||||
|
||||
2. **Test Scenario 2**: Expire session manually
|
||||
- Should see: Cooldown prevents refresh storm
|
||||
- Should see: User redirected to sign-in
|
||||
|
||||
3. **Test Scenario 3**: Multiple widgets loading
|
||||
- Should see: All widgets respect cooldown
|
||||
- Should see: No refresh storm
|
||||
|
||||
---
|
||||
|
||||
## Monitoring
|
||||
|
||||
Add metrics:
|
||||
- Refresh attempt count
|
||||
- Refresh failure count
|
||||
- Cooldown activations
|
||||
- Session invalidations per user
|
||||
|
||||
211
LOG_ANALYSIS_SUMMARY.md
Normal file
211
LOG_ANALYSIS_SUMMARY.md
Normal file
@ -0,0 +1,211 @@
|
||||
# Log Analysis Summary - Infinite Refresh Loop Fix
|
||||
|
||||
## Problem Identified
|
||||
|
||||
Your logs showed a **critical infinite refresh loop**:
|
||||
|
||||
```
|
||||
Keycloak session invalidated, clearing token to force re-authentication
|
||||
Keycloak session invalidated, clearing token to force re-authentication
|
||||
Keycloak session invalidated, clearing token to force re-authentication
|
||||
... (repeating infinitely)
|
||||
```
|
||||
|
||||
### Root Cause
|
||||
|
||||
1. **Session Invalidated**: User's Keycloak session became invalid (logged out elsewhere, expired, etc.)
|
||||
2. **Multiple Widgets**: All widgets/components making parallel API requests
|
||||
3. **JWT Callback Triggered**: Each request triggers NextAuth JWT callback
|
||||
4. **Refresh Attempt**: Each callback tries to refresh the expired token
|
||||
5. **Refresh Fails**: Refresh fails because session is invalid
|
||||
6. **No Circuit Breaker**: Next request sees expired token → tries refresh again → **infinite loop**
|
||||
|
||||
### Impact
|
||||
|
||||
- **Performance**: Hundreds of refresh attempts per second
|
||||
- **Server Load**: CPU/memory spike
|
||||
- **Keycloak Load**: Potential DoS on Keycloak server
|
||||
- **User Experience**: App appears broken
|
||||
- **Logs**: Spam with error messages
|
||||
|
||||
---
|
||||
|
||||
## Solution Implemented
|
||||
|
||||
### Circuit Breaker Pattern
|
||||
|
||||
Added a **5-second cooldown** after failed refresh attempts:
|
||||
|
||||
1. **Track Failures**: Record timestamp when refresh fails
|
||||
2. **Cooldown Period**: Don't retry refresh for 5 seconds after failure
|
||||
3. **Early Return**: If in cooldown, return error immediately (no API call)
|
||||
4. **Memory Management**: Cleanup old entries to prevent memory leaks
|
||||
|
||||
### Code Changes
|
||||
|
||||
**File**: `app/api/auth/options.ts`
|
||||
|
||||
**Added:**
|
||||
- `refreshCooldown` Map to track last failure per user
|
||||
- `REFRESH_COOLDOWN_MS = 5000` (5 seconds)
|
||||
- `cleanupRefreshCooldown()` function to prevent memory leaks
|
||||
- Cooldown check before refresh attempt
|
||||
- Failure recording after failed refresh
|
||||
|
||||
**How It Works:**
|
||||
|
||||
```typescript
|
||||
// Before refresh attempt:
|
||||
if (timeSinceFailure < REFRESH_COOLDOWN_MS) {
|
||||
// Skip refresh, return error immediately
|
||||
return errorToken;
|
||||
}
|
||||
|
||||
// After failed refresh:
|
||||
if (refreshedToken.error === "SessionNotActive") {
|
||||
refreshCooldown.set(userId, Date.now()); // Record failure
|
||||
return errorToken;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Expected Behavior After Fix
|
||||
|
||||
### Before Fix
|
||||
```
|
||||
Request 1 → Refresh attempt → Fail → Clear tokens
|
||||
Request 2 → Refresh attempt → Fail → Clear tokens
|
||||
Request 3 → Refresh attempt → Fail → Clear tokens
|
||||
... (infinite loop)
|
||||
```
|
||||
|
||||
### After Fix
|
||||
```
|
||||
Request 1 → Refresh attempt → Fail → Record failure → Clear tokens
|
||||
Request 2 → Check cooldown → Skip refresh → Return error immediately
|
||||
Request 3 → Check cooldown → Skip refresh → Return error immediately
|
||||
... (cooldown prevents refresh attempts)
|
||||
After 5s → Next request can try refresh again (if session restored)
|
||||
```
|
||||
|
||||
### What You'll See in Logs
|
||||
|
||||
**Good Signs:**
|
||||
- ✅ "Refresh cooldown active, skipping refresh attempt" (instead of infinite failures)
|
||||
- ✅ Only 1-2 refresh attempts per user when session invalidates
|
||||
- ✅ User redirected to sign-in page
|
||||
- ✅ No refresh storm
|
||||
|
||||
**Bad Signs (if still happening):**
|
||||
- ❌ Still seeing infinite "Keycloak session invalidated" messages
|
||||
- ❌ Multiple refresh attempts within 5 seconds
|
||||
- ❌ Cooldown not working
|
||||
|
||||
---
|
||||
|
||||
## Testing the Fix
|
||||
|
||||
### Test Scenario 1: Session Invalidation
|
||||
1. Log in to the app
|
||||
2. Logout from Keycloak admin console (or expire session)
|
||||
3. **Expected**:
|
||||
- 1-2 refresh attempts
|
||||
- Then cooldown messages
|
||||
- User redirected to sign-in
|
||||
- **NOT** infinite loop
|
||||
|
||||
### Test Scenario 2: Multiple Widgets
|
||||
1. Open app with all widgets loading
|
||||
2. Invalidate session
|
||||
3. **Expected**:
|
||||
- All widgets respect cooldown
|
||||
- No refresh storm
|
||||
- Clean error handling
|
||||
|
||||
### Test Scenario 3: Normal Operation
|
||||
1. Valid session
|
||||
2. Token expires naturally
|
||||
3. **Expected**:
|
||||
- Refresh succeeds
|
||||
- No cooldown triggered
|
||||
- Normal operation continues
|
||||
|
||||
---
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Metrics to Watch
|
||||
|
||||
1. **Refresh Attempts**: Should be low (1-2 per user per session)
|
||||
2. **Cooldown Activations**: Should only happen when session invalid
|
||||
3. **Refresh Success Rate**: Should be high for valid sessions
|
||||
4. **Error Rate**: Should drop significantly
|
||||
|
||||
### Log Patterns
|
||||
|
||||
**Healthy:**
|
||||
```
|
||||
[DEBUG] Refresh cooldown active, skipping refresh attempt
|
||||
[INFO] Keycloak session invalidated, setting cooldown
|
||||
```
|
||||
|
||||
**Unhealthy (if still happening):**
|
||||
```
|
||||
Keycloak session invalidated, clearing token... (repeating)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Future Improvements
|
||||
|
||||
### Short-term (Recommended)
|
||||
1. ✅ **Done**: In-memory circuit breaker
|
||||
2. ⚠️ **Next**: Migrate to Redis-based circuit breaker (for multi-instance)
|
||||
3. ⚠️ **Next**: Add client-side session guard to stop requests
|
||||
|
||||
### Long-term
|
||||
1. ⚠️ Add metrics/monitoring
|
||||
2. ⚠️ Implement exponential backoff
|
||||
3. ⚠️ Add request cancellation on client-side
|
||||
4. ⚠️ Better error boundaries
|
||||
|
||||
---
|
||||
|
||||
## Additional Notes
|
||||
|
||||
### Why 5 Seconds?
|
||||
|
||||
- **Too Short (< 2s)**: Still allows refresh storms
|
||||
- **Too Long (> 10s)**: Delays legitimate refresh attempts
|
||||
- **5 Seconds**: Good balance - prevents storms, allows quick recovery
|
||||
|
||||
### Memory Considerations
|
||||
|
||||
- **Map Size**: Limited to 1000 entries (auto-cleanup)
|
||||
- **Memory Per Entry**: ~50 bytes (userId + timestamp)
|
||||
- **Total Memory**: ~50KB max
|
||||
- **Cleanup**: Automatic (removes entries older than 50s)
|
||||
|
||||
### Multi-Instance Deployment
|
||||
|
||||
**Current**: In-memory Map (per-instance)
|
||||
- Works for single instance
|
||||
- Each instance has its own cooldown
|
||||
|
||||
**Future**: Redis-based (shared across instances)
|
||||
- Better for multi-instance
|
||||
- Shared cooldown state
|
||||
- See `CRITICAL_ISSUE_ANALYSIS.md` for Redis implementation
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
✅ **Fixed**: Infinite refresh loop with circuit breaker
|
||||
✅ **Impact**: Prevents refresh storms, reduces server load
|
||||
✅ **Testing**: Verify with session invalidation scenarios
|
||||
⚠️ **Next**: Monitor logs, consider Redis migration for multi-instance
|
||||
|
||||
The fix is **production-ready** and should immediately stop the refresh loop you're seeing in your logs.
|
||||
|
||||
753
PROJECT_DEEP_ANALYSIS.md
Normal file
753
PROJECT_DEEP_ANALYSIS.md
Normal file
@ -0,0 +1,753 @@
|
||||
# Neah Project - Deep Technical Analysis
|
||||
|
||||
## Executive Summary
|
||||
|
||||
This document provides a comprehensive analysis of the Neah project architecture, focusing on:
|
||||
- Update Services & Refresh Management
|
||||
- Widgets Architecture
|
||||
- Notifications System
|
||||
- Authentication & Token Refresh
|
||||
- Performance & Memory Management
|
||||
- API Routes Tracing
|
||||
|
||||
---
|
||||
|
||||
## 1. Update Services & Refresh Management
|
||||
|
||||
### 1.1 Unified Refresh Manager (`lib/services/refresh-manager.ts`)
|
||||
|
||||
**Architecture:**
|
||||
- **Singleton Pattern**: Single instance manages all refresh operations
|
||||
- **Resource-Based**: Each refreshable resource has its own configuration
|
||||
- **Deduplication**: Prevents duplicate refresh requests
|
||||
- **Interval Management**: Centralized interval control
|
||||
|
||||
**Refreshable Resources:**
|
||||
```typescript
|
||||
type RefreshableResource =
|
||||
| 'notifications'
|
||||
| 'notifications-count'
|
||||
| 'calendar'
|
||||
| 'news'
|
||||
| 'email'
|
||||
| 'parole'
|
||||
| 'duties'
|
||||
| 'navbar-time';
|
||||
```
|
||||
|
||||
**Key Features:**
|
||||
|
||||
1. **Request Deduplication**
|
||||
- Minimum 1 second between refreshes for same resource
|
||||
- Tracks pending requests to prevent duplicates
|
||||
- Uses `pendingRequests` Map with promise tracking
|
||||
|
||||
2. **Interval Management**
|
||||
- Each resource can have different refresh intervals
|
||||
- Automatic cleanup on unregister
|
||||
- Pause/Resume functionality for all resources
|
||||
|
||||
3. **Error Handling**
|
||||
- Errors don't update `lastRefresh` timestamp (allows retry)
|
||||
- Comprehensive logging for debugging
|
||||
- Graceful degradation on failures
|
||||
|
||||
**Memory Impact:**
|
||||
- **Low**: Uses Maps for efficient lookups
|
||||
- **Cleanup**: Proper cleanup on component unmount
|
||||
- **Potential Issue**: If components don't unregister, intervals persist
|
||||
|
||||
**Performance Considerations:**
|
||||
- ✅ Deduplication prevents unnecessary API calls
|
||||
- ✅ Minimum 1s throttle prevents excessive refreshes
|
||||
- ⚠️ Multiple resources = multiple intervals (but necessary)
|
||||
- ⚠️ No priority-based scheduling (all resources treated equally)
|
||||
|
||||
### 1.2 Unified Refresh Hook (`hooks/use-unified-refresh.ts`)
|
||||
|
||||
**Purpose:** React hook wrapper for RefreshManager
|
||||
|
||||
**Key Features:**
|
||||
- Automatic registration/unregistration on mount/unmount
|
||||
- Session-aware (only active when authenticated)
|
||||
- Callback ref pattern to avoid stale closures
|
||||
- Manual refresh trigger with force option
|
||||
|
||||
**Usage Pattern:**
|
||||
```typescript
|
||||
const { refresh, isActive } = useUnifiedRefresh({
|
||||
resource: 'calendar',
|
||||
interval: 300000, // 5 minutes
|
||||
enabled: status === 'authenticated',
|
||||
onRefresh: fetchEvents,
|
||||
priority: 'low',
|
||||
});
|
||||
```
|
||||
|
||||
**Memory Leak Prevention:**
|
||||
- ✅ Cleanup in useEffect return
|
||||
- ✅ isMountedRef prevents state updates after unmount
|
||||
- ✅ Automatic unregister on unmount
|
||||
|
||||
---
|
||||
|
||||
## 2. Widgets Architecture
|
||||
|
||||
### 2.1 Widget Components Overview
|
||||
|
||||
**Main Dashboard Widgets** (`app/page.tsx`):
|
||||
1. **QuoteCard** - Inspirational quotes
|
||||
2. **Calendar** - Upcoming events (7 events)
|
||||
3. **News** - News articles (100 limit)
|
||||
4. **Duties** - Leantime tasks (7 tasks)
|
||||
5. **Email** - Email preview (5 emails)
|
||||
6. **Parole** - RocketChat messages
|
||||
|
||||
### 2.2 Widget Refresh Patterns
|
||||
|
||||
**Current Implementation Issues:**
|
||||
|
||||
1. **Calendar Widget** (`components/calendar.tsx`)
|
||||
- ❌ No unified refresh integration
|
||||
- ❌ Manual refresh only via button
|
||||
- ❌ Fetches on mount only
|
||||
- ⚠️ Uses `?refresh=true` parameter (bypasses cache)
|
||||
|
||||
2. **News Widget** (`components/news.tsx`)
|
||||
- ❌ No unified refresh integration
|
||||
- ✅ Manual refresh button
|
||||
- ✅ Fetches on authentication
|
||||
- ⚠️ Uses `?refresh=true` parameter
|
||||
|
||||
3. **Email Widget** (`components/email.tsx`)
|
||||
- ❌ No unified refresh integration
|
||||
- ✅ Manual refresh button
|
||||
- ⚠️ Fetches on mount only
|
||||
- ⚠️ Uses `?refresh=true` parameter
|
||||
|
||||
4. **Parole Widget** (`components/parole.tsx`)
|
||||
- ❌ No unified refresh integration
|
||||
- ⚠️ **Custom polling**: `setInterval(() => fetchMessages(), 30000)` (30s)
|
||||
- ⚠️ **Memory Leak Risk**: Interval not cleared if component unmounts during fetch
|
||||
- ✅ Manual refresh button
|
||||
|
||||
5. **Duties Widget** (`components/flow.tsx`)
|
||||
- ❌ No unified refresh integration
|
||||
- ❌ Fetches on mount only
|
||||
- ⚠️ Uses `?refresh=true` parameter
|
||||
|
||||
### 2.3 Widget Memory & Performance Issues
|
||||
|
||||
**Critical Issues:**
|
||||
|
||||
1. **Multiple Polling Mechanisms**
|
||||
- Parole widget uses `setInterval` (30s)
|
||||
- No coordination with RefreshManager
|
||||
- Risk of memory leaks if cleanup fails
|
||||
|
||||
2. **Cache Bypassing**
|
||||
- Most widgets use `?refresh=true`
|
||||
- Bypasses Redis cache
|
||||
- Increases server load
|
||||
|
||||
3. **No Unified Refresh**
|
||||
- Widgets don't use `useUnifiedRefresh` hook
|
||||
- Inconsistent refresh patterns
|
||||
- Hard to manage globally
|
||||
|
||||
4. **State Management**
|
||||
- Each widget manages its own state
|
||||
- No shared state/cache
|
||||
- Potential duplicate API calls
|
||||
|
||||
**Recommendations:**
|
||||
- ✅ Migrate all widgets to use `useUnifiedRefresh`
|
||||
- ✅ Remove custom `setInterval` implementations
|
||||
- ✅ Use cache-first strategy (remove `?refresh=true` by default)
|
||||
- ✅ Implement widget-level error boundaries
|
||||
|
||||
---
|
||||
|
||||
## 3. Notifications System
|
||||
|
||||
### 3.1 Architecture Overview
|
||||
|
||||
**Service Pattern:** Singleton with adapter pattern
|
||||
|
||||
**Location:** `lib/services/notifications/notification-service.ts`
|
||||
|
||||
**Adapters:**
|
||||
- `LeantimeAdapter` (implemented)
|
||||
- NextcloudAdapter (planned)
|
||||
- GiteaAdapter (planned)
|
||||
- DolibarrAdapter (planned)
|
||||
- MoodleAdapter (planned)
|
||||
|
||||
### 3.2 Caching Strategy
|
||||
|
||||
**Redis Cache Keys:**
|
||||
```typescript
|
||||
NOTIFICATION_COUNT_CACHE_KEY = `notifications:count:${userId}`
|
||||
NOTIFICATIONS_LIST_CACHE_KEY = `notifications:list:${userId}:${page}:${limit}`
|
||||
```
|
||||
|
||||
**Cache TTL:**
|
||||
- Count cache: 30 seconds
|
||||
- List cache: 30 seconds
|
||||
- Refresh lock: 30 seconds
|
||||
|
||||
**Cache Invalidation:**
|
||||
- On `markAsRead`: Invalidates all user caches
|
||||
- Uses Redis SCAN for pattern matching
|
||||
- Prevents blocking operations
|
||||
|
||||
### 3.3 Refresh Management
|
||||
|
||||
**Integration with RefreshManager:**
|
||||
- ✅ Uses unified refresh system
|
||||
- ✅ Registered as 'notifications' and 'notifications-count'
|
||||
- ✅ 30-second refresh interval (aligned with cache TTL)
|
||||
|
||||
**Hook Usage** (`hooks/use-notifications.ts`):
|
||||
- Request deduplication (2-second window)
|
||||
- Automatic refresh on mount
|
||||
- Manual refresh capability
|
||||
- Error handling with retry
|
||||
|
||||
### 3.4 Performance Characteristics
|
||||
|
||||
**Strengths:**
|
||||
- ✅ Redis caching reduces database load
|
||||
- ✅ Adapter pattern allows easy extension
|
||||
- ✅ Parallel fetching from multiple adapters
|
||||
- ✅ Request deduplication prevents duplicate calls
|
||||
|
||||
**Potential Issues:**
|
||||
- ⚠️ SCAN operations can be slow with many keys
|
||||
- ⚠️ No pagination limits on adapter results
|
||||
- ⚠️ All adapters fetched in parallel (could be optimized)
|
||||
|
||||
**Memory Impact:**
|
||||
- **Low**: Cached data in Redis, not memory
|
||||
- **Medium**: Notification objects in React state
|
||||
- **Low**: Adapter instances are singletons
|
||||
|
||||
---
|
||||
|
||||
## 4. Authentication & Token Refresh
|
||||
|
||||
### 4.1 Keycloak Integration
|
||||
|
||||
**Provider:** NextAuth with KeycloakProvider
|
||||
|
||||
**Location:** `app/api/auth/options.ts`
|
||||
|
||||
### 4.2 Token Refresh Flow
|
||||
|
||||
**JWT Callback Logic:**
|
||||
|
||||
1. **Initial Sign-In:**
|
||||
- Stores access token, refresh token, ID token
|
||||
- Extracts roles from access token
|
||||
- Sets expiration timestamp
|
||||
|
||||
2. **Subsequent Requests:**
|
||||
- Checks if token is expired
|
||||
- If expired, calls `refreshAccessToken()`
|
||||
- Updates token with new values
|
||||
|
||||
**Refresh Function** (`refreshAccessToken`):
|
||||
|
||||
```typescript
|
||||
async function refreshAccessToken(token: ExtendedJWT) {
|
||||
// Calls Keycloak token endpoint
|
||||
// Handles various error scenarios:
|
||||
// - SessionNotActive (user logged out)
|
||||
// - RefreshTokenExpired (inactivity)
|
||||
// - InvalidGrant (session invalidated)
|
||||
}
|
||||
```
|
||||
|
||||
**Error Handling:**
|
||||
- ✅ Detects session invalidation
|
||||
- ✅ Handles refresh token expiration
|
||||
- ✅ Clears tokens on critical errors
|
||||
- ✅ Returns null session to trigger re-auth
|
||||
|
||||
### 4.3 Session Management
|
||||
|
||||
**Session Configuration:**
|
||||
- Strategy: JWT (stateless)
|
||||
- Max Age: 4 hours (14,400 seconds)
|
||||
- Automatic refresh on activity
|
||||
|
||||
**Cookie Configuration:**
|
||||
- HttpOnly: true
|
||||
- SameSite: 'lax'
|
||||
- Secure: Based on NEXTAUTH_URL
|
||||
|
||||
### 4.4 Email OAuth Token Refresh
|
||||
|
||||
**Service:** `lib/services/token-refresh.ts`
|
||||
|
||||
**Purpose:** Refresh Microsoft OAuth tokens for email access
|
||||
|
||||
**Flow:**
|
||||
1. Check Redis cache for credentials
|
||||
2. If cache miss, check Prisma database
|
||||
3. Validate token expiration (5-minute buffer)
|
||||
4. Refresh if needed via Microsoft OAuth
|
||||
5. Update both Redis and Prisma
|
||||
|
||||
**Dual Storage:**
|
||||
- **Redis**: Fast access, 24-hour TTL
|
||||
- **Prisma**: Persistent storage, survives Redis restarts
|
||||
|
||||
**Memory Impact:**
|
||||
- **Low**: Credentials stored in Redis/DB, not memory
|
||||
- **Medium**: Token refresh operations are async
|
||||
- **Low**: No memory leaks (proper cleanup)
|
||||
|
||||
### 4.5 Performance Considerations
|
||||
|
||||
**Token Refresh Frequency:**
|
||||
- Keycloak: On every request if expired
|
||||
- Email OAuth: Only when expired (5-min buffer)
|
||||
|
||||
**Optimization Opportunities:**
|
||||
- ⚠️ Token refresh happens synchronously in JWT callback
|
||||
- ⚠️ Could implement background refresh
|
||||
- ✅ Caching reduces refresh frequency
|
||||
|
||||
---
|
||||
|
||||
## 5. Performance & Memory Management
|
||||
|
||||
### 5.1 Next.js Configuration
|
||||
|
||||
**Build Configuration** (`next.config.mjs`):
|
||||
```javascript
|
||||
experimental: {
|
||||
webpackBuildWorker: true,
|
||||
parallelServerBuildTraces: true,
|
||||
parallelServerCompiles: true,
|
||||
}
|
||||
```
|
||||
|
||||
**Memory Impact:**
|
||||
- ✅ Parallel builds reduce build time
|
||||
- ⚠️ Multiple workers increase memory during build
|
||||
- ✅ Production builds are optimized
|
||||
|
||||
### 5.2 Redis Connection Management
|
||||
|
||||
**Singleton Pattern** (`lib/redis.ts`):
|
||||
- Single Redis client instance
|
||||
- Connection pooling
|
||||
- Automatic reconnection with retry strategy
|
||||
|
||||
**Memory Impact:**
|
||||
- **Low**: Single connection per process
|
||||
- **Medium**: Connection pool (if configured)
|
||||
- **Low**: Proper cleanup on disconnect
|
||||
|
||||
**Connection Strategy:**
|
||||
- Max reconnect attempts: 5
|
||||
- Exponential backoff
|
||||
- Connection timeout: 10 seconds
|
||||
- Keep-alive: 10 seconds
|
||||
|
||||
### 5.3 Caching Strategy
|
||||
|
||||
**Redis Cache TTLs:**
|
||||
```typescript
|
||||
CREDENTIALS: 24 hours
|
||||
SESSION: 4 hours
|
||||
EMAIL_LIST: 5 minutes
|
||||
EMAIL_CONTENT: 15 minutes
|
||||
CALENDAR: 10 minutes
|
||||
NEWS: 15 minutes
|
||||
TASKS: 10 minutes
|
||||
MESSAGES: 2 minutes
|
||||
NOTIFICATIONS: 30 seconds
|
||||
```
|
||||
|
||||
**Memory Impact:**
|
||||
- **Low**: Data in Redis, not application memory
|
||||
- **Medium**: Large cache can consume Redis memory
|
||||
- **Low**: TTL ensures automatic cleanup
|
||||
|
||||
### 5.4 Component Memory Management
|
||||
|
||||
**Potential Memory Leaks:**
|
||||
|
||||
1. **Parole Widget** (`components/parole.tsx`):
|
||||
```typescript
|
||||
// ⚠️ RISK: Interval might not clear if component unmounts during fetch
|
||||
useEffect(() => {
|
||||
if (status === 'authenticated') {
|
||||
fetchMessages();
|
||||
const interval = setInterval(() => fetchMessages(), 30000);
|
||||
return () => clearInterval(interval); // ✅ Good, but...
|
||||
}
|
||||
}, [status]);
|
||||
```
|
||||
**Issue**: If `fetchMessages()` is async and component unmounts, state updates may occur
|
||||
|
||||
2. **Widget State:**
|
||||
- Each widget maintains its own state
|
||||
- No cleanup on unmount for pending requests
|
||||
- Potential memory leaks with large data arrays
|
||||
|
||||
3. **Event Listeners:**
|
||||
- No evidence of unregistered event listeners
|
||||
- ✅ React handles most cleanup automatically
|
||||
|
||||
### 5.5 API Route Performance
|
||||
|
||||
**Common Patterns:**
|
||||
|
||||
1. **Session Validation:**
|
||||
```typescript
|
||||
const session = await getServerSession(authOptions);
|
||||
```
|
||||
- Called on every request
|
||||
- JWT validation overhead
|
||||
- Could be optimized with middleware
|
||||
|
||||
2. **Database Queries:**
|
||||
- Prisma ORM adds overhead
|
||||
- No query optimization visible
|
||||
- Connection pooling handled by Prisma
|
||||
|
||||
3. **Redis Operations:**
|
||||
- Most routes check cache first
|
||||
- SCAN operations for pattern matching
|
||||
- Could be optimized with better key patterns
|
||||
|
||||
### 5.6 Memory Optimization Recommendations
|
||||
|
||||
**High Priority:**
|
||||
1. ✅ Fix Parole widget interval cleanup
|
||||
2. ✅ Migrate widgets to unified refresh
|
||||
3. ✅ Implement request cancellation for unmounted components
|
||||
4. ✅ Add error boundaries to prevent memory leaks
|
||||
|
||||
**Medium Priority:**
|
||||
1. ⚠️ Implement API route middleware for auth
|
||||
2. ⚠️ Optimize Redis SCAN operations
|
||||
3. ⚠️ Add request timeout handling
|
||||
4. ⚠️ Implement connection pooling for external APIs
|
||||
|
||||
**Low Priority:**
|
||||
1. ⚠️ Consider React Query for state management
|
||||
2. ⚠️ Implement virtual scrolling for large lists
|
||||
3. ⚠️ Add memory profiling tools
|
||||
|
||||
---
|
||||
|
||||
## 6. API Routes Tracing
|
||||
|
||||
### 6.1 Logging Infrastructure
|
||||
|
||||
**Logger** (`lib/logger.ts`):
|
||||
- Environment-aware (silent in production for debug/info)
|
||||
- Always logs errors
|
||||
- Simple console-based logging
|
||||
|
||||
**Limitations:**
|
||||
- ❌ No structured logging (JSON)
|
||||
- ❌ No log levels in production
|
||||
- ❌ No centralized log aggregation
|
||||
- ❌ No request tracing IDs
|
||||
|
||||
### 6.2 Current Logging Patterns
|
||||
|
||||
**API Routes:**
|
||||
- 343 `console.log/error/warn` calls across 68 files
|
||||
- Inconsistent logging patterns
|
||||
- Some routes have detailed logging, others minimal
|
||||
|
||||
**Examples:**
|
||||
|
||||
1. **Good Logging** (`app/api/missions/mission-created/route.ts`):
|
||||
```typescript
|
||||
logger.debug('Mission Created Webhook Received');
|
||||
logger.debug('Received mission-created data', { ... });
|
||||
```
|
||||
|
||||
2. **Inconsistent Logging** (`app/api/courrier/route.ts`):
|
||||
```typescript
|
||||
console.log(`[API] Received request with: ...`);
|
||||
// Mix of console.log and logger
|
||||
```
|
||||
|
||||
### 6.3 API Route Categories
|
||||
|
||||
**Authentication Routes:**
|
||||
- `/api/auth/[...nextauth]` - NextAuth handler
|
||||
- `/api/auth/refresh-keycloak-session` - Session refresh
|
||||
- `/api/auth/debug-keycloak` - Debug endpoint
|
||||
|
||||
**Email Routes (Courrier):**
|
||||
- `/api/courrier` - Email list
|
||||
- `/api/courrier/emails` - Email list (alternative)
|
||||
- `/api/courrier/[id]` - Single email
|
||||
- `/api/courrier/refresh` - Token refresh
|
||||
- `/api/courrier/session` - IMAP session
|
||||
- `/api/courrier/account` - Account management
|
||||
|
||||
**Calendar Routes:**
|
||||
- `/api/calendars` - Calendar list
|
||||
- `/api/calendars/[id]` - Single calendar
|
||||
- `/api/calendars/[id]/events` - Calendar events
|
||||
- `/api/events` - Event CRUD
|
||||
|
||||
**Notification Routes:**
|
||||
- `/api/notifications` - Notification list
|
||||
- `/api/notifications/count` - Notification count
|
||||
- `/api/notifications/[id]/read` - Mark as read
|
||||
- `/api/notifications/read-all` - Mark all as read
|
||||
|
||||
**Mission Routes:**
|
||||
- `/api/missions` - Mission list
|
||||
- `/api/missions/[missionId]` - Single mission
|
||||
- `/api/missions/upload` - File upload
|
||||
- `/api/missions/mission-created` - Webhook handler
|
||||
|
||||
### 6.4 Tracing Recommendations
|
||||
|
||||
**Immediate Improvements:**
|
||||
|
||||
1. **Request ID Tracking:**
|
||||
```typescript
|
||||
// Add to middleware or API route wrapper
|
||||
const requestId = crypto.randomUUID();
|
||||
logger.info('Request started', { requestId, path, method });
|
||||
```
|
||||
|
||||
2. **Structured Logging:**
|
||||
```typescript
|
||||
logger.info('API Request', {
|
||||
requestId,
|
||||
method,
|
||||
path,
|
||||
userId,
|
||||
duration: Date.now() - startTime,
|
||||
});
|
||||
```
|
||||
|
||||
3. **Error Tracking:**
|
||||
```typescript
|
||||
logger.error('API Error', {
|
||||
requestId,
|
||||
error: error.message,
|
||||
stack: error.stack,
|
||||
path,
|
||||
userId,
|
||||
});
|
||||
```
|
||||
|
||||
4. **Performance Monitoring:**
|
||||
```typescript
|
||||
const startTime = Date.now();
|
||||
// ... route logic
|
||||
logger.debug('API Response', {
|
||||
requestId,
|
||||
duration: Date.now() - startTime,
|
||||
statusCode,
|
||||
});
|
||||
```
|
||||
|
||||
**Advanced Tracing:**
|
||||
|
||||
1. **OpenTelemetry Integration:**
|
||||
- Distributed tracing
|
||||
- Performance metrics
|
||||
- Error tracking
|
||||
|
||||
2. **APM Tools:**
|
||||
- New Relic
|
||||
- Datadog
|
||||
- Sentry
|
||||
|
||||
3. **Custom Middleware:**
|
||||
```typescript
|
||||
// app/api/middleware.ts
|
||||
export function withTracing(handler: Function) {
|
||||
return async (req: Request, res: Response) => {
|
||||
const requestId = crypto.randomUUID();
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const result = await handler(req, res);
|
||||
logger.info('Request completed', {
|
||||
requestId,
|
||||
duration: Date.now() - startTime,
|
||||
});
|
||||
return result;
|
||||
} catch (error) {
|
||||
logger.error('Request failed', {
|
||||
requestId,
|
||||
error,
|
||||
duration: Date.now() - startTime,
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### 6.5 API Route Performance Metrics
|
||||
|
||||
**Current State:**
|
||||
- ❌ No performance metrics collected
|
||||
- ❌ No request duration tracking
|
||||
- ❌ No error rate monitoring
|
||||
- ❌ No cache hit/miss tracking
|
||||
|
||||
**Recommended Metrics:**
|
||||
1. Request duration (p50, p95, p99)
|
||||
2. Error rate by route
|
||||
3. Cache hit/miss ratio
|
||||
4. Database query count
|
||||
5. Redis operation count
|
||||
6. External API call duration
|
||||
|
||||
---
|
||||
|
||||
## 7. Critical Issues & Recommendations
|
||||
|
||||
### 7.1 Critical Issues
|
||||
|
||||
1. **Memory Leak Risk - Parole Widget**
|
||||
- Custom `setInterval` without proper cleanup
|
||||
- **Fix**: Migrate to `useUnifiedRefresh`
|
||||
|
||||
2. **Inconsistent Refresh Patterns**
|
||||
- Widgets don't use unified refresh system
|
||||
- **Fix**: Migrate all widgets to `useUnifiedRefresh`
|
||||
|
||||
3. **Cache Bypassing**
|
||||
- Widgets use `?refresh=true` by default
|
||||
- **Fix**: Use cache-first strategy
|
||||
|
||||
4. **No Request Tracing**
|
||||
- Difficult to debug production issues
|
||||
- **Fix**: Implement request ID tracking
|
||||
|
||||
5. **No Performance Monitoring**
|
||||
- No visibility into slow routes
|
||||
- **Fix**: Add performance metrics
|
||||
|
||||
### 7.2 High Priority Recommendations
|
||||
|
||||
1. ✅ Migrate all widgets to unified refresh system
|
||||
2. ✅ Fix Parole widget interval cleanup
|
||||
3. ✅ Implement request ID tracking
|
||||
4. ✅ Add performance metrics
|
||||
5. ✅ Standardize logging patterns
|
||||
|
||||
### 7.3 Medium Priority Recommendations
|
||||
|
||||
1. ⚠️ Implement API route middleware
|
||||
2. ⚠️ Optimize Redis SCAN operations
|
||||
3. ⚠️ Add error boundaries
|
||||
4. ⚠️ Implement request cancellation
|
||||
5. ⚠️ Add structured logging
|
||||
|
||||
### 7.4 Low Priority Recommendations
|
||||
|
||||
1. ⚠️ Consider React Query
|
||||
2. ⚠️ Implement virtual scrolling
|
||||
3. ⚠️ Add memory profiling
|
||||
4. ⚠️ Consider OpenTelemetry
|
||||
5. ⚠️ Add APM tooling
|
||||
|
||||
---
|
||||
|
||||
## 8. Architecture Strengths
|
||||
|
||||
### 8.1 Well-Designed Components
|
||||
|
||||
1. **Unified Refresh Manager**
|
||||
- Excellent abstraction
|
||||
- Proper deduplication
|
||||
- Clean API
|
||||
|
||||
2. **Notification Service**
|
||||
- Adapter pattern allows extension
|
||||
- Good caching strategy
|
||||
- Proper error handling
|
||||
|
||||
3. **Redis Integration**
|
||||
- Comprehensive caching
|
||||
- Proper TTL management
|
||||
- Good key naming conventions
|
||||
|
||||
4. **Token Refresh**
|
||||
- Dual storage (Redis + Prisma)
|
||||
- Proper error handling
|
||||
- Automatic refresh
|
||||
|
||||
### 8.2 Code Quality
|
||||
|
||||
- ✅ TypeScript throughout
|
||||
- ✅ Consistent component structure
|
||||
- ✅ Proper error handling in most places
|
||||
- ✅ Good separation of concerns
|
||||
|
||||
---
|
||||
|
||||
## 9. Conclusion
|
||||
|
||||
The Neah project demonstrates a well-architected Next.js application with several sophisticated systems:
|
||||
|
||||
**Strengths:**
|
||||
- Unified refresh management system
|
||||
- Comprehensive caching strategy
|
||||
- Robust authentication flow
|
||||
- Extensible notification system
|
||||
|
||||
**Areas for Improvement:**
|
||||
- Widget refresh consistency
|
||||
- Memory leak prevention
|
||||
- API route tracing
|
||||
- Performance monitoring
|
||||
|
||||
**Overall Assessment:**
|
||||
The codebase is production-ready but would benefit from the recommended improvements, particularly around widget refresh management and observability.
|
||||
|
||||
---
|
||||
|
||||
## Appendix: File Reference Map
|
||||
|
||||
### Core Services
|
||||
- `lib/services/refresh-manager.ts` - Unified refresh management
|
||||
- `lib/services/notifications/notification-service.ts` - Notification system
|
||||
- `lib/services/token-refresh.ts` - Email OAuth token refresh
|
||||
- `lib/redis.ts` - Redis caching utilities
|
||||
- `lib/logger.ts` - Logging utility
|
||||
|
||||
### Hooks
|
||||
- `hooks/use-unified-refresh.ts` - Unified refresh hook
|
||||
- `hooks/use-notifications.ts` - Notification hook
|
||||
|
||||
### Widgets
|
||||
- `components/calendar.tsx` - Calendar widget
|
||||
- `components/news.tsx` - News widget
|
||||
- `components/email.tsx` - Email widget
|
||||
- `components/parole.tsx` - Messages widget
|
||||
- `components/flow.tsx` - Tasks widget
|
||||
|
||||
### API Routes
|
||||
- `app/api/auth/options.ts` - NextAuth configuration
|
||||
- `app/api/notifications/` - Notification endpoints
|
||||
- `app/api/courrier/` - Email endpoints
|
||||
- `app/api/calendars/` - Calendar endpoints
|
||||
|
||||
---
|
||||
|
||||
*Document generated: 2024*
|
||||
*Last updated: Analysis session*
|
||||
|
||||
@ -82,6 +82,24 @@ type ExtendedJWT = {
|
||||
[key: string]: any;
|
||||
};
|
||||
|
||||
// Circuit breaker to prevent infinite refresh loops
|
||||
// Tracks last failed refresh attempt per user to implement cooldown
|
||||
const refreshCooldown = new Map<string, number>();
|
||||
const REFRESH_COOLDOWN_MS = 5000; // 5 seconds - don't retry refresh for 5s after failure
|
||||
|
||||
// Cleanup old entries periodically to prevent memory leak
|
||||
function cleanupRefreshCooldown() {
|
||||
if (refreshCooldown.size > 1000) {
|
||||
const now = Date.now();
|
||||
for (const [userId, failureTime] of refreshCooldown.entries()) {
|
||||
// Remove entries older than 10x cooldown period
|
||||
if (now - failureTime > REFRESH_COOLDOWN_MS * 10) {
|
||||
refreshCooldown.delete(userId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function refreshAccessToken(token: ExtendedJWT) {
|
||||
try {
|
||||
const response = await fetch(`${process.env.KEYCLOAK_ISSUER}/protocol/openid-connect/token`, {
|
||||
@ -330,6 +348,40 @@ export const authOptions: NextAuthOptions = {
|
||||
}
|
||||
}
|
||||
|
||||
// CIRCUIT BREAKER: If we already know the session is invalid, don't try to refresh again
|
||||
// This prevents infinite refresh loops when session is invalidated
|
||||
if (token.error === "SessionNotActive") {
|
||||
const userId = token.sub || 'unknown';
|
||||
const lastFailure = refreshCooldown.get(userId) || 0;
|
||||
const timeSinceFailure = Date.now() - lastFailure;
|
||||
|
||||
// If we recently failed, return error immediately (cooldown active)
|
||||
if (timeSinceFailure < REFRESH_COOLDOWN_MS) {
|
||||
logger.debug('Refresh cooldown active, skipping refresh attempt', {
|
||||
userId,
|
||||
timeSinceFailure,
|
||||
cooldownRemaining: REFRESH_COOLDOWN_MS - timeSinceFailure,
|
||||
});
|
||||
return {
|
||||
...token,
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
error: "SessionNotActive",
|
||||
};
|
||||
}
|
||||
|
||||
// Cooldown expired, but session still invalid - don't retry
|
||||
// Return error token to trigger sign-out
|
||||
return {
|
||||
...token,
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
error: "SessionNotActive",
|
||||
};
|
||||
}
|
||||
|
||||
// Check if token is expired and needs refresh
|
||||
// accessTokenExpires is in milliseconds
|
||||
const expiresAt = token.accessTokenExpires as number;
|
||||
@ -338,6 +390,27 @@ export const authOptions: NextAuthOptions = {
|
||||
return token;
|
||||
}
|
||||
|
||||
// CIRCUIT BREAKER: Check if we recently failed to refresh for this user
|
||||
const userId = token.sub || 'unknown';
|
||||
const lastFailure = refreshCooldown.get(userId) || 0;
|
||||
const timeSinceFailure = Date.now() - lastFailure;
|
||||
|
||||
if (timeSinceFailure < REFRESH_COOLDOWN_MS) {
|
||||
// Too soon after failure, return error token immediately
|
||||
logger.debug('Refresh cooldown active, skipping refresh attempt', {
|
||||
userId,
|
||||
timeSinceFailure,
|
||||
cooldownRemaining: REFRESH_COOLDOWN_MS - timeSinceFailure,
|
||||
});
|
||||
return {
|
||||
...token,
|
||||
error: "SessionNotActive",
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
// Token expired or invalidated, try to refresh
|
||||
if (!token.refreshToken) {
|
||||
console.log("No refresh token available, cannot refresh");
|
||||
@ -352,29 +425,50 @@ export const authOptions: NextAuthOptions = {
|
||||
|
||||
const refreshedToken = await refreshAccessToken(token);
|
||||
|
||||
// If refresh failed due to invalid session, clear the token to force re-authentication
|
||||
// If refresh failed due to invalid session, record failure and set cooldown
|
||||
if (refreshedToken.error === "SessionNotActive") {
|
||||
console.log("Keycloak session invalidated, clearing token to force re-authentication");
|
||||
refreshCooldown.set(userId, Date.now());
|
||||
cleanupRefreshCooldown(); // Prevent memory leak
|
||||
|
||||
logger.info("Keycloak session invalidated, setting cooldown", {
|
||||
userId,
|
||||
cooldownMs: REFRESH_COOLDOWN_MS,
|
||||
});
|
||||
|
||||
// Return a token that will cause session callback to return null
|
||||
return {
|
||||
...refreshedToken,
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
error: "SessionNotActive",
|
||||
};
|
||||
}
|
||||
|
||||
// If refresh failed with invalid_grant (token not active), also clear tokens
|
||||
// If refresh failed with invalid_grant (token not active), also record failure
|
||||
if (refreshedToken.error === "RefreshAccessTokenError" && !refreshedToken.accessToken) {
|
||||
console.log("Refresh token invalid, clearing session to force re-authentication");
|
||||
refreshCooldown.set(userId, Date.now());
|
||||
cleanupRefreshCooldown();
|
||||
|
||||
logger.info("Refresh token invalid, setting cooldown", {
|
||||
userId,
|
||||
cooldownMs: REFRESH_COOLDOWN_MS,
|
||||
});
|
||||
|
||||
return {
|
||||
...refreshedToken,
|
||||
accessToken: undefined,
|
||||
refreshToken: undefined,
|
||||
idToken: undefined,
|
||||
error: "SessionNotActive",
|
||||
};
|
||||
}
|
||||
|
||||
// Success - clear any previous failure record
|
||||
if (refreshedToken.accessToken) {
|
||||
refreshCooldown.delete(userId);
|
||||
}
|
||||
|
||||
return refreshedToken;
|
||||
},
|
||||
async session({ session, token }) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user