#!/bin/bash

# Pinzón Aggregator v6.3 - Con soporte multi-protocolo y múltiples formatos de fecha
# gemini://caleb.subnet.city
# Licencia: UNLICENSE

# --- Input / output ---------------------------------------------------------
FEEDS_FILE="feeds.txt"
OUTPUT_FILE="index"                  # base name only; ".gmi" is appended below
INDEX_FILE="${OUTPUT_FILE}.gmi"
TEMP_DIR="/tmp/gemini_parser"
MAX_ENTRIES=84                       # maximum number of entries to show

# --- Cache ------------------------------------------------------------------
CACHE_DIR="$HOME/.pinzon"
CACHE_FILE="$CACHE_DIR/pinzon.cache"

# --- Lagrange binary --------------------------------------------------------
# A non-empty LAGRANGE_PATH coming from the environment is kept untouched.
# Otherwise prefer a "lagrange" found in PATH, then $HOME/lagrange/lagrange,
# and finally fall back to the bare command name.
if [[ -z "$LAGRANGE_PATH" ]]; then
    if ! command -v lagrange >/dev/null 2>&1 && [[ -f "$HOME/lagrange/lagrange" ]]; then
        LAGRANGE_PATH="$HOME/lagrange/lagrange"
    else
        LAGRANGE_PATH="lagrange"
    fi
fi

USE_LAGRANGE_ONLY=1                  # use only Lagrange for every download

# --- Retry policy -----------------------------------------------------------
MAX_RETRIES=3                        # maximum attempts per feed
RETRY_DELAY=2                        # seconds to wait between attempts

# --- Debug switch (first positional argument) -------------------------------
DEBUG=0
case "$1" in
    --debug)
        DEBUG=1
        echo "=== MODO DEBUG ACTIVADO ==="
        ;;
esac

mkdir -p "$TEMP_DIR"

# --- Global collections -----------------------------------------------------
declare -a entries                   # collected entry records
declare -A blog_names blog_favicons  # keyed by blog URL

# --- Terminal colours (expanded later by `echo -e`) -------------------------
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# --- Date window: today, its year and the year before -----------------------
TODAY=$(date +%Y-%m-%d)
CURRENT_YEAR=${TODAY%%-*}
PREVIOUS_YEAR=$((CURRENT_YEAR - 1))

# ============================
# SISTEMA DE LOG DE ERRORES
# ============================

ERROR_LOG="error.log"

# Append one timestamped line to $ERROR_LOG.
# Globals:   ERROR_LOG (read) - path of the log file, opened in append mode
# Arguments: $1 - message text
# Output:    "[YYYY-MM-DD HH:MM:SS] ERROR: <message>" appended to the log
log_error() {
    local msg="$1"
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s\n' "[$stamp] ERROR: $msg" >> "$ERROR_LOG"
}

# ============================
# SISTEMA DE CACHÉ SIMPLIFICADO
# ============================

# Make sure the cache directory and cache file exist.
# Globals: CACHE_DIR, CACHE_FILE, GREEN, NC (all read)
# Output:  a confirmation line on stdout, only when the file had to be created
init_cache() {
    mkdir -p "$CACHE_DIR"

    # Nothing else to do when the cache file is already there.
    [[ -f "$CACHE_FILE" ]] && return 0

    touch "$CACHE_FILE"
    echo -e "${GREEN}✓ Caché inicializado: $CACHE_FILE${NC}"
}

# Look up the cached "last entry" value stored for a feed URL.
# Cache lines have the form "<url>|<last entry>"; everything after the first
# "|" belongs to the value.
# Globals:   CACHE_FILE (read)
# Arguments: $1 - feed URL (compared literally)
# Output:    the cached value on stdout when found
# Returns:   0 when found, 1 when the cache file or the URL is missing
get_cached_last_entry() {
    local url="$1"
    # Keep the loop variables local so they do not overwrite caller globals.
    local cached_url cached_last_entry

    [[ -f "$CACHE_FILE" ]] || return 1

    # "|| [[ -n ... ]]" also processes a final line without trailing newline.
    while IFS='|' read -r cached_url cached_last_entry || [[ -n "$cached_url" ]]; do
        if [[ "$cached_url" == "$url" ]]; then
            printf '%s\n' "$cached_last_entry"
            return 0
        fi
    done < "$CACHE_FILE"

    return 1
}

# Store (or replace) the cached "last entry" value of a feed.
# Globals:   CACHE_FILE, GREEN, NC (read)
# Arguments: $1 - feed URL
#            $2 - value to remember for that URL
# Output:    a confirmation line on stdout
update_cache() {
    local feed_url="$1"
    local newest="$2"

    # Drop any previous record of this URL so it appears only once.
    remove_from_cache "$feed_url"

    printf '%s|%s\n' "$feed_url" "$newest" >> "$CACHE_FILE"
    echo -e "${GREEN}✓ Caché actualizado: $feed_url${NC}"
}

# Delete every cache line that belongs to the given feed URL.
# A line belongs to the URL when it starts with the literal text "<url>|".
# The URL is compared as plain text (never as a regular expression), so
# characters such as ".", "[" or "?" cannot match unrelated entries.
# Globals:   CACHE_FILE (read, replaced)
# Arguments: $1 - feed URL
# Returns:   0 when there is no cache file; 1 when no temporary file could be
#            created; otherwise the status of the final mv
remove_from_cache() {
    local url="$1"
    local temp_file line

    # Check first, so no temporary file is created (and leaked) needlessly.
    [[ -f "$CACHE_FILE" ]] || return 0

    # Temp file next to the cache so the final mv is a same-directory rename.
    temp_file=$(mktemp "${CACHE_FILE}.XXXXXX") || return 1

    while IFS= read -r line || [[ -n "$line" ]]; do
        # Quoted prefix = literal comparison; the trailing * matches the rest.
        [[ "$line" == "$url|"* ]] || printf '%s\n' "$line"
    done < "$CACHE_FILE" > "$temp_file"

    mv -- "$temp_file" "$CACHE_FILE"
}

# Announce the accepted date window: previous/current year, nothing after today.
echo -e "${YELLOW}Filtrando entradas de los años: $PREVIOUS_YEAR y $CURRENT_YEAR${NC}\n${YELLOW}Fecha máxima aceptada: $TODAY (hoy)${NC}"

# Download a favicon.txt through Lagrange, retrying on failure.
# Globals:   LAGRANGE_PATH, MAX_RETRIES, RETRY_DELAY, colour variables (read)
# Arguments: $1 - favicon URL
#            $2 - file that receives the downloaded content
# Output:    progress messages on stderr only
# Returns:   0 once an attempt yields a file whose first line is not blank;
#            1 when Lagrange is not available or every attempt failed
fetch_favicon() {
    local favicon_url="$1"
    local output_file="$2"
    local attempt first_line

    # Without a runnable Lagrange there is nothing to try.
    command -v "$LAGRANGE_PATH" >/dev/null 2>&1 || return 1

    for (( attempt = 1; attempt <= MAX_RETRIES; attempt++ )); do
        echo -e "${BLUE}Descargando favicon: $favicon_url (Intento $attempt/$MAX_RETRIES)${NC}" >&2

        if ! "$LAGRANGE_PATH" -d "$favicon_url" > "$output_file" 2>/dev/null; then
            echo -e "${YELLOW}Error descargando favicon, reintentando...${NC}" >&2
        elif [[ ! -s "$output_file" ]]; then
            echo -e "${YELLOW}Favicon vacío, reintentando...${NC}" >&2
        else
            # The file is used as delivered; only its first line, stripped of
            # CR/LF and surrounding whitespace, decides whether it is usable.
            first_line=$(head -n 1 "$output_file" 2>/dev/null | tr -d '\r\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
            if [[ -n "$first_line" ]]; then
                echo -e "${GREEN}✓ Favicon descargado: '$first_line'${NC}" >&2
                return 0
            fi
            echo -e "${YELLOW}Favicon sin contenido válido, reintentando...${NC}" >&2
        fi

        # Pause between attempts, but not after the last one.
        if (( attempt < MAX_RETRIES )); then
            echo -e "${BLUE}Esperando $RETRY_DELAY segundos antes del reintento...${NC}" >&2
            sleep "$RETRY_DELAY"
        fi
    done

    echo -e "${RED}✗ Error: Todos los $MAX_RETRIES intentos fallaron para favicon: $favicon_url${NC}" >&2
    return 1
}

# Decide which icon represents a blog and print it on stdout.
# Precedence: explicit override > value already in blog_favicons >
# fixed icon per protocol (gopher, spartan) > single-character favicon.txt
# fetched from the root of a gemini host > default icon.
# Globals:   blog_favicons (read/written), TEMP_DIR, colour variables (read)
# Arguments: $1 - blog URL
#            $2 - blog name (not used by this function)
#            $3 - optional icon override
# Output:    the chosen icon on stdout; progress messages on stderr
# NOTE: the visible callers in this file use $(get_blog_favicon ...), which
#       runs in a subshell, so the value stored in blog_favicons here is not
#       visible to later calls made that way.
get_blog_favicon() {
    local blog_url="$1"
    local blog_name="$2"
    local favicon_override="$3"
    local icon=""
    local host_port favicon_url favicon_file favicon_content

    if [[ -n "$favicon_override" ]]; then
        icon="$favicon_override"
    elif [[ -n "${blog_favicons[$blog_url]}" ]]; then
        icon="${blog_favicons[$blog_url]}"
    else
        case "$blog_url" in
            gopher://*)
                icon="📂"
                ;;
            spartan://*)
                icon="🗡"
                ;;
            gemini://*)
                # favicon.txt is looked up at the root of the host, whatever
                # path the blog itself lives under.
                host_port="${blog_url#gemini://}"
                host_port="${host_port%%/*}"
                favicon_url="gemini://$host_port/favicon.txt"

                echo -e "${BLUE}Buscando favicon: $favicon_url${NC}" >&2

                favicon_file="$TEMP_DIR/favicon_${host_port//\//_}.txt"

                if fetch_favicon "$favicon_url" "$favicon_file"; then
                    favicon_content=$(cat "$favicon_file" 2>/dev/null | tr -d '\r\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')

                    # Accept only content that is exactly one character long.
                    if (( ${#favicon_content} == 1 )); then
                        echo -e "${GREEN}✓ Favicon encontrado: '$favicon_content'${NC}" >&2
                        icon="$favicon_content"
                    else
                        echo -e "${YELLOW}✗ Favicon inválido (longitud: ${#favicon_content}), usando emoji por defecto${NC}" >&2
                    fi
                else
                    echo -e "${YELLOW}✗ No se pudo descargar favicon.txt, usando emoji por defecto${NC}" >&2
                fi
                ;;
        esac

        # Other protocols, or no usable favicon.txt: default icon.
        [[ -n "$icon" ]] || icon="📝"
    fi

    blog_favicons["$blog_url"]="$icon"
    echo "$icon"
}

# Check that a date is a real calendar day, not in the future, and inside the
# accepted year window.
# Globals:   TODAY (YYYY-MM-DD), PREVIOUS_YEAR, CURRENT_YEAR (read)
# Arguments: $1 - date in YYYY-MM-DD form
# Returns:   0 - valid and within PREVIOUS_YEAR..CURRENT_YEAR
#            1 - malformed, impossible calendar date, or outside the window
#            2 - later than TODAY
is_valid_date() {
    local date="$1"

    if [[ ! "$date" =~ ^([0-9]{4})-([0-9]{2})-([0-9]{2})$ ]]; then
        return 1
    fi

    # Force base 10: bash arithmetic treats a leading 0 as octal, so "08",
    # "09" or a year such as "0800" would otherwise raise an error.
    local year=$((10#${BASH_REMATCH[1]}))
    local month=$((10#${BASH_REMATCH[2]}))
    local day=$((10#${BASH_REMATCH[3]}))

    if (( month < 1 || month > 12 )); then
        return 1
    fi

    # Validate the day against the real length of the month, so dates such
    # as 02-30 or 04-31 are rejected instead of passing a plain 1..31 check.
    local -a month_lengths=(31 28 31 30 31 30 31 31 30 31 30 31)
    if (( year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) )); then
        month_lengths[1]=29
    fi
    if (( day < 1 || day > month_lengths[month - 1] )); then
        return 1
    fi

    # Zero-padded ISO dates sort chronologically as plain strings.
    if [[ "$date" > "$TODAY" ]]; then
        return 2
    fi

    if (( year >= PREVIOUS_YEAR && year <= CURRENT_YEAR )); then
        return 0
    fi
    return 1
}

# Turn a link found in a feed page into an absolute URL.
# The base URL is always treated as a directory: a relative link is appended
# to the base path (a "/" is added to the base when it lacks one).
# Arguments: $1 - link as written in the page
#            $2 - base URL (scheme://host[:port][/path])
# Output:    the absolute URL on stdout
# Rules, in order:
#   1. a link that already carries a URI scheme ("name:") is returned as is
#   2. "//host/path" gets the gemini scheme
#   3. "/path" is resolved against the scheme and host of the base
#   4. anything else is appended to the base directory
convert_to_absolute_url() {
    local relative_link="$1"
    local base_url="$2"

    # Any URI scheme (letter, then letters/digits/+/./-, then ":") marks an
    # absolute link: not only gemini/gopher/spartan/http(s) but also
    # titan://, finger://, mailto:, ...
    if [[ "$relative_link" =~ ^[A-Za-z][A-Za-z0-9+.-]*: ]]; then
        printf '%s\n' "$relative_link"
        return 0
    fi

    # Scheme-less network path.
    if [[ "$relative_link" == //* ]]; then
        printf '%s\n' "gemini:${relative_link}"
        return 0
    fi

    # Split the base at the FIRST "://" so a later "://" (e.g. inside a
    # query string) cannot corrupt the scheme.
    local protocol="${base_url%%://*}"
    local clean_base="${base_url#*://}"
    local base_host_port="${clean_base%%/*}"

    # Link relative to the root of the host.
    if [[ "$relative_link" == /* ]]; then
        printf '%s\n' "${protocol}://${base_host_port}${relative_link}"
        return 0
    fi

    # Directory of the base URL. Only a base without any "/" after the host
    # is the root; a path that happens to equal the host name is kept.
    local base_dir="/"
    if [[ "$clean_base" == */* ]]; then
        base_dir="/${clean_base#*/}"
    fi
    [[ "$base_dir" == */ ]] || base_dir+="/"

    # Collapse doubled slashes inside the directory part.
    base_dir="${base_dir//\/\//\/}"

    local result="${protocol}://${base_host_port}${base_dir}${relative_link}"

    # An empty host would leave "scheme:///"; reduce it to "scheme://".
    local scheme
    for scheme in gemini gopher spartan; do
        result="${result//${scheme}:\/\/\//${scheme}://}"
    done

    printf '%s\n' "$result"
}

# Reduce a URL that points at a ".gmi" file to the directory holding it.
# URLs that do not end in ".gmi" are printed unchanged.
# Arguments: $1 - URL
# Output:    the directory URL (always ending in "/") or the untouched input
clean_base_url() {
    local url="$1"

    # Not a .gmi document: nothing to strip.
    if [[ "$url" != *.gmi ]]; then
        echo "$url"
        return
    fi

    local scheme="${url%://*}"
    local remainder="${url#*://}"
    local authority="${remainder%%/*}"
    local doc_path="/${remainder#*/}"
    local parent

    if [[ "$doc_path" == "/$authority" ]]; then
        # No "/" after the host at all: treat it as the root.
        parent="/"
    else
        parent=$(dirname "$doc_path")
        case "$parent" in
            .) parent="/" ;;
            /) ;;
            *) parent+="/" ;;
        esac
    fi

    echo "${scheme}://${authority}${parent}"
}

# Strip leading separator characters and surrounding whitespace from a title.
# Arguments: $1 - raw title
# Output:    the cleaned title on stdout
clean_title() {
    local title="$1"

    # One sed run, three steps applied in order:
    #   1. drop leading runs of separator characters with optional whitespace
    #   2. drop remaining leading whitespace
    #   3. drop trailing whitespace
    title=$(echo "$title" | sed -E \
        -e 's/^[[:space:]]*([:|\\-][[:space:]]*)+//' \
        -e 's/^[[:space:]]*//' \
        -e 's/[[:space:]]*$//')

    echo "$title"
}

# Download a document through Lagrange, retrying on failure.
# Globals:   LAGRANGE_PATH, MAX_RETRIES, RETRY_DELAY, colour variables (read)
# Arguments: $1 - URL to fetch (any protocol is handed to Lagrange)
#            $2 - file that receives the content
# Output:    progress messages on stdout
# Returns:   0 when an attempt produced a non-empty file;
#            1 when Lagrange is unavailable or every attempt failed
fetch_content() {
    local url="$1"
    local output_file="$2"
    local retry_count=0
    local first_line
    # A Gemini success header is "2x<space><mime/type>...". Requiring the
    # space and a type/subtype keeps ordinary first lines that merely start
    # with "2" plus a digit (e.g. "2024-05-01 My post") from being deleted.
    local status_line_re='^2[0-9] [^[:space:]/]+/[^[:space:];]+'

    while [[ $retry_count -lt $MAX_RETRIES ]]; do
        echo -e "${BLUE}URL completa: $url (Intento $((retry_count + 1))/$MAX_RETRIES)${NC}"

        # Give up immediately when Lagrange cannot be run at all.
        if ! command -v "$LAGRANGE_PATH" >/dev/null 2>&1; then
            echo -e "${RED}✗ Lagrange no está disponible en: $LAGRANGE_PATH${NC}"
            echo -e "${YELLOW}Instala Lagrange o configura LAGRANGE_PATH con la ruta correcta${NC}"
            return 1
        fi

        echo -e "${YELLOW}Usando Lagrange para descargar contenido...${NC}"

        if "$LAGRANGE_PATH" -d "$url" > "$output_file" 2>/dev/null; then
            if [[ -s "$output_file" ]]; then
                echo -e "${GREEN}✓ Descarga Lagrange exitosa${NC}"

                # Gemini only: drop a leading response header if one is present.
                if [[ "$url" =~ ^gemini:// ]]; then
                    first_line=$(head -n 1 "$output_file" 2>/dev/null)
                    if [[ "$first_line" =~ $status_line_re ]]; then
                        tail -n +2 "$output_file" > "${output_file}.content"
                        mv "${output_file}.content" "$output_file"
                        echo -e "${GREEN}✓ Respuesta Gemini limpiada${NC}"
                    fi
                fi

                return 0
            else
                echo -e "${YELLOW}⚠ Lagrange no devolvió contenido, reintentando...${NC}"
            fi
        else
            echo -e "${YELLOW}⚠ Error ejecutando Lagrange, reintentando...${NC}"
        fi

        retry_count=$((retry_count + 1))

        # Pause between attempts, but not after the last one.
        if [[ $retry_count -lt $MAX_RETRIES ]]; then
            echo -e "${BLUE}Esperando $RETRY_DELAY segundos antes del reintento...${NC}"
            sleep "$RETRY_DELAY"
        fi
    done

    echo -e "${RED}✗ Error: Todos los $MAX_RETRIES intentos fallaron para: $url${NC}"
    return 1
}

# ============================
# NUEVO: FUNCIONES DE PARSEO MEJORADAS
# ============================

# Normalise a date written in one of the supported formats to YYYY-MM-DD.
# Supported: YYYY-MM-DD, DD-MM-YYYY, YYMMDD and YYYYjDDD (year + day of year).
# Globals:   DEBUG (read); CURRENT_YEAR (read, falls back to `date +%Y`)
# Arguments: $1 - the date token exactly as extracted from the page
# Output:    the normalised date on stdout, or an empty line when the token
#            matches no format or fails the basic range checks
# Note: every numeric field is converted with 10#, because bash arithmetic
#       reads a leading 0 as octal ("08"/"09" fail, "010" would become 8).
parse_date() {
    local date_str="$1"
    local year month day yy julian_day converted_date this_year century

    if [[ "${DEBUG:-0}" -eq 1 ]]; then
        echo -e "${BLUE}DEBUG: Parseando fecha: '$date_str'${NC}" >&2
    fi

    # 1. YYYY-MM-DD: already in the target form.
    if [[ "$date_str" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
        printf '%s\n' "$date_str"
        return
    fi

    # 2. DD-MM-YYYY: reorder the three fields.
    if [[ "$date_str" =~ ^([0-9]{2})-([0-9]{2})-([0-9]{4})$ ]]; then
        printf '%s-%s-%s\n' "${BASH_REMATCH[3]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[1]}"
        return
    fi

    # 3. YYMMDD.
    if [[ "$date_str" =~ ^([0-9]{2})([0-9]{2})([0-9]{2})$ ]]; then
        yy=$((10#${BASH_REMATCH[1]}))
        month=$((10#${BASH_REMATCH[2]}))
        day=$((10#${BASH_REMATCH[3]}))

        # Two-digit years up to the current one belong to the current
        # century, later ones to the previous century. The pivot follows
        # the clock instead of being a fixed number.
        this_year="${CURRENT_YEAR:-$(date +%Y)}"
        this_year=$((10#$this_year))
        century=$(( this_year / 100 * 100 ))
        if (( yy <= this_year % 100 )); then
            year=$(( century + yy ))
        else
            year=$(( century - 100 + yy ))
        fi

        if (( month >= 1 && month <= 12 && day >= 1 && day <= 31 )); then
            printf '%04d-%02d-%02d\n' "$year" "$month" "$day"
            return
        fi
    fi

    # 4. YYYYjDDD: year plus day of the year.
    if [[ "$date_str" =~ ^([0-9]{4})j([0-9]{3})$ ]]; then
        year="${BASH_REMATCH[1]}"
        julian_day=$((10#${BASH_REMATCH[2]}))

        if (( julian_day >= 1 && julian_day <= 366 )); then
            converted_date=$(convert_julian_to_gregorian "$year" "$julian_day")
            if [[ -n "$converted_date" ]]; then
                printf '%s\n' "$converted_date"
                return
            fi
        fi
    fi

    # No format matched.
    echo ""
}

# Convert "day N of year Y" into a YYYY-MM-DD calendar date.
# Pure shell arithmetic: no external `date` call, so the result does not
# depend on which `date` implementation is installed.
# Arguments: $1 - year (digits; leading zeros allowed)
#            $2 - day of the year, 1-based (digits; leading zeros allowed)
# Output:    the date on stdout, or an empty line when the input is not
#            numeric or the day does not exist in that year (0, 366 in a
#            non-leap year, 367, ...)
convert_julian_to_gregorian() {
    local year="$1"
    local julian_day="$2"

    if [[ ! "$year" =~ ^[0-9]+$ || ! "$julian_day" =~ ^[0-9]+$ ]]; then
        echo ""
        return
    fi

    # Force base 10 so zero-padded input such as "010" means ten, not
    # octal eight, and "008"/"009" do not raise an arithmetic error.
    year=$((10#$year))
    julian_day=$((10#$julian_day))

    local -a months=(31 28 31 30 31 30 31 31 30 31 30 31)
    if (( year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) )); then
        months[1]=29
    fi

    # Walk through the months, consuming their days until the remainder
    # falls inside the current month.
    local month=1
    local remaining=$julian_day
    local days_in_month
    for days_in_month in "${months[@]}"; do
        if (( remaining >= 1 && remaining <= days_in_month )); then
            printf '%04d-%02d-%02d\n' "$year" "$month" "$remaining"
            return
        fi
        remaining=$(( remaining - days_in_month ))
        month=$(( month + 1 ))
    done

    # Day number outside the year.
    echo ""
}

# Parse an Atom XML feed and append its acceptable entries to `entries`.
# The XML is scanned line by line with awk, so each <title>, <published>,
# <updated> and <link> element is expected to sit on a single line.
# Globals:   entries, blog_names (written); DEBUG, PREVIOUS_YEAR and the
#            colour variables (read)
# Calls:     get_blog_favicon, is_valid_date, log_error
# Arguments: $1 - file holding the downloaded feed
#            $2 - blog name
#            $3 - blog URL (used as the link of entries that have none)
#            $4 - optional favicon override
# Output:    progress and summary messages on stdout; every accepted entry
#            is stored as "date|blog_name|title|entry_url|favicon"
parse_atom_feed() {
    local content_file="$1"
    local blog_name="$2"
    local blog_url="$3"
    local favicon_override="$4"

    echo -e "\n${GREEN}Detectado feed Atom XML - Parseando...${NC}"

    # Only a warning: parsing is attempted even without the Atom namespace.
    if ! head -n 10 "$content_file" | grep -q -E "<feed[^>]*xmlns=[\"']http://www.w3.org/2005/Atom[\"']"; then
        echo -e "${YELLOW}Advertencia: El contenido parece XML pero no es un feed Atom estándar${NC}"
    fi

    local favicon
    favicon=$(get_blog_favicon "$blog_url" "$blog_name" "$favicon_override")

    blog_names["$blog_url"]="$blog_name"

    local entry_count=0
    local filtered_count=0
    local future_count=0
    # Loop variables are local so they do not overwrite caller globals.
    local entry_data date title entry_link entry valid_result

    while IFS= read -r entry_data; do
        [[ -n "$entry_data" ]] || continue

        # awk emits "date|title|link"; pipes inside titles were replaced.
        IFS='|' read -r date title entry_link <<< "$entry_data"

        is_valid_date "$date"
        valid_result=$?

        if [[ $valid_result -eq 0 ]]; then
            entry="$date|$blog_name|$title|$entry_link|$favicon"
            entries+=("$entry")
            entry_count=$((entry_count + 1))

            echo "✓ $date - $title"
            echo "  URL: $entry_link"
        elif [[ $valid_result -eq 2 ]]; then
            # Dated in the future: count it and record it in the error log.
            future_count=$((future_count + 1))
            log_error "El Gemlog $blog_url contiene una entrada con fecha futura, no procesada - ($title - $date)"
            if [[ $DEBUG -eq 1 ]]; then
                echo -e "${RED}DEBUG: Entrada futura ignorada: $date - $title${NC}"
            fi
        else
            # Too old or not a valid date.
            filtered_count=$((filtered_count + 1))
            if [[ $DEBUG -eq 1 ]]; then
                echo -e "${YELLOW}DEBUG: Filtrada (fecha antigua o inválida): $date - $title${NC}"
            fi
        fi
    # The blog URL reaches awk through the environment, never by splicing it
    # into the program text, so quotes or backslashes in the URL can neither
    # break nor alter the awk script.
    done < <(PINZON_BLOG_URL="$blog_url" awk '
    BEGIN {
        in_entry = 0
        title = ""
        published = ""
        link = ""
        blog_url = ENVIRON["PINZON_BLOG_URL"]
    }

    /<entry>/ {
        in_entry = 1
        title = ""
        published = ""
        link = ""
    }

    /<title[^>]*>/ && in_entry {
        # Text between <title ...> and </title> (or to the end of the line)
        line = $0
        if (match(line, /<title[^>]*>/)) {
            start = RSTART + RLENGTH
            rest = substr(line, start)
            if (match(rest, /<\/title>/)) {
                title = substr(rest, 1, RSTART - 1)
            } else {
                title = rest
            }
        }
        # Trim blanks
        gsub(/^[ \t]+|[ \t]+$/, "", title)
        # Remove CDATA markers
        gsub(/<!\[CDATA\[|\]\]>/, "", title)
    }

    /<published>/ && in_entry {
        line = $0
        if (match(line, /<published>/)) {
            start = RSTART + RLENGTH
            rest = substr(line, start)
            if (match(rest, /<\/published>/)) {
                published = substr(rest, 1, RSTART - 1)
            } else {
                published = rest
            }
        }
    }

    # <updated> is used only while no date has been seen for this entry
    /<updated>/ && in_entry && published == "" {
        line = $0
        if (match(line, /<updated>/)) {
            start = RSTART + RLENGTH
            rest = substr(line, start)
            if (match(rest, /<\/updated>/)) {
                published = substr(rest, 1, RSTART - 1)
            }
        }
    }

    # First double-quoted href of the entry wins
    /<link[^>]*href=/ && in_entry && link == "" {
        line = $0
        if (match(line, /href="/)) {
            start = RSTART + 6
            rest = substr(line, start)
            if (match(rest, /"/)) {
                link = substr(rest, 1, RSTART - 1)
                # Network-path link (//host/path): assume gemini
                if (link ~ /^\/\//) {
                    link = "gemini:" link
                }
            }
        }
    }

    /<\/entry>/ {
        if (in_entry && title != "" && published != "") {
            # Keep only the YYYY-MM-DD part of the timestamp
            if (match(published, /[0-9]{4}-[0-9]{2}-[0-9]{2}/)) {
                date = substr(published, RSTART, 10)
            } else {
                # Pattern not matched: take the first 10 characters
                date = substr(published, 1, 10)
            }

            # Entries without a link point at the blog itself
            if (link == "") {
                link = blog_url
            }

            # The pipe is the field separator of the output
            gsub(/\|/, " ", title)

            print date "|" title "|" link
        }
        in_entry = 0
        title = ""
        published = ""
        link = ""
    }
    ' "$content_file")

    if [[ $entry_count -eq 0 ]]; then
        echo -e "${YELLOW}No se encontraron entradas recientes en el feed Atom${NC}"
    else
        echo -e "${GREEN}Encontradas $entry_count entradas recientes en el feed Atom${NC}"
    fi

    if [[ $filtered_count -gt 0 ]]; then
        echo -e "${YELLOW}Ignorando $filtered_count entradas anteriores a $PREVIOUS_YEAR o inválidas${NC}"
    fi

    if [[ $future_count -gt 0 ]]; then
        echo -e "${RED}⚠ Ignorando $future_count entradas con fechas futuras${NC}"
    fi
}

# Find the most recent entry reported in a processing log.
# Scans for lines of the form "✓ YYYY-MM-DD - <title>" (as printed when an
# entry is accepted) and keeps the one with the greatest date; on equal
# dates the first one seen wins.
# Arguments: $1 - file holding the captured processing output
#            $2 - blog name (not used; kept for interface compatibility)
# Output:    "date|title" on stdout, or an empty line when nothing matched
find_last_entry() {
    local temp_output="$1"
    local blog_name="$2"

    local last_date=""
    local last_title=""
    local line date title
    # Group 1 is the first date after the check mark, group 2 the title.
    # A single in-shell match avoids both the multi-line result produced
    # when a title contains another date and three forks per line.
    local entry_re='✓ ([0-9]{4}-[0-9]{2}-[0-9]{2}) - (.*)$'

    if [[ ! -r "$temp_output" ]]; then
        echo ""
        return
    fi

    # "|| [[ -n ... ]]" also handles a last line without trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        [[ "$line" =~ $entry_re ]] || continue
        date="${BASH_REMATCH[1]}"
        title="${BASH_REMATCH[2]}"

        # ISO dates compare chronologically as plain strings.
        if [[ -z "$last_date" || "$date" > "$last_date" ]]; then
            last_date="$date"
            last_title="$title"
        fi
    done < "$temp_output"

    if [[ -n "$last_date" && -n "$last_title" ]]; then
        printf '%s\n' "${last_date}|${last_title}"
    else
        echo ""
    fi
}

# Función para extraer entradas con fechas - MEJORADA CON ASOCIACIÓN CORRECTA DE ENLACES
extract_entries_with_dates() {
    local content_file="$1"
    local blog_name="$2"
    local blog_url="$3"
    local favicon_override="$4"
    
    echo -e "\n${GREEN}Extrayendo entradas de: $blog_name${NC}"
    echo -e "${BLUE}Formatos soportados: AAAA-MM-DD, DD-MM-AAAA, YYMMDD, AAAAjDDD${NC}"
    echo -e "${BLUE}Fecha máxima aceptada: $TODAY${NC}"
    
    # Mostrar información del favicon
    if [[ -n "$favicon_override" ]]; then
        echo -e "${BLUE}Usando favicon personalizado: $favicon_override${NC}"
    fi
    
    # DEBUG: Mostrar contenido crudo si está activado
    if [[ $DEBUG -eq 1 ]]; then
        echo -e "${YELLOW}=== DEBUG: CONTENIDO CRUDO (primeras 10 líneas) ===${NC}"
        head -10 "$content_file" | while IFS= read -r line; do
            echo "DEBUG: |$line|"
        done
        echo -e "${YELLOW}=== FIN DEBUG ===${NC}"
    fi
    
    # Verificar si es un feed XML/Atom
    local first_lines=$(head -5 "$content_file" 2>/dev/null)
    if echo "$first_lines" | grep -q -E "<feed[^>]*>|<entry>"; then
        parse_atom_feed "$content_file" "$blog_name" "$blog_url" "$favicon_override"
        return 0
    fi
    
    # Si no es XML, proceder con el parsing normal
    local clean_blog_url=$(clean_base_url "$blog_url")
    
    # Obtener favicon para este blog (solo el emoji, sin mensajes)
    local favicon=$(get_blog_favicon "$blog_url" "$blog_name" "$favicon_override")
    
    # Almacenar nombre del blog para usar después
    blog_names["$blog_url"]="$blog_name"
    
    local entry_count=0
    local filtered_count=0
    local future_count=0
    
    # PRIMERO: Recolectar TODOS los enlaces con sus líneas de contexto
    declare -a link_lines
    declare -A link_descriptions
    local line_number=0
    
    while IFS= read -r line; do
        ((line_number++))
        if echo "$line" | grep -q "^=>"; then
            # CORRECCIÓN: Manejo robusto de espacios/tabs después del =>
            
            # Eliminar el '=>' inicial y cualquier espacio/tab después
            local cleaned_line=$(echo "$line" | sed 's/^=>[[:space:]]*//')
            
            # También limpiar caracteres especiales no imprimibles
            cleaned_line=$(echo "$cleaned_line" | tr -s ' ' ' ')  # Colapsar múltiples espacios
            cleaned_line=$(echo "$cleaned_line" | sed 's/\xC2\xA0/ /g')  # Reemplazar espacios no-break
            
            # Obtener la URL (primer campo, hasta el primer espacio/tab)
            local link_part=$(echo "$cleaned_line" | awk '{print $1}')
            
            # Obtener la descripción - FORMA MÁS SEGURA que evita problemas con sed
            # Primero, si la línea contiene más de un "campo" (palabras)
            local word_count=$(echo "$cleaned_line" | wc -w)
            if [[ $word_count -gt 1 ]]; then
                # Usar awk para obtener todo excepto el primer campo
                local desc=$(echo "$cleaned_line" | awk '{$1=""; print $0}' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
            else
                local desc=""
            fi
            
            # DEBUG: Mostrar enlaces procesados
            if [[ $DEBUG -eq 1 ]]; then
                echo -e "${BLUE}DEBUG: Línea $line_number -> URL: '$link_part' | Desc: '$desc'${NC}" >&2
            fi
            
            # Convertir enlace relativo a absoluto usando la URL limpia
            link_part=$(convert_to_absolute_url "$link_part" "$clean_blog_url")
            
            # IGNORAR ENLACES QUE SEAN FAVICONS
            if [[ "$link_part" =~ favicon\.(ico|txt|gmi)$ ]] || [[ "$desc" =~ [Ff]avicon ]]; then
                echo -e "${BLUE}Ignorando enlace favicon: $link_part${NC}" >&2
                continue
            fi
            
            # Almacenar enlace con su número de línea y descripción
            link_lines["$line_number"]="$link_part"
            if [[ -n "$desc" ]]; then
                link_descriptions["$line_number"]="$desc"
            fi
        fi
    done < "$content_file"
    
    # SEGUNDO: Buscar entradas por fecha y encontrar el enlace MÁS CERCANO
    line_number=0
    while IFS= read -r line; do
        ((line_number++))
        local date_found=""
        local title=""
        local format_used=""
        
        # FORMATO 1: AAAA-MM-DD
        if echo "$line" | grep -qE "([0-9]{4}-[0-9]{2}-[0-9]{2})"; then
            date_found=$(echo "$line" | grep -oE "[0-9]{4}-[0-9]{2}-[0-9]{2}" | head -1)
            format_used="AAAA-MM-DD"
            
            # Extraer título
            title=$(echo "$line" | sed -E "s/.*[0-9]{4}-[0-9]{2}-[0-9]{2}[[:space:]]*//")
            
        # FORMATO 2: DD-MM-AAAA  
        elif echo "$line" | grep -qE "([0-9]{2}-[0-9]{2}-[0-9]{4})"; then
            date_found=$(echo "$line" | grep -oE "[0-9]{2}-[0-9]{2}-[0-9]{4}" | head -1)
            format_used="DD-MM-AAAA"
            
            # Extraer título
            title=$(echo "$line" | sed -E "s/.*[0-9]{2}-[0-9]{2}-[0-9]{4}[[:space:]]*//")
            
        # FORMATO 3: YYMMDD (6 dígitos)
        elif echo "$line" | grep -qE "(^|[[:space:]])[0-9]{6}($|[[:space:]])"; then
            date_found=$(echo "$line" | grep -oE "(^|[[:space:]])[0-9]{6}($|[[:space:]])" | head -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
            format_used="YYMMDD"
            
            # Extraer título (todo después de los 6 dígitos)
            title=$(echo "$line" | sed -E "s/.*[0-9]{6}[[:space:]]*//")
            
        # FORMATO 4: AAAAjDDD (fecha juliana)
        elif echo "$line" | grep -qE "\[?[0-9]{4}j[0-9]{3}\]?"; then
            date_found=$(echo "$line" | grep -oE "\[?[0-9]{4}j[0-9]{3}\]?" | head -1 | tr -d '[]')
            format_used="AAAAjDDD"
            
            # Extraer título (eliminar la parte de fecha juliana)
            title=$(echo "$line" | sed -E "s/.*\[?[0-9]{4}j[0-9]{3}\]?[[:space:]]*//")
        fi
        
        # Si encontramos una fecha, procesarla
        if [[ -n "$date_found" ]]; then
            local parsed_date=$(parse_date "$date_found")
            
            if [[ -n "$parsed_date" ]]; then
                # Limpiar título
                title=$(clean_title "$title")
                
                # Si no hay título después de la fecha, buscar en la siguiente línea
                if [[ -z "$title" || ${#title} -lt 3 ]]; then
                    # Leer siguiente línea sin consumir el input principal
                    local next_line=$(sed -n "$((line_number + 1))p" "$content_file")
                    if [[ -n "$next_line" ]]; then
                        # Verificar que la siguiente línea no sea vacía, enlace, o otra fecha
                        if [[ ${#next_line} -gt 5 ]] && 
                           ! echo "$next_line" | grep -q "^[[:space:]]*$" && 
                           ! echo "$next_line" | grep -q "^=>" && 
                           ! echo "$next_line" | grep -qE "[0-9]{4}-[0-9]{2}-[0-9]{2}" && 
                           ! echo "$next_line" | grep -qE "[0-9]{2}-[0-9]{2}-[0-9]{4}" &&
                           ! echo "$next_line" | grep -qE "[0-9]{6}" &&
                           ! echo "$next_line" | grep -qE "\[?[0-9]{4}j[0-9]{3}\]?"; then
                            title=$(echo "$next_line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
                            title=$(clean_title "$title")
                        fi
                    fi
                fi
                
                # Si todavía no hay título, usar uno por defecto
                if [[ -z "$title" || ${#title} -lt 3 ]]; then
                    title="Entrada del $parsed_date"
                fi
                
                # Limitar longitud del título
                if [[ ${#title} -gt 100 ]]; then
                    title="${title:0:97}..."
                fi
                
                # Verificar si la fecha es válida (hasta hoy)
                local valid_result
                is_valid_date "$parsed_date"
                valid_result=$?
                
                if [[ $valid_result -eq 0 ]]; then
                    # BUSCAR ENLACE MÁS CERCANO - ESTRATEGIA MEJORADA
                    local entry_url="$clean_blog_url"  # Por defecto, enlazar al blog principal
                    local closest_link=""
                    local min_distance=1000000
                    
                    # Buscar enlaces cercanos (antes y después de esta línea)
                    for link_line_num in "${!link_lines[@]}"; do
                        local distance=$((link_line_num - line_number))
                        local abs_distance=${distance#-}  # Valor absoluto
                        
                        # Preferir enlaces que estén después de la fecha (más cercanos en contenido)
                        if [[ $distance -ge -5 && $distance -le 10 ]] && [[ $abs_distance -lt $min_distance ]]; then
                            # Verificar si la descripción del enlace coincide con el título
                            local link_desc="${link_descriptions[$link_line_num]}"
                            if [[ -n "$link_desc" ]]; then
                                # Si la descripción coincide exactamente con el título, usar este enlace
                                if [[ "$link_desc" == "$title" ]]; then
                                    entry_url="${link_lines[$link_line_num]}"
                                    min_distance=$abs_distance
                                    closest_link="$entry_url"
                                    break
                                # Si la descripción contiene palabras del título, considerar este enlace
                                elif [[ "$title" =~ "$link_desc" ]] || [[ "$link_desc" =~ "$title" ]]; then
                                    entry_url="${link_lines[$link_line_num]}"
                                    min_distance=$abs_distance
                                    closest_link="$entry_url"
                                fi
                            fi
                            
                            # Si no hay coincidencia exacta, usar el enlace más cercano
                            if [[ -z "$closest_link" ]] || [[ $abs_distance -lt $min_distance ]]; then
                                entry_url="${link_lines[$link_line_num]}"
                                min_distance=$abs_distance
                                closest_link="$entry_url"
                            fi
                        fi
                    done
                    
                    # Si encontramos un enlace cercano, usarlo
                    if [[ -n "$closest_link" && "$closest_link" != "$clean_blog_url" ]]; then
                        entry_url="$closest_link"
                    fi
                    
                    # Crear entrada en el formato: fecha|blog_name|title|entry_url|favicon
                    local entry="$parsed_date|$blog_name|$title|$entry_url|$favicon"
                    
                    # DEBUG: Mostrar entrada si está activado
                    if [[ $DEBUG -eq 1 ]]; then
                        echo -e "${BLUE}DEBUG: AÑADIENDO ENTRY -> |$entry|${NC}"
                        echo -e "${BLUE}DEBUG: Formato: $format_used, Original: $date_found, Parseado: $parsed_date${NC}"
                        echo -e "${BLUE}DEBUG: Enlace usado: $entry_url (distancia: $min_distance)${NC}"
                    fi
                    
                    entries+=("$entry")
                    ((entry_count++))
                    
                    echo "✓ $parsed_date - $title"
                    echo "  Formato: $format_used, URL: $entry_url"
                elif [[ $valid_result -eq 2 ]]; then
                    # Fecha futura
                    ((future_count++))
                    # Registrar en error.log
                    log_error "El Gemlog $blog_url contiene una entrada con fecha futura, no procesada - ($title - $parsed_date)"
                    if [[ $DEBUG -eq 1 ]]; then
                        echo -e "${RED}DEBUG: Entrada futura ignorada: $parsed_date - $title${NC}"
                    fi
                else
                    # Fecha antigua o inválida
                    ((filtered_count++))
                    if [[ $DEBUG -eq 1 ]]; then
                        echo -e "${YELLOW}DEBUG: Filtrada (fecha antigua o inválida): $parsed_date - $title${NC}"
                    fi
                fi
            fi
        fi
        
    done < "$content_file"
    
    if [[ $entry_count -eq 0 ]]; then
        echo -e "${YELLOW}No se encontraron entradas recientes con fecha${NC}"
    else
        echo -e "${GREEN}Encontradas $entry_count entradas recientes${NC}"
    fi
    
    # NUEVO: Mostrar solo resumen de entradas filtradas (no una por una)
    if [[ $filtered_count -gt 0 ]]; then
        echo -e "${YELLOW}Ignorando $filtered_count entradas anteriores a $PREVIOUS_YEAR o inválidas${NC}"
    fi
    
    if [[ $future_count -gt 0 ]]; then
        echo -e "${RED}⚠ Ignorando $future_count entradas con fechas futuras${NC}"
    fi
}

# Download one feed, extract its dated entries and reconcile the newest one
# with the cache.
#
# Arguments:
#   $1 - feed URL
#   $2 - display name of the feed (also used to derive the temp file names)
#   $3 - optional favicon override, passed through to the extractor
# Globals read: TEMP_DIR and the colour variables (RED, GREEN, YELLOW, BLUE, NC)
# Outputs: progress messages on stdout; the captured extraction log is
#          replayed unless the cached last entry equals the current one.
# Returns: 1 if the download fails, 0 otherwise.
process_capsule() {
    local url="$1"
    local name="$2"
    local favicon_override="$3"

    # File-system-safe base name: spaces become underscores, then everything
    # outside [a-zA-Z0-9_-] is dropped.
    local safe_name
    safe_name=$(echo "$name" | tr ' ' '_' | tr -cd 'a-zA-Z0-9_-')
    local temp_file="$TEMP_DIR/${safe_name}.gmi"
    local temp_output="$TEMP_DIR/${safe_name}_output.txt"

    echo -e "${YELLOW}=== Verificando: $name ===${NC}"

    # STEP 1: always fetch fresh content; it is needed to learn the last entry
    echo -e "${BLUE}Paso 1: Descargando contenido...${NC}"
    fetch_content "$url" "$temp_file" || {
        echo -e "${RED}✗ Error descargando $name${NC}"
        log_error "No se pudo conectar a $url - $name"
        return 1
    }

    # STEP 2: run the extractor in the current shell (only its stdout/stderr
    # are captured), then ask find_last_entry for the newest entry.
    # NOTE(review): find_last_entry presumably parses the captured log - confirm.
    echo -e "${BLUE}Paso 2: Extrayendo última entrada...${NC}"
    extract_entries_with_dates "$temp_file" "$name" "$url" "$favicon_override" > "$temp_output" 2>&1
    local current_last_entry
    current_last_entry=$(find_last_entry "$temp_output" "$name")

    if [[ -z "$current_last_entry" ]]; then
        # Nothing to compare against the cache: just replay the log
        echo -e "${YELLOW}⚠ No se encontraron entradas recientes, procesando igualmente...${NC}"
        cat "$temp_output"
        return 0
    fi

    # STEP 3: compare against the cached last entry for this URL
    echo -e "${BLUE}Paso 3: Verificando caché...${NC}"
    local cached_last_entry
    cached_last_entry=$(get_cached_last_entry "$url")

    if [[ -n "$cached_last_entry" && "$cached_last_entry" == "$current_last_entry" ]]; then
        # Cache hit with identical last entry: keep the log quiet
        echo -e "${GREEN}⏭️ Saltando feed (sin cambios): $name${NC}"
        echo -e "${BLUE}  Última entrada: $current_last_entry${NC}"
        return 0
    fi

    if [[ -z "$cached_last_entry" ]]; then
        # Feed seen for the first time
        echo -e "${GREEN}✓ Nueva entrada en caché: $name${NC}"
        echo -e "${BLUE}  Última entrada: $current_last_entry${NC}"
    else
        # Cached value differs from what was just extracted
        echo -e "${YELLOW}🔄 Actualizando feed (cambios detectados): $name${NC}"
        echo -e "${BLUE}  Anterior: $cached_last_entry${NC}"
        echo -e "${BLUE}  Actual: $current_last_entry${NC}"
    fi

    # Both the "new" and the "changed" case store the new value and show the log
    update_cache "$url" "$current_last_entry"
    cat "$temp_output"

    return 0
}

# Write the static "submit your site" page (addgmi.gmi) into the current
# directory; the generated index links to it with a relative URL.
#
# Globals read: RED, GREEN, NC (colour codes for the status message)
# Outputs: success message on stdout, failure message on stderr
# Returns: 0 when the page was written, 1 when the file could not be written
create_addgmi_file() {
    local addgmi_file="addgmi.gmi"
    
    # Quoted delimiter: the page text is written literally, with no expansion.
    # The write is checked so a failure is not reported as success.
    if ! cat > "$addgmi_file" << 'EOF'
# 🌿 Envía tu refugio en la "pequeña red"

¿Tienes un espacio en la pequeña web? 
Envíame tu dirección para agregarla y que más personas puedan descubrir (y leer) tu contenido.

Juntos tejemos esta red alternativa, lejos del ruido.

## 📮 Cómo contactarme:

=> ../../xmpp.gmi 🗨️  XMPP/Jabber (mensajería descentralizada)
=> mailto:tgomez@duck.com 📧 Correo electrónico (respuesta en 24h)
=> https://masto.es/@caleb 😁 Mastodon (red social federada)

## 💫 ¿Qué es lo que se sindica?
• Cápsulas Gemini (gemini://)
• Madrigueras Gopher (gopher://)  
• Espacios Spartan (spartan://)

Gracias por ser parte de esta red.

¡ Somos la resistencia !
EOF
    then
        echo -e "${RED}✗ No se pudo crear el archivo $addgmi_file${NC}" >&2
        return 1
    fi

    echo -e "${GREEN}✓ Archivo $addgmi_file creado${NC}"
}

# Build $INDEX_FILE: a gemtext page listing the newest entries grouped by date.
#
# Globals read: entries (records "date|blog_name|title|entry_url|favicon"),
#               INDEX_FILE, MAX_ENTRIES, DEBUG, YELLOW, NC
# Side effects: overwrites $INDEX_FILE and finally calls create_addgmi_file.
generate_index() {
    local -a sorted_entries=() sorted_dates=()
    local -A entries_by_date=()
    local entry date last_date entry_date blog_name title entry_url favicon i
    local entries_added=0

    # Page header plus the link to the "submit your site" page
    {
        echo "# 🐦 Pinzón Aggregator"
        echo ""
        echo "=> addgmi.gmi 🌿 Envía tu sitio a este agregador"
        echo ""
    } > "$INDEX_FILE"
    
    # DEBUG: dump the raw entries when debug mode is on
    if [[ $DEBUG -eq 1 ]]; then
        echo -e "${YELLOW}=== DEBUG: ENTRIES EN ARRAY ===${NC}"
        for i in "${!entries[@]}"; do
            echo "Entry $i: |${entries[$i]}|"
        done
        echo -e "${YELLOW}=== FIN DEBUG ENTRIES ===${NC}"
    fi
    
    # Newest first, capped at MAX_ENTRIES. mapfile reads one record per line
    # without word splitting or pathname expansion, so glob characters in a
    # title cannot alter the list.
    if [[ ${#entries[@]} -gt 0 ]]; then
        mapfile -t sorted_entries < <(printf '%s\n' "${entries[@]}" | sort -r | head -n "$MAX_ENTRIES")
    fi
    
    # Group the records by their date field (everything before the first '|')
    for entry in "${sorted_entries[@]}"; do
        date="${entry%%|*}"
        # An empty key is not a valid associative-array subscript
        [[ -z "$date" ]] && continue
        entries_by_date["$date"]+="$entry"$'\n'
    done
    
    # Sort the distinct dates, newest first. The keys must reach sort one per
    # line: expanding them with [@] inside a here-string joins them with
    # spaces, which hands sort a single line and breaks the grouping.
    if [[ ${#entries_by_date[@]} -gt 0 ]]; then
        mapfile -t sorted_dates < <(printf '%s\n' "${!entries_by_date[@]}" | sort -r)
        last_date="${sorted_dates[${#sorted_dates[@]}-1]}"
    fi
    
    # Emit one heading per date followed by its links
    for date in "${sorted_dates[@]}"; do
        # Stop once the maximum number of entries has been written
        if [[ $entries_added -ge $MAX_ENTRIES ]]; then
            break
        fi
        
        # The date itself is the heading line
        echo "$date" >> "$INDEX_FILE"
        
        while IFS= read -r entry; do
            [[ -z "$entry" ]] && continue
            
            if [[ $entries_added -ge $MAX_ENTRIES ]]; then
                break
            fi
            
            IFS='|' read -r entry_date blog_name title entry_url favicon <<< "$entry"
            
            # Link format: => URL favicon name: title
            echo "=> $entry_url $favicon $blog_name: $title" >> "$INDEX_FILE"
            entries_added=$((entries_added + 1))
            
        done <<< "${entries_by_date[$date]}"
        
        # Blank line between date groups, but not after the last one
        if [[ $entries_added -lt $MAX_ENTRIES && "$date" != "$last_date" ]]; then
            echo "" >> "$INDEX_FILE"
        fi
    done
    
    # Create the companion addgmi.gmi page
    create_addgmi_file
}

# Script entry point: initialise the cache, make sure a feeds file and the
# Lagrange binary are available, process every feed listed in $FEEDS_FILE,
# generate the index and print a summary.
#
# Globals read: FEEDS_FILE, INDEX_FILE, ERROR_LOG, CACHE_FILE, TEMP_DIR,
#               LAGRANGE_PATH, MAX_ENTRIES, MAX_RETRIES, RETRY_DELAY, DEBUG,
#               PREVIOUS_YEAR, CURRENT_YEAR, TODAY and the colour variables
# Globals written: entries (reset to empty before processing)
# Exits with status 1 when the sample feeds file had to be created or when
# Lagrange cannot be found.
main() {
    local url name favicon_override blog_url
    local cache_entries error_count
    local total=0
    local success=0

    # Initialise the cache
    init_cache
    
    # In debug mode start from a clean error log
    if [[ $DEBUG -eq 1 && -f "$ERROR_LOG" ]]; then
        rm -f -- "$ERROR_LOG"
        echo -e "${YELLOW}✓ Error log anterior eliminado${NC}"
    fi
    
    # Without a feeds file, write a sample one and stop so the user can edit it
    if [[ ! -f "$FEEDS_FILE" ]]; then
        cat > "$FEEDS_FILE" << EOF
# Archivo de feeds - Formato: 
# URL(gemini:// gopher:// o spartan://)|Nombre a mostrar|Favicon personalizado (Opcional)
# Si no existe favicon online (en el servidor) o personalizado se usará:
# 📝 para Gemini 📂 para Gopher y 🗡 para Spartan

# GEMINI
gemini://caleb.subnet.city/gemlog/|Caleb|
gemini://subnet.city/gemlog/|Subnet|
gemini://pkillers.subnet.city/suscribe.gmi|Psikokillers|💀

# GOPHER
gopher://lucio.albenga.es:70/1/lfa/es/phlog|Lucio Albenga|🐒
EOF
        echo "Creado $FEEDS_FILE con ejemplos multi-protocolo"
        echo -e "${YELLOW}Archivo $FEEDS_FILE creado con ejemplos${NC}"
        echo -e "${YELLOW}Edita el archivo y ejecuta el script nuevamente${NC}"
        exit 1
    fi
    
    # Reset the result arrays. The two associative arrays are local to main
    # (shadowing the file-level ones); functions called from here still see
    # them through bash's dynamic scoping.
    entries=()
    local -A blog_names=()
    local -A blog_favicons=()
    
    echo -e "${YELLOW}=== 🐦 Pinzón Aggregator v6.3 ===${NC}"
    echo -e "Filtrando entradas de: $PREVIOUS_YEAR y $CURRENT_YEAR"
    echo -e "Fecha máxima aceptada: $TODAY (hoy)"
    echo -e "Soporte para: Gemini, Gopher y Spartan"
    echo -e "Formatos de fecha: AAAA-MM-DD, DD-MM-AAAA, YYMMDD, AAAAjDDD\n"
    echo -e "${BLUE}Configuración de reintentos: $MAX_RETRIES intentos con $RETRY_DELAY segundos de espera${NC}\n"
    
    # Lagrange must be runnable before any feed is processed
    if ! command -v "$LAGRANGE_PATH" >/dev/null 2>&1; then
        echo -e "${RED}✗ ERROR: Lagrange no está disponible${NC}"
        echo -e "${YELLOW}Se buscó en:${NC}"
        echo -e "${YELLOW}  - PATH: lagrange${NC}"
        echo -e "${YELLOW}  - $HOME/lagrange/lagrange${NC}"
        echo -e "${YELLOW}  - Variable LAGRANGE_PATH: $LAGRANGE_PATH${NC}"
        echo -e "${YELLOW}Por favor, instala Lagrange o configura la variable LAGRANGE_PATH${NC}"
        echo -e "${BLUE}Ejemplo: export LAGRANGE_PATH=\"/ruta/a/tu/lagrange\"${NC}"
        exit 1
    else
        echo -e "${GREEN}✓ Usando Lagrange: $LAGRANGE_PATH${NC}"
    fi
    
    # One feed per line: URL|name|optional favicon.
    # The "|| [[ -n ... ]]" part keeps a final line that has no trailing
    # newline; read fills the variables but returns non-zero at end of file.
    # NOTE(review): commands run inside this loop inherit the feeds file as
    # stdin; if the fetch tool ever reads stdin it would swallow the remaining
    # feeds - confirm against fetch_content.
    while IFS='|' read -r url name favicon_override || [[ -n "$url" ]]; do
        # Trim surrounding whitespace from every field
        url="${url#"${url%%[![:space:]]*}"}"
        url="${url%"${url##*[![:space:]]}"}"
        name="${name#"${name%%[![:space:]]*}"}"
        name="${name%"${name##*[![:space:]]}"}"
        favicon_override="${favicon_override#"${favicon_override%%[![:space:]]*}"}"
        favicon_override="${favicon_override%"${favicon_override##*[![:space:]]}"}"
        
        # Skip blank (including whitespace-only) lines and comments
        if [[ -z "$url" || "$url" == \#* ]]; then
            continue
        fi
        
        total=$((total + 1))
        
        echo -e "${YELLOW}=== Procesando [$total]: $name ===${NC}"
        echo -e "${BLUE}Protocolo: ${url%%://*}${NC}"
        
        # Report which favicon source applies
        if [[ -n "$favicon_override" ]]; then
            echo -e "${GREEN}✓ Favicon personalizado: $favicon_override${NC}"
        else
            echo -e "${BLUE}ℹ️  Usando favicon automático${NC}"
        fi
        
        if process_capsule "$url" "$name" "$favicon_override"; then
            success=$((success + 1))
        fi
        
        # Short pause between requests
        sleep 1
        
    done < "$FEEDS_FILE"
    
    # Generate the index file
    echo -e "\n${GREEN}Generando $INDEX_FILE con las $MAX_ENTRIES entradas más recientes...${NC}"
    generate_index
    
    # Summary
    echo -e "\n${GREEN}✅ Pinzón Aggregator v6.3 - Completado!${NC}"
    echo -e "Feeds procesados: $success/$total"
    echo -e "Entradas encontradas: ${#entries[@]}"
    echo -e "Entradas mostradas: $((${#entries[@]} > $MAX_ENTRIES ? $MAX_ENTRIES : ${#entries[@]}))"
    echo -e "Archivo generado: $INDEX_FILE"
    echo -e "Años incluidos: $PREVIOUS_YEAR - $CURRENT_YEAR"
    echo -e "Fecha máxima aceptada: $TODAY"
    echo -e "Formatos soportados: AAAA-MM-DD, DD-MM-AAAA, YYMMDD, AAAAjDDD"
    echo -e "Caché: $CACHE_FILE"
    echo -e "Lagrange: $LAGRANGE_PATH"
    echo -e "Reintentos: $MAX_RETRIES intentos con $RETRY_DELAY segundos"
    
    # Cache statistics (one line per cached feed)
    if [[ -f "$CACHE_FILE" ]]; then
        cache_entries=$(wc -l < "$CACHE_FILE" 2>/dev/null || echo 0)
        echo -e "Entradas en caché: $cache_entries"
    fi
    
    # Favicons recorded while processing the feeds
    echo -e "\n${BLUE}Favicons utilizados:${NC}"
    for blog_url in "${!blog_favicons[@]}"; do
        echo "  ${blog_favicons[$blog_url]} - $blog_url"
    done
    
    # Point at the error log when it has content
    if [[ -f "$ERROR_LOG" ]]; then
        error_count=$(wc -l < "$ERROR_LOG" 2>/dev/null || echo 0)
        if [[ $error_count -gt 0 ]]; then
            echo -e "\n${RED}⚠ Se registraron $error_count errores en: $ERROR_LOG${NC}"
            if [[ $DEBUG -eq 1 ]]; then
                echo -e "${YELLOW}Contenido de error.log:${NC}"
                cat "$ERROR_LOG"
            fi
        fi
    fi
    
    # Preview of the generated file
    echo -e "\n${BLUE}Vista previa de $INDEX_FILE:${NC}"
    head -n 5 "$INDEX_FILE"
    
    # Cache contents are shown in debug mode only
    if [[ $DEBUG -eq 1 ]]; then
        echo -e "\n${BLUE}Contenido del caché:${NC}"
        if [[ -f "$CACHE_FILE" ]]; then
            cat "$CACHE_FILE"
        else
            echo "  (vacío)"
        fi
    fi
    
    # Remove the temporary directory; ":?" aborts instead of expanding to an
    # empty path if TEMP_DIR is unset or empty
    rm -rf -- "${TEMP_DIR:?}"
}

# Run the main entry point, forwarding all command-line arguments unchanged
main "$@"