#!/usr/bin/env python3
"""
Blogger to Gemtext Converter
Descarga todo un sitio de Blogger y lo convierte a formato Gemtext
Usa solo librerías estándar de Python
"""

import xml.etree.ElementTree as ET
import html
import urllib.request
import urllib.parse
import urllib.error
import os
import re
import sys
import time
import subprocess
from datetime import datetime
from html.parser import HTMLParser

def normalize_hashtag(tag):
    """Collapse a category name into one lowercase ASCII hashtag word.

    The text is lower-cased, accented letters are reduced to their base
    letter, and every remaining character that is not ``a-z`` or ``0-9``
    (spaces, punctuation, symbols) is removed.

    Args:
        tag: Category text as found in the feed.

    Returns:
        The normalized word, or ``""`` when ``tag`` is blank or nothing
        survives the clean-up.
    """
    # Local import so this function does not depend on the file's import block.
    import unicodedata

    if not tag.strip():
        return ""

    lowered = tag.lower().strip()

    # NFKD splits an accented letter into its base letter plus a combining
    # mark (e.g. "ñ" -> "n" + U+0303); the mark is then dropped by the regex
    # below.  This covers every accented Latin letter instead of a fixed list.
    decomposed = unicodedata.normalize('NFKD', lowered)

    return re.sub(r'[^a-z0-9]', '', decomposed)

class HTMLToGemtextParser(HTMLParser):
    """Parser para convertir HTML a Gemtext correctamente"""
    def __init__(self):
        """Initialise the base parser and reset all conversion state."""
        super().__init__()

        # --- Output being built -------------------------------------------
        self.lines = []            # finished Gemtext lines
        self.current_text = ""     # paragraph text not yet flushed to lines
        self.pending_links = []    # (href, text) pairs emitted after a paragraph
        self.video_urls = []       # video links appended at the end
        self.downloaded_images = []  # (url, local_name, downloaded) triples

        # --- Anchor currently open ----------------------------------------
        self.in_anchor = False
        self.anchor_href = self.anchor_text = ""

        # --- List item currently open -------------------------------------
        self.in_list_item = self.list_item_has_link = False
        self.list_item_text = ""

        # --- Tag tracking -------------------------------------------------
        self.current_tag = ""
        self.ignore_until_tag = None  # set while skipping <script>/<style>
        self.consecutive_br = 0

        # --- Layout flags -------------------------------------------------
        self.last_was_heading = self.last_was_list = False
        self.in_paragraph = False
    
    def set_downloaded_images(self, images):
        """Register the images already fetched for the post being parsed.

        ``handle_starttag`` iterates this collection as
        ``(img_url, safe_name, downloaded)`` triples to decide whether an
        ``<img>`` becomes a link to a local file or to the placeholder.
        """
        self.downloaded_images = images
    
    def _extract_youtube_video(self, tag, attrs):
        """Record a YouTube watch URL found in an embed-style tag.

        The first ``src``/``movie``/``data``/``value`` attribute that mentions
        YouTube is inspected.  ``/v/<id>``, ``/embed/<id>`` and
        ``youtu.be/<id>`` forms are rewritten to
        ``https://www.youtube.com/watch?v=<id>`` and appended to
        ``self.video_urls``.  Any other form is ignored.

        Args:
            tag: Tag name (unused; kept for a uniform extractor signature).
            attrs: ``(name, value)`` pairs as delivered by ``HTMLParser``.
        """
        video_url = None
        for attr, value in attrs:
            # HTMLParser reports valueless attributes (``<embed src>``) as
            # None; skip them instead of failing on the ``in`` test.
            if not value or attr not in ('src', 'movie', 'data', 'value'):
                continue
            if 'youtube.com' in value or 'youtu.be' in value:
                video_url = value
                break

        if not video_url:
            return

        if '/v/' in video_url:
            remainder = video_url.split('/v/')[1]
        elif 'youtube.com/embed/' in video_url:
            remainder = video_url.split('/embed/')[1]
        elif 'youtu.be/' in video_url:
            remainder = video_url.split('youtu.be/')[1]
        else:
            return

        # Drop any query string or extra parameters after the id.
        video_id = remainder.split('&')[0].split('?')[0]
        if video_id:
            self.video_urls.append(f"https://www.youtube.com/watch?v={video_id}")
    
    def _extract_blogger_video(self, tag, attrs):
        """Record a video link for a tag carrying a ``contentid`` attribute.

        The first 11 characters of the ``contentid`` value are used as a
        YouTube video id and the resulting watch URL is appended to
        ``self.video_urls``.
        NOTE(review): treating the content id prefix as a YouTube id is an
        approximation inherited from the original code - confirm it resolves.

        Args:
            tag: Tag name (unused; kept for a uniform extractor signature).
            attrs: ``(name, value)`` pairs as delivered by ``HTMLParser``.

        Returns:
            True when a ``contentid`` was found and a URL recorded, else False.
        """
        # Valueless attributes arrive as None, so only non-empty values count.
        content_id = next(
            (value for attr, value in attrs if attr == 'contentid' and value),
            None,
        )
        if not content_id:
            return False

        self.video_urls.append(f"https://www.youtube.com/watch?v={content_id[:11]}")
        return True
    
    def handle_starttag(self, tag, attrs):
        """Update parser state, and emit lines where needed, for an opening tag.

        Args:
            tag: Lower-cased tag name.
            attrs: ``(name, value)`` pairs; ``value`` is None for attributes
                written without a value.
        """
        # <br>, <img> and <span> are inline: they must not cut the paragraph
        # being accumulated.  Every other tag flushes the text buffer first.
        if tag not in ('br', 'img', 'span'):
            self._flush_text()

        self.current_tag = tag

        # While a <script>/<style> body is being skipped only its end tag
        # matters (handled in handle_endtag).
        if self.ignore_until_tag:
            return

        # Embedded players: try the Blogger ``contentid`` form first, then
        # plain YouTube URLs.
        if tag in ('object', 'embed', 'param'):
            if not self._extract_blogger_video(tag, attrs):
                self._extract_youtube_video(tag, attrs)

        # Skip these tags together with everything inside them.
        if tag in ('script', 'style'):
            self.ignore_until_tag = tag
            return

        attrs_dict = dict(attrs)

        if tag == 'a':
            self.in_anchor = True
            # ``or ''`` normalises a valueless ``href`` (None) to empty.
            self.anchor_href = attrs_dict.get('href', '') or ''
            self.anchor_text = ""
            if self.in_list_item:
                self.list_item_has_link = True

        elif tag == 'img':
            img_src = attrs_dict.get('src')
            img_alt = attrs_dict.get('alt') or ""

            if img_src and img_src.startswith('http'):
                # Find the download record for this image; a record matches
                # when its URL equals or contains the tag's src.
                local_name, success = next(
                    (
                        (safe_name, downloaded)
                        for img_url, safe_name, downloaded in self.downloaded_images
                        if img_src in img_url
                    ),
                    (None, False),
                )

                if local_name and success:
                    display_text = img_alt if img_alt else 'Imagen'
                    self.lines.append(f"=> ../img/{local_name} {display_text}")
                else:
                    # Failed or unknown image: link the shared placeholder.
                    self.lines.append("=> ../img/placeholder.png Imagen no disponible")

        elif tag == 'li':
            self.in_list_item = True
            self.list_item_has_link = False
            self.list_item_text = ""

        elif tag in ('ul', 'ol'):
            # Blank line before a list unless one is already there.
            if self.lines and self.lines[-1] != "":
                self.lines.append("")
            self.last_was_list = True

        elif tag == 'br':
            self.consecutive_br += 1
            if self.consecutive_br >= 2:
                # Two or more <br> in a row act as a paragraph break.
                self._flush_text()
                if self.lines and self.lines[-1] != "":
                    self.lines.append("")
            else:
                # A single <br> is just a space.  It must go to the buffer
                # that handle_data is currently filling, otherwise the words
                # on both sides of the <br> are glued together.
                if self.in_anchor:
                    self.anchor_text += " "
                elif self.in_list_item:
                    self.list_item_text += " "
                else:
                    self.current_text += " "

        elif tag in ('p', 'div', 'section', 'article'):
            self._flush_text()
            self.in_paragraph = True
            # Blank line before a block unless one is already there or the
            # previous element was a heading.
            if self.lines and self.lines[-1] != "" and not self.last_was_heading:
                self.lines.append("")

        elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            self._flush_text()
            # Blank line before a heading unless one is already there.
            if self.lines and self.lines[-1] != "":
                self.lines.append("")
            self.last_was_heading = True
    
    def handle_endtag(self, tag):
        """Finish the element being closed and emit its Gemtext line(s).

        Args:
            tag: Lower-cased name of the closing tag.
        """
        # Any closing tag other than </br> breaks a run of consecutive <br>.
        if tag != 'br':
            self.consecutive_br = 0

        # End of the <script>/<style> element being skipped.
        if self.ignore_until_tag and tag == self.ignore_until_tag:
            self.ignore_until_tag = None
            self.current_tag = ""
            return

        # Still inside skipped content: ignore every other closing tag.
        if self.ignore_until_tag:
            return

        if tag == 'a' and self.in_anchor:
            self.in_anchor = False
            if self.anchor_href and self.anchor_text.strip():
                clean_text = self._clean_text(self.anchor_text)
                if clean_text:
                    if self.in_list_item:
                        # Inside a list the link text stays in the item.
                        self.list_item_text += clean_text + " "
                        self.list_item_has_link = True
                        # Remember the target now: anchor_href is cleared just
                        # below, before </li> gets a chance to read it.  With
                        # several links in one item the last one wins.
                        self._list_item_href = self.anchor_href
                    else:
                        # In running text the link is not inlined; it is
                        # queued and written after the paragraph.
                        self.pending_links.append((self.anchor_href, clean_text))
            self.anchor_text = ""
            self.anchor_href = ""

        elif tag == 'li':
            self.in_list_item = False
            # getattr: the attribute only exists once a list link was seen.
            href = getattr(self, '_list_item_href', '')
            self._list_item_href = ''
            clean_text = self._clean_text(self.list_item_text).strip()
            if clean_text:
                if self.list_item_has_link and href:
                    # Item containing a usable link: emit a link line.
                    self.lines.append(f"=> {href} {clean_text}")
                else:
                    # Plain item (or a link without target): bullet line.
                    self.lines.append(f"* {clean_text}")
            self.list_item_text = ""
            self.list_item_has_link = False

        elif tag in ('p', 'div', 'section', 'article'):
            self._flush_text()
            self.lines.append("")  # blank line after the block
            self.in_paragraph = False
            self.last_was_heading = False

        elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            self._flush_text()
            self.lines.append("")
            self.last_was_heading = True

        elif tag in ('ul', 'ol'):
            # Blank line after a list unless one is already there.
            if self.lines and self.lines[-1] != "":
                self.lines.append("")
            self.last_was_list = False
            self.last_was_heading = False

        # </span> is inline and </br> was fully handled in handle_starttag,
        # so neither needs any work here.

        self.current_tag = ""
    
    def handle_data(self, data):
        """Route a run of text to the buffer of the element being parsed.

        Text inside an anchor goes to ``anchor_text``, text inside a list
        item to ``list_item_text``, and everything else (after clean-up) to
        the paragraph buffer ``current_text``.

        Args:
            data: Text found between tags.
        """
        # Nothing inside a skipped <script>/<style> element is content.
        if self.ignore_until_tag:
            return
        if self.current_tag in ('script', 'style'):
            return

        # Visible text between two <br> means they are NOT consecutive, so
        # the next <br> must count as a single line break again.  Whitespace
        # alone (e.g. the newline in "<br>\n<br>") does not break the run.
        if data.strip():
            self.consecutive_br = 0

        if self.in_anchor:
            self.anchor_text += data
        elif self.in_list_item:
            self.list_item_text += data
        else:
            clean_data = self._clean_text(data)
            if clean_data:
                self.current_text += clean_data + " "
    
    def _clean_text(self, text):
        """Return ``text`` with markup residue removed and whitespace collapsed.

        Quotation marks and apostrophes are ordinary prose and are kept.

        Args:
            text: Raw text collected by the parser.

        Returns:
            The cleaned text, or ``""`` when ``text`` is blank.
        """
        if not text.strip():
            return ""

        # Decode HTML entities.
        text = html.unescape(text)

        # Strip leftover markup for tags that should never reach the output,
        # both in still-escaped form (&lt;script ...&gt;) and as literal tags.
        text = re.sub(r'&lt;/?(?:script|object|param|embed)[^&]*&gt;', '', text)
        text = re.sub(r'</?(?:script|object|param|embed|div|span)[^>]*>', '', text)

        # Drop entity-like tokens that unescape() did not recognise.
        text = re.sub(r'&[^;\s]{1,10};', '', text)

        # Collapse every whitespace run into a single space.
        text = re.sub(r'\s+', ' ', text)
        return text.strip()
    
    def _flush_text(self):
        """Move the buffered paragraph text into ``self.lines``.

        Each paragraph becomes a single line.  Links queued in
        ``self.pending_links`` are written right after the first paragraph
        emitted and the queue is then emptied.  A buffer holding only
        whitespace is left untouched.
        """
        buffered = self.current_text
        if not buffered.strip():
            return

        cleaned = self._clean_text(buffered)
        if cleaned:
            # Paragraphs are separated by runs of two or more whitespace chars.
            pieces = (piece.strip() for piece in re.split(r'\s\s+', cleaned))
            for piece in pieces:
                if not piece:
                    continue
                self.lines.append(piece)

                queued = self.pending_links
                if queued:
                    self.lines.extend(f"=> {href} {text}" for href, text in queued)
                    self.pending_links = []

        self.current_text = ""
    
    def get_gemtext(self):
        """Return the converted document as one Gemtext string.

        Flushes any buffered text, appends links still queued and the
        collected video links, then joins the lines with blank-line runs
        collapsed to a single blank line.

        Returns:
            The Gemtext ending in exactly one newline, or ``""`` when there is
            no content.
        """
        # Emit whatever text is still buffered.
        self._flush_text()

        # Links queued after the last paragraph was flushed.
        if self.pending_links:
            if self.lines and self.lines[-1] != "":
                self.lines.append("")
            for href, text in self.pending_links:
                self.lines.append(f"=> {href} {text}")
            self.pending_links = []

        # Videos go at the end of the content.
        if self.video_urls:
            if self.lines and self.lines[-1] != "":
                self.lines.append("")
            self.lines.append("Videos relacionados:")
            # dict.fromkeys removes duplicates while keeping first-seen
            # order; a set would make the output order vary between runs.
            for video_url in dict.fromkeys(self.video_urls):
                self.lines.append(f"=> {video_url} Ver video")
            self.lines.append("")

        # Join and collapse runs of blank lines.
        result = '\n'.join(self.lines)
        result = re.sub(r'\n\s*\n', '\n\n', result)
        result = result.strip()

        # Exactly one trailing newline for non-empty output.
        if result:
            result += '\n'

        return result

class BloggerToGemtext:
    def __init__(self, blog_url, tag):
        self.blog_url = blog_url.rstrip('/')
        self.tag = tag
        self.img_dir = "img"
        self.output_file = f"blog.{tag}"
        self.used_ids = set()  # Para evitar IDs duplicados
        
        # Crear directorios necesarios
        os.makedirs(self.img_dir, exist_ok=True)
        
        # Crear imagen placeholder
        self.create_placeholder()
        
    def create_placeholder(self):
        """Crear imagen placeholder usando ImageMagick"""
        placeholder_path = os.path.join(self.img_dir, "placeholder.png")
        
        if not os.path.exists(placeholder_path):
            print("🖼️  Creando imagen placeholder...")
            try:
                # Comando ImageMagick para crear una imagen 200x200 con texto
                cmd = [
                    'convert', 
                    '-size', '200x200',
                    'xc', '#f0f0f0',  # Fondo gris claro
                    '-gravity', 'center',
                    '-pointsize', '12',
                    '-fill', '#666666',  # Texto gris
                    '-annotate', '0', 'Imagen no disponible',
                    placeholder_path
                ]
                
                result = subprocess.run(cmd, capture_output=True, text=True)
                if result.returncode == 0:
                    print("✅ Placeholder creado: placeholder.png")
                else:
                    print("❌ Error creando placeholder, creando archivo vacío...")
                    # Crear un archivo vacío como fallback
                    with open(placeholder_path, 'wb') as f:
                        f.write(b'')
                    
            except Exception as e:
                print(f"❌ ImageMagick no disponible: {e}")
                print("📝 Creando archivo placeholder vacío...")
                with open(placeholder_path, 'wb') as f:
                    f.write(b'')
    
    def get_feed_url(self):
        """Return the URL of the blog's Atom posts feed.

        For ``*.blogspot.com`` addresses the feed is always built from the
        host name over HTTPS, whatever scheme or path the blog URL carried.
        For any other address ``/feeds/posts/default`` is appended to the
        blog URL.  A missing scheme is treated as ``https://``.
        """
        url = self.blog_url
        if '://' not in url:
            # Without a scheme urlparse() would put the host in ``path``.
            url = f"https://{url}"

        host = urllib.parse.urlparse(url).netloc
        if '.blogspot.com' in host:
            return f"https://{host}/feeds/posts/default"
        return f"{url}/feeds/posts/default"
    
    def download_url(self, url, retries=3):
        """Fetch ``url`` and return its body decoded as UTF-8.

        Args:
            url: Address to download.
            retries: Number of attempts; one second is waited between them.

        Returns:
            The decoded body, or None when ``retries`` is zero or negative.

        Raises:
            Exception: Whatever the last attempt raised.
        """
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; Blogger-to-Gemtext/1.0)'
        }
        final_attempt = retries - 1

        attempt = 0
        while attempt < retries:
            try:
                request = urllib.request.Request(url, headers=request_headers)
                with urllib.request.urlopen(request, timeout=30) as response:
                    return response.read().decode('utf-8')
            except Exception as error:
                # Out of attempts: hand the failure to the caller.
                if attempt == final_attempt:
                    raise error
                time.sleep(1)
            attempt += 1

        return None
    
    def download_image(self, img_url):
        """Descargar una imagen individual"""
        try:
            # Limpiar URL
            clean_url = img_url.split('?')[0]  # Remover parámetros URL
            
            parsed_url = urllib.parse.urlparse(clean_url)
            img_name = os.path.basename(parsed_url.path)
            
            if not img_name or '.' not in img_name:
                img_name = f"image_{abs(hash(img_url))}.jpg"
            
            safe_img_name = re.sub(r'[^\w\.-]', '_', img_name)
            img_path = os.path.join(self.img_dir, safe_img_name)
            
            if not os.path.exists(img_path):
                print(f"    📷 Descargando: {safe_img_name}")
                req = urllib.request.Request(
                    clean_url,
                    headers={'User-Agent': 'Mozilla/5.0'}
                )
                with urllib.request.urlopen(req, timeout=30) as response:
                    img_data = response.read()
                    # Verificar que es una imagen válida
                    if len(img_data) > 100:  # Mínimo 100 bytes
                        with open(img_path, 'wb') as f:
                            f.write(img_data)
                        return safe_img_name, True
                    else:
                        print(f"      ⚠️  Imagen demasiado pequeña, usando placeholder")
                        return safe_img_name, False
            
            return safe_img_name, True
            
        except Exception as e:
            print(f"    ❌ Error descargando {img_url}: {e}")
            safe_name = re.sub(r'[^\w\.-]', '_', os.path.basename(img_url)) if '.' in img_url else f"image_{abs(hash(img_url))}"
            return safe_name, False
    
    def extract_images_from_html(self, html_content):
        """Return the http(s) image URLs referenced by ``<img>`` tags.

        Matching is case-insensitive and accepts single- or double-quoted
        ``src`` values.  Entities in the URL are decoded so the result equals
        the ``src`` value an HTML parser reports for the same tag.

        Args:
            html_content: HTML of one post; may be empty or None.

        Returns:
            URLs in document order, duplicates included; ``[]`` for empty
            input.
        """
        if not html_content:
            return []

        # (?<![\w-]) keeps attributes such as ``data-src`` from matching;
        # \1 closes the value with the same quote that opened it.
        img_pattern = r'<img\b[^>]*?(?<![\w-])src\s*=\s*(["\'])(.*?)\1'
        matches = re.findall(img_pattern, html_content, flags=re.IGNORECASE | re.DOTALL)

        urls = (html.unescape(src) for _quote, src in matches)

        # Only absolute HTTP/HTTPS URLs can be downloaded.
        return [url for url in urls if url.startswith('http')]
    
    def download_all_posts(self):
        """Download every post of the blog through its paginated Atom feed.

        Pages are requested with ``start-index``/``max-results`` until a page
        has no entries, or a page is shorter than requested and carries no
        ``rel="next"`` link.  Any error while fetching or parsing a page stops
        the loop; posts collected so far are still returned.

        Returns:
            A list of post dictionaries as built by ``parse_atom_entry``.
        """
        print("📥 Descargando contenido del blog...")

        feed_url = self.get_feed_url()
        ns = {'atom': 'http://www.w3.org/2005/Atom'}
        all_posts = []
        start_index = 1
        max_results = 500

        while True:
            paginated_url = f"{feed_url}?start-index={start_index}&max-results={max_results}"

            print(f"📦 Descargando lote desde índice {start_index}...")

            try:
                xml_content = self.download_url(paginated_url)
                if not xml_content:
                    break

                root = ET.fromstring(xml_content)

                entries = root.findall('atom:entry', ns)
                if not entries:
                    break

                print(f"  ✅ Descargados {len(entries)} posts")

                # Keep only entries that could be turned into a dictionary.
                for entry in entries:
                    post_data = self.parse_atom_entry(entry, ns)
                    if post_data:
                        all_posts.append(post_data)

                # A short page is not proof that the feed ended: the server
                # may cap the page size below the requested max-results.  The
                # feed's own rel="next" link tells whether more pages exist.
                has_next = any(
                    link.get('rel') == 'next'
                    for link in root.findall('atom:link', ns)
                )
                if len(entries) < max_results and not has_next:
                    break

                # Advance by what was actually received so no post is skipped
                # when the page came back shorter than requested.
                start_index += len(entries)
                time.sleep(1)

            except Exception as e:
                print(f"❌ Error descargando lote: {e}")
                break

        print(f"📊 Total de posts descargados: {len(all_posts)}")
        return all_posts
    
    def parse_atom_entry(self, entry, ns):
        """Parsear una entrada Atom XML"""
        try:
            post_data = {}
            
            # Título
            title_elem = entry.find('atom:title', ns)
            if title_elem is not None:
                post_data['title'] = title_elem.text or ''
            
            # Fecha de publicación
            published_elem = entry.find('atom:published', ns)
            if published_elem is not None:
                post_data['published'] = published_elem.text or ''
            
            # Fecha de actualización
            updated_elem = entry.find('atom:updated', ns)
            if updated_elem is not None:
                post_data['updated'] = updated_elem.text or ''
            
            # Contenido
            content_elem = entry.find('atom:content', ns)
            if content_elem is not None:
                post_data['content'] = content_elem.text or ''
            
            # Categorías
            categories = []
            for category_elem in entry.findall('atom:category', ns):
                term = category_elem.get('term')
                if term:
                    categories.append(term)
            post_data['categories'] = categories
            
            return post_data
            
        except Exception as e:
            print(f"❌ Error parseando entrada: {e}")
            return None
    
    def process_post_images(self, html_content):
        """Procesar y descargar imágenes de un post"""
        if not html_content:
            return []
        
        # Extraer URLs de imágenes reales
        image_urls = self.extract_images_from_html(html_content)
        
        print(f"    🔍 Encontradas {len(image_urls)} imágenes")
        
        downloaded_images = []
        for img_url in image_urls:
            safe_img_name, success = self.download_image(img_url)
            downloaded_images.append((img_url, safe_img_name, success))
            if success:
                print(f"      ✅ {safe_img_name}")
            else:
                print(f"      🖼️  Placeholder para: {safe_img_name}")
        
        return downloaded_images
    
    def html_to_gemtext(self, html_content, downloaded_images):
        """Convert the HTML of one post to Gemtext.

        Args:
            html_content: HTML markup of the post; may be empty.
            downloaded_images: ``(url, local_name, ok)`` tuples describing the
                images already fetched for this post.

        Returns:
            The Gemtext produced by ``HTMLToGemtextParser``, or ``""`` for
            empty input.
        """
        if not html_content:
            return ""

        parser = HTMLToGemtextParser()
        parser.set_downloaded_images(downloaded_images)

        # The markup is fed as-is: the parser decodes entities in text and
        # attribute values itself.  Unescaping beforehand would turn markup
        # the author escaped to SHOW it (e.g. "&lt;script&gt;") into live
        # tags, and the text around it would be dropped.
        parser.feed(html_content)
        # close() makes the parser process any text it is still holding back
        # (such as a trailing "&" it was waiting to complete).
        parser.close()

        return parser.get_gemtext()
    
    def get_entry_date(self, post_data):
        """Return the post's date as ``YYYY-MM-DD``.

        The ``published`` timestamp is preferred and ``updated`` is the
        fallback; the date is the part before the ``T`` separator.

        Args:
            post_data: Post dictionary as built by ``parse_atom_entry``.

        Returns:
            The date string, or ``"0000-00-00"`` when neither field holds
            something shaped like a date.
        """
        for key in ('published', 'updated'):
            date_part = str(post_data.get(key) or '').split('T')[0].strip()
            # Only accept a well-formed date; anything else would corrupt the
            # date line of the output entry.
            if re.fullmatch(r'\d{4}-\d{2}-\d{2}', date_part):
                return date_part

        return "0000-00-00"
    
    def generate_entry_id(self, title, entry_count):
        """Build a unique, ASCII-only identifier for an entry.

        The title is lower-cased, accents are removed, every run of other
        characters becomes one ``_`` and the result is cut to 25 characters.
        When nothing usable remains the id is ``entrada_<entry_count>``.  A
        numeric suffix is added if the id was already handed out.

        Args:
            title: Post title; may be empty.
            entry_count: 1-based position of the post, used for the fallback.

        Returns:
            The identifier, which is also recorded in ``self.used_ids``.
        """
        # Local import so this method does not depend on the file's imports.
        import unicodedata

        entry_id = ""
        if title:
            # Decompose accented letters and drop the combining marks so
            # "programación" becomes "programacion", not "programaci_n".
            decomposed = unicodedata.normalize('NFKD', title.lower())
            unaccented = ''.join(
                ch for ch in decomposed if not unicodedata.combining(ch)
            )
            entry_id = re.sub(r'[^a-z0-9]+', '_', unaccented).strip('_')
            # Kept short to leave room for the numeric suffix; the cut may
            # land right after a separator, hence the rstrip.
            entry_id = entry_id[:25].rstrip('_')

        if not entry_id:
            entry_id = f"entrada_{entry_count}"

        # Append _1, _2, ... until the id is unused.
        base_id = entry_id
        counter = 1
        while entry_id in self.used_ids:
            entry_id = f"{base_id}_{counter}"
            counter += 1

        self.used_ids.add(entry_id)
        return entry_id
    
    def normalize_categories(self, categories):
        """Render the post's categories as a space-separated hashtag line.

        Blank categories, and categories that normalize to nothing, are
        skipped.

        Args:
            categories: Category names of the post.

        Returns:
            Text such as ``"#uno #dos"``, or ``""`` when no tag remains.
        """
        candidates = (
            normalize_hashtag(category)
            for category in categories
            if category.strip()
        )
        return ' '.join(f"#{word}" for word in candidates if word)
    
    def process_posts(self, posts):
        """Procesar todos los posts y generar archivo Gemtext"""
        print("🔄 Procesando posts y generando Gemtext...")
        
        with open(self.output_file, 'w', encoding='utf-8') as f:
            for i, post in enumerate(posts):
                try:
                    title = post.get('title', '').strip()
                    if not title:
                        continue
                    
                    print(f"  📝 [{i+1}/{len(posts)}] Procesando: {title[:60]}...")
                    
                    # Obtener contenido
                    content = post.get('content', '')
                    
                    # Procesar imágenes del post
                    downloaded_images = self.process_post_images(content)
                    
                    # Convertir a Gemtext
                    gemtext_content = self.html_to_gemtext(content, downloaded_images)
                    
                    # Obtener fecha
                    date = self.get_entry_date(post)
                    
                    # Generar ID único
                    entry_id = self.generate_entry_id(title, i + 1)
                    
                    # Obtener y normalizar categorías
                    categories = post.get('categories', [])
                    normalized_hashtags = self.normalize_categories(categories)
                    
                    # Escribir entrada en formato blog.txt
                    f.write(f"' Entrada descargada de {self.blog_url}\n")
                    f.write(f"{date}\n")
                    f.write(f"{entry_id}\n")
                    f.write(f"[{self.tag}] {title}\n")
                    
                    # Escribir categorías normalizadas como hashtags
                    if normalized_hashtags:
                        f.write(f"{normalized_hashtags}\n")
                    else:
                        f.write("#blogger\n")
                    
                    f.write(f"{gemtext_content}\n")
                    f.write("<END>\n\n")
                    
                    # Contar imágenes exitosas vs placeholders
                    success_count = sum(1 for _, _, success in downloaded_images if success)
                    placeholder_count = len(downloaded_images) - success_count
                    
                    print(f"      ✅ Procesada - {success_count} imágenes, {placeholder_count} placeholders")
                    print(f"      🆔 ID: {entry_id}")
                    
                except Exception as e:
                    print(f"❌ Error procesando post {i + 1}: {e}")
                    continue
    
    def run(self):
        """Run the whole conversion: download the posts, then write them out.

        Returns:
            True when at least one post was downloaded and processing
            finished; False when no post was found or an error occurred.
        """
        print(f"🚀 Convirtiendo blog: {self.blog_url}")
        print(f"🏷️  Tag: {self.tag}")

        try:
            posts = self.download_all_posts()

            if posts:
                self.process_posts(posts)

                print(f"\n🎉 ¡Proceso completado!")
                print(f"📄 Archivo generado: {self.output_file}")
                print(f"🖼️  Imágenes descargadas en: {self.img_dir}/")
                print(f"📊 Total de posts procesados: {len(posts)}")
                return True

            # Nothing was downloaded, so there is nothing to convert.
            print("❌ No se encontraron posts para procesar")
            return False

        except Exception as e:
            print(f"❌ Error en el proceso: {e}")
            return False

def main():
    """Command-line entry point: ``<blog url> <tag>``.

    Exits with status 1 on wrong usage or a failed conversion, 0 otherwise.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 2:
        print("Uso: python3 blogger_to_gemtext.py <url_blogspot> <tag>")
        print("Ejemplo: python3 blogger_to_gemtext.py https://nintenfreaks.blogspot.com NFRK")
        sys.exit(1)

    blog_url, tag = arguments
    succeeded = BloggerToGemtext(blog_url, tag).run()

    if succeeded:
        sys.exit(0)
    sys.exit(1)

# Run the converter only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()