Downloader de Imagens — Privacy.com.br
Neste post apresento uma ferramenta em Python desenvolvida para coletar e baixar imagens a partir de URLs específicas, com foco em automação, resiliência e organização.
Código-fonte
import os
import sys
import json
import base64
import requests
import time
import argparse
import re
from urllib.parse import urlparse
from typing import Optional, Dict, List
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Root URL prefix that every accepted image link must start with.
BASE_URL = "https://image.privacy.com.br/"
# Directory where downloaded images are saved (created on demand in main()).
DOWNLOAD_DIR = "Downloads"
# Matches characters that are NOT valid in a base64 string; used with .sub('') to strip them.
VALID_BASE64 = re.compile(r'[^A-Za-z0-9+/=]')
# Characters that are illegal in filenames on common filesystems; replaced with '_'.
INVALID_FILENAME = re.compile(r'[<>:"/\\|?*]')
# Browser-like headers sent with every request; presumably the Referer is
# needed by the image host — TODO confirm against the server's behavior.
DEFAULT_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
'Accept-Language': 'pt-BR,pt;q=0.9,en;q=0.8',
'Referer': 'https://privacy.com.br/',
'Connection': 'keep-alive',
}
# ------------------------------------------------------------
# UTILITÁRIOS
# ------------------------------------------------------------
def create_session() -> requests.Session:
    """Build an HTTP session with retries and browser-like default headers.

    Transient failures (429 and 5xx) are retried up to 5 times with
    exponential backoff, for both HTTP and HTTPS endpoints.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    http = requests.Session()
    http.headers.update(DEFAULT_HEADERS)
    # Mount the same retrying adapter on both schemes.
    for scheme in ("http://", "https://"):
        http.mount(scheme, HTTPAdapter(max_retries=retry_policy))
    return http
def safe_print(msg: str) -> None:
    """Print *msg* and flush immediately so progress shows up in real time."""
    sys.stdout.write(msg + "\n")
    sys.stdout.flush()
# ------------------------------------------------------------
# DECODER
# ------------------------------------------------------------
def decode_privacy_url(url: str) -> Optional[Dict]:
    """Decode the base64 payload embedded in a privacy.com.br image URL.

    Returns a dict with the cleaned URL plus the decoded JSON fields
    ('bucket', 'key', 'edits') and the raw decoded object, or None when
    the URL is not from the expected host or cannot be decoded.
    """
    try:
        cleaned = url.strip()
        if not cleaned.startswith(BASE_URL):
            return None
        # Drop any query string before inspecting the path.
        cleaned = cleaned.split('?', 1)[0]
        segments = [s for s in urlparse(cleaned).path.strip('/').split('/') if s]

        # The payload normally follows a width segment such as "w640";
        # fall back to the last path segment otherwise.
        payload = None
        for pos, segment in enumerate(segments[:-1]):
            if segment.startswith('w') and segment[1:].isdigit():
                payload = segments[pos + 1]
                break
        if not payload:
            payload = segments[-1]

        # Remove stray non-base64 characters and restore '=' padding.
        payload = VALID_BASE64.sub('', payload)
        payload += '=' * (-len(payload) % 4)
        data = json.loads(base64.b64decode(payload, validate=True))

        return {
            'url': cleaned,
            'bucket': data.get('bucket', ''),
            'key': data.get('key', ''),
            'edits': data.get('edits', {}),
            'base64_decoded': data,
        }
    except Exception:
        # Best-effort decoder: any malformed URL simply yields None.
        return None
# ------------------------------------------------------------
# FILENAME
# ------------------------------------------------------------
def get_filename_from_info(info: Dict, index: int) -> str:
    """Derive a safe local filename from decoded URL info.

    Falls back to a sequential "imagem_NNNN.jpg" name when no key is
    available; strips a UUID-style prefix, guarantees a known image
    extension, and sanitizes characters illegal in filenames.
    """
    if not info or not info.get('key'):
        return f"imagem_{index:04d}.jpg"

    name = info['key'].rsplit('/', 1)[-1]
    # Keys often look like "<uuid-with-4-dashes>-realname.ext"; keep the tail.
    if '.' in name and name.count('-') >= 4:
        name = name.split('-', 4)[-1]
    # Ensure a recognized image extension.
    if not re.search(r'\.(jpg|jpeg|png|webp|gif)$', name, re.I):
        name += ".jpg"
    return INVALID_FILENAME.sub('_', name)
# ------------------------------------------------------------
# DOWNLOAD
# ------------------------------------------------------------
def download_image(
    session: requests.Session,
    url: str,
    index: int
) -> bool:
    """Download a single image URL into DOWNLOAD_DIR.

    Decodes the URL to derive a filename, skips files that already exist,
    validates the response Content-Type, and prints per-item progress.

    Returns True on success (including the already-downloaded case),
    False on any failure.
    """
    info = decode_privacy_url(url)
    if not info:
        safe_print(f"[{index}] ✗ URL inválida")
        return False

    filename = get_filename_from_info(info, index)
    filepath = os.path.join(DOWNLOAD_DIR, filename)

    # Skip files downloaded on a previous run (resume support).
    if os.path.exists(filepath):
        # FIX: the message printed a garbled "(unknown)" placeholder
        # instead of the filename; restore the interpolation.
        safe_print(f"[{index}] ⚠ Já existe: {filename}")
        return True

    try:
        r = session.get(url, timeout=30)
        if r.status_code != 200:
            safe_print(f"[{index}] ✗ HTTP {r.status_code}")
            return False
        # Guard against HTML error pages served with status 200.
        if "image" not in r.headers.get("Content-Type", ""):
            safe_print(f"[{index}] ✗ Resposta não é imagem")
            return False
        with open(filepath, "wb") as f:
            f.write(r.content)
        size_kb = len(r.content) / 1024
        # FIX: same "(unknown)" placeholder restored to the filename.
        safe_print(f"[{index}] ✅ Salvo: {filename} ({size_kb:.1f} KB)")
        return True
    except Exception as e:
        safe_print(f"[{index}] ✗ Erro: {e}")
        return False
# ------------------------------------------------------------
# COLETA DE URLS
# ------------------------------------------------------------
def read_urls_from_file(filepath: str) -> List[str]:
    """Load candidate URLs from a text file, one per line.

    Blank lines are dropped; lines not starting with BASE_URL are counted
    and reported as ignored. Returns an empty list when the file is missing.
    """
    if not os.path.isfile(filepath):
        safe_print("Arquivo não encontrado")
        return []
    with open(filepath, encoding="utf-8") as handle:
        candidates = [line.strip() for line in handle if line.strip()]
    valid = [c for c in candidates if c.startswith(BASE_URL)]
    safe_print(f"Ignoradas {len(candidates) - len(valid)} linhas inválidas")
    return valid
def collect_urls_interactive() -> List[str]:
    """Read image URLs interactively from stdin.

    Only lines starting with BASE_URL are kept. Collection ends after two
    consecutive empty lines (once at least one URL has been collected),
    or on Ctrl+C / EOF.

    FIX: the prompt promises termination after *two* empty lines
    ("duas linhas vazias"), but the code stopped on the first blank
    line; now two consecutive blanks are required, as documented.
    """
    safe_print("Cole as URLs (duas linhas vazias para continuar):")
    urls: List[str] = []
    blank_streak = 0  # consecutive empty input lines seen so far
    while True:
        try:
            line = input().strip()
        except (KeyboardInterrupt, EOFError):
            break
        if not line:
            blank_streak += 1
            if blank_streak >= 2 and urls:
                break
            continue
        blank_streak = 0
        if line.startswith(BASE_URL):
            urls.append(line)
    return urls
# ------------------------------------------------------------
# LOG
# ------------------------------------------------------------
def save_log(urls: List[str], success: int, fail: int):
    """Write a summary log of all processed URLs into DOWNLOAD_DIR."""
    log_path = os.path.join(DOWNLOAD_DIR, "urls_processadas.txt")
    with open(log_path, "w", encoding="utf-8") as log:
        summary = (
            f"Total: {len(urls)}\n"
            f"Sucesso: {success}\n"
            f"Falhas: {fail}\n"
        )
        log.write(summary)
        log.write("=" * 60 + "\n")
        # One numbered line per processed URL.
        log.writelines(f"{pos:04d} | {item}\n" for pos, item in enumerate(urls, 1))
    safe_print(f"\n📄 Log salvo em: {log_path}")
# ------------------------------------------------------------
# MAIN
# ------------------------------------------------------------
def main():
    """Parse arguments, gather URLs, download every image, and write a log.

    URL sources, in priority order: --file, positional CLI arguments,
    interactive paste. Exits with status 1 when no valid URL is found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("urls", nargs="*")
    parser.add_argument("-f", "--file")
    args = parser.parse_args()

    banner = "=" * 60
    safe_print(banner)
    safe_print("DOWNLOADER DE IMAGENS")
    safe_print(banner)

    if args.file:
        urls = read_urls_from_file(args.file)
    elif args.urls:
        urls = [u for u in args.urls if u.startswith(BASE_URL)]
    else:
        urls = collect_urls_interactive()

    if not urls:
        safe_print("Nenhuma URL válida.")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    session = create_session()

    success = fail = 0
    for position, target in enumerate(urls, 1):
        if download_image(session, target, position):
            success += 1
        else:
            fail += 1
        time.sleep(0.3)  # small pause between requests to be polite

    safe_print("\n" + banner)
    safe_print(f"Total: {len(urls)} | Sucesso: {success} | Falhas: {fail}")
    save_log(urls, success, fail)
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # User aborted with Ctrl+C.
        safe_print("\n⏹ Cancelado")
    except Exception as exc:
        # Top-level catch-all so the script exits with a message, not a traceback.
        safe_print(f"\n💥 Erro: {exc}")