patx/gitman
Replaced in-memory AUTH_FAILURES with auth_rate_limits SQLite table; uses BEGIN IMMEDIATE for failure…
Commit e71ee3b · patx · 2026-05-07T20:33:09-04:00
Replaced the in-memory AUTH_FAILURES dict with an auth_rate_limits SQLite table. Failure increments use BEGIN IMMEDIATE so concurrent workers serialize correctly. Made rate-limit checks read-only. Added bounded expired-row pruning to avoid cleanup spikes. Capped two existing unbounded DB-driven paths: fuzzy repo search candidate loading and recent activity repository scanning. Documented the new config knobs in README.md.
Comments
No comments yet.
Diff
diff --git a/README.md b/README.md
index a74b72a..de2c92e 100644
--- a/README.md
+++ b/README.md
@@ -43,9 +43,14 @@ Pages-style static hosting is driven by the Git repository contents. A user site
- `GITMAN_GUNICORN_TIMEOUT_SECONDS`: Gunicorn worker timeout, default `GITMAN_IMPORT_TIMEOUT_SECONDS + 300`
- `GITMAN_MAX_RENDER_BYTES`: maximum file preview size, default `262144`
- `GITMAN_MAX_GIT_RESPONSE_BYTES`: maximum Git HTTP backend response size, default `268435456`
+- `GITMAN_REPO_SEARCH_CANDIDATE_LIMIT`: maximum repositories loaded for fuzzy search scoring, default `1000`, minimum `25`
+- `GITMAN_ACTIVITY_REPO_SCAN_LIMIT`: maximum repositories scanned for commit activity, default `100`
- `GITMAN_RATE_LIMIT_ENABLED`: set to `0` to disable login, signup, and Git push auth rate limiting
+- `GITMAN_AUTH_RATE_LIMIT_PRUNE_BATCH_SIZE`: maximum expired rate-limit rows deleted during one cleanup, default `1000`
- `PORT`: HTTP port, default `8080`
+Rate-limit counters are stored in the configured SQLite database, so failed-attempt tracking is shared across worker processes.
+
When `GITMAN_DEBUG` is off, `SECRET_KEY` must be set to a non-default value before startup.
Repositories and their Git data live on local disk, so keep the database and repo root on persistent storage. The app uses SQLite WAL mode and shells out to Git, so the process user needs read/write access to both paths and access to the configured Git executable. It is recommended to run behind nginx/gunicorn.
diff --git a/app.py b/app.py
index 941558a..685155a 100644
--- a/app.py
+++ b/app.py
@@ -84,6 +84,8 @@ PERF_LOG_THRESHOLD_MS = env_int("GITMAN_PERF_LOG_THRESHOLD_MS", 250, minimum=0)
REF_PICKER_LIMIT = env_int("GITMAN_REF_PICKER_LIMIT", 25, minimum=1)
REF_LIST_LIMIT = env_int("GITMAN_REF_LIST_LIMIT", 200, minimum=1)
REF_SEARCH_COMMIT_LIMIT = env_int("GITMAN_REF_SEARCH_COMMIT_LIMIT", 100, minimum=1)
+REPO_SEARCH_CANDIDATE_LIMIT = env_int("GITMAN_REPO_SEARCH_CANDIDATE_LIMIT", 1000, minimum=25)
+ACTIVITY_REPO_SCAN_LIMIT = env_int("GITMAN_ACTIVITY_REPO_SCAN_LIMIT", 100, minimum=1)
GIT_BINARY = os.environ.get("GITMAN_GIT_BINARY", "git")
PAGES_DOMAIN = os.environ.get("GITMAN_PAGES_DOMAIN", "gitman.io").strip().lower().rstrip(".")
DEFAULT_EXEC_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
@@ -91,6 +93,7 @@ RATE_LIMIT_ENABLED = env_bool("GITMAN_RATE_LIMIT_ENABLED", True)
RATE_LIMIT_MAX_FAILURES = env_int("GITMAN_RATE_LIMIT_MAX_FAILURES", 5, minimum=1)
RATE_LIMIT_WINDOW_SECONDS = env_int("GITMAN_RATE_LIMIT_WINDOW_SECONDS", 300, minimum=1)
RATE_LIMIT_COOLDOWN_SECONDS = env_int("GITMAN_RATE_LIMIT_COOLDOWN_SECONDS", 300, minimum=1)
+AUTH_RATE_LIMIT_PRUNE_BATCH_SIZE = env_int("GITMAN_AUTH_RATE_LIMIT_PRUNE_BATCH_SIZE", 1000, minimum=1)
SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9._-]{1,62}$")
HOSTNAME_LABEL_RE = re.compile(r"^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$")
REV_RE = re.compile(r"^(null|[0-9a-fA-F]{1,40})$")
@@ -222,7 +225,6 @@ CSP_HEADER = (
"style-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com; "
"script-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com"
)
-AUTH_FAILURES = {}
REPO_INDEX_THREADS = set()
REPO_INDEX_LOCK = threading.Lock()
@@ -457,6 +459,13 @@ def init_db():
updated_at TEXT NOT NULL
);
+ CREATE TABLE IF NOT EXISTS auth_rate_limits (
+ rate_key TEXT PRIMARY KEY,
+ failure_count INTEGER NOT NULL,
+ reset_at REAL NOT NULL,
+ blocked_until REAL NOT NULL
+ );
+
CREATE INDEX IF NOT EXISTS idx_repositories_owner ON repositories(owner_id);
CREATE INDEX IF NOT EXISTS idx_issues_repo_number ON issues(repo_id, number);
CREATE INDEX IF NOT EXISTS idx_issues_repo_status ON issues(repo_id, status);
@@ -469,6 +478,7 @@ def init_db():
CREATE INDEX IF NOT EXISTS idx_commit_comments_commit ON commit_comments(repo_id, commit_node);
CREATE INDEX IF NOT EXISTS idx_custom_domains_domain ON custom_domains(domain);
CREATE INDEX IF NOT EXISTS idx_custom_domains_user ON custom_domains(user_id);
+ CREATE INDEX IF NOT EXISTS idx_auth_rate_limits_expiry ON auth_rate_limits(reset_at, blocked_until);
"""
)
ensure_user_profile_columns(conn)
@@ -476,6 +486,7 @@ def init_db():
ensure_repository_pages_columns(conn)
ensure_pull_request_ref_columns(conn)
ensure_repo_metadata_table(conn)
+ ensure_auth_rate_limits_table(conn)
def ensure_user_profile_columns(conn):
@@ -545,6 +556,22 @@ def ensure_repo_metadata_table(conn):
)
def ensure_auth_rate_limits_table(conn):
    """Create the ``auth_rate_limits`` table and its expiry index if missing.

    Both statements use ``IF NOT EXISTS``, so running this again on a
    database that already has the table and index changes nothing.

    Args:
        conn: Open database connection exposing ``execute``.
    """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS auth_rate_limits (
            rate_key TEXT PRIMARY KEY,
            failure_count INTEGER NOT NULL,
            reset_at REAL NOT NULL,
            blocked_until REAL NOT NULL
        )
        """
    # Covers the columns the expired-row cleanup filters and orders on.
    create_index_sql = (
        "CREATE INDEX IF NOT EXISTS idx_auth_rate_limits_expiry ON auth_rate_limits(reset_at, blocked_until)"
    )
    for statement in (create_table_sql, create_index_sql):
        conn.execute(statement)
+
+
def normalize_slug(value, label):
slug = (value or "").strip().lower()
if not SLUG_RE.match(slug):
@@ -696,45 +723,94 @@ def browser_post_size_limit():
return MAX_FORM_BYTES
def auth_rate_key(kind, identifier="", remote_addr=None):
    """Build the rate-limit key ``"<kind>:<remote_addr>:<identifier>"``.

    Args:
        kind: Label for the kind of attempt being limited.
        identifier: Account identifier; stripped, lower-cased and cut to
            100 characters. ``None`` is treated as the empty string.
        remote_addr: Client address. When ``None`` it is read from the
            current request's ``REMOTE_ADDR`` (empty string if absent), so
            pass it explicitly when no request is active.

    Returns:
        The composed key string.
    """
    if remote_addr is None:
        remote_addr = request.environ.get("REMOTE_ADDR", "")
    normalized = (identifier or "").strip().lower()[:100]
    return f"{kind}:{remote_addr}:{normalized}"
def rate_limit_blocked(kind, identifier="", remote_addr=None):
    """Return True while the caller's rate-limit key is inside a cooldown.

    This is a read-only check: it looks up ``blocked_until`` for the key and
    compares it with the current time, without writing or pruning rows.
    Always returns False when rate limiting is disabled.

    Args:
        kind: Label for the kind of attempt being limited.
        identifier: Account identifier used in the key.
        remote_addr: Client address; taken from the current request when None.
    """
    if not RATE_LIMIT_ENABLED:
        return False
    checked_at = time.time()
    rate_key = auth_rate_key(kind, identifier, remote_addr=remote_addr)
    with db_connect() as conn:
        row = conn.execute(
            "SELECT blocked_until FROM auth_rate_limits WHERE rate_key = ?",
            (rate_key,),
        ).fetchone()
    if not row:
        # No failures recorded for this key.
        return False
    return bool(row["blocked_until"] > checked_at)
def prune_auth_failures(now=None, conn=None, limit=None):
    """Delete a bounded batch of expired rate-limit rows.

    A row is expired once both its counting window (``reset_at``) and its
    cooldown (``blocked_until``) are at or before ``now``. Rows with the
    oldest ``reset_at`` are removed first.

    Args:
        now: Reference timestamp in seconds; the current time when None.
            An explicit ``0`` is honoured rather than replaced.
        conn: Connection to run the delete on. When None a connection is
            opened with ``db_connect()`` for this call only.
        limit: Maximum rows to delete; ``AUTH_RATE_LIMIT_PRUNE_BATCH_SIZE``
            when None. Values below 1 are raised to 1, because SQLite reads
            a negative LIMIT as "no limit", which would make the cleanup
            unbounded.

    Returns:
        The number of rows deleted.
    """
    if now is None:
        now = time.time()
    if limit is None:
        limit = AUTH_RATE_LIMIT_PRUNE_BATCH_SIZE
    limit = max(1, int(limit))
    if conn is None:
        with db_connect() as own_conn:
            return prune_auth_failures(now, conn=own_conn, limit=limit)
    cursor = conn.execute(
        """
        DELETE FROM auth_rate_limits
        WHERE rowid IN (
            SELECT rowid
            FROM auth_rate_limits
            WHERE reset_at <= ? AND blocked_until <= ?
            ORDER BY reset_at
            LIMIT ?
        )
        """,
        (now, now, limit),
    )
    return cursor.rowcount
def record_auth_failure(kind, identifier="", remote_addr=None):
    """Count one failed authentication attempt against the caller's key.

    The stored counter is read, incremented and written back inside one
    ``BEGIN IMMEDIATE`` transaction so concurrent workers serialize on the
    update. A bounded batch of expired rows is pruned in the same
    transaction. Once the count reaches ``RATE_LIMIT_MAX_FAILURES`` the key
    is blocked for ``RATE_LIMIT_COOLDOWN_SECONDS``.

    Does nothing when rate limiting is disabled.

    Args:
        kind: Label for the kind of attempt being limited.
        identifier: Account identifier used in the key.
        remote_addr: Client address; taken from the current request when None.
    """
    if not RATE_LIMIT_ENABLED:
        return
    now = time.time()
    key = auth_rate_key(kind, identifier, remote_addr=remote_addr)
    with db_connect() as conn:
        # Take the write lock before reading so two workers cannot both read
        # the same count and lose an increment.
        conn.execute("BEGIN IMMEDIATE")
        prune_auth_failures(now, conn=conn)
        record = conn.execute(
            """
            SELECT failure_count, reset_at, blocked_until
            FROM auth_rate_limits
            WHERE rate_key = ?
            """,
            (key,),
        ).fetchone()
        if record is None or record["reset_at"] <= now:
            # No record, or the counting window has expired: start a new window.
            failure_count = 0
            reset_at = now + RATE_LIMIT_WINDOW_SECONDS
            # A cooldown can outlast the window that triggered it. Keep it if
            # it is still running, so starting a new window never lifts an
            # active block early.
            if record is not None and record["blocked_until"] > now:
                blocked_until = record["blocked_until"]
            else:
                blocked_until = 0
        else:
            failure_count = record["failure_count"]
            reset_at = record["reset_at"]
            blocked_until = record["blocked_until"]
        failure_count += 1
        if failure_count >= RATE_LIMIT_MAX_FAILURES:
            blocked_until = now + RATE_LIMIT_COOLDOWN_SECONDS
        conn.execute(
            """
            INSERT INTO auth_rate_limits (rate_key, failure_count, reset_at, blocked_until)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(rate_key) DO UPDATE SET
                failure_count = excluded.failure_count,
                reset_at = excluded.reset_at,
                blocked_until = excluded.blocked_until
            """,
            (key, failure_count, reset_at, blocked_until),
        )
def clear_auth_failures(kind, identifier="", remote_addr=None):
    """Delete the stored failure record for the caller's key, if one exists.

    Args:
        kind: Label for the kind of attempt being limited.
        identifier: Account identifier used in the key.
        remote_addr: Client address; taken from the current request when None.
    """
    rate_key = auth_rate_key(kind, identifier, remote_addr=remote_addr)
    with db_connect() as conn:
        conn.execute("DELETE FROM auth_rate_limits WHERE rate_key = ?", (rate_key,))
def too_many_requests_response():
@@ -945,6 +1021,7 @@ def search_public_repos(query, limit=10):
query = (query or "").strip()[:100]
if not query:
return []
+ result_limit = max(1, min(int(limit), 25))
with db_connect() as conn:
repos = conn.execute(
@@ -960,7 +1037,9 @@ def search_public_repos(query, limit=10):
FROM repositories
JOIN users ON users.id = repositories.owner_id
ORDER BY repositories.updated_at DESC
- """
+ LIMIT ?
+ """,
+ (max(REPO_SEARCH_CANDIDATE_LIMIT, result_limit),),
).fetchall()
matches = []
@@ -969,7 +1048,7 @@ def search_public_repos(query, limit=10):
if score is not None:
matches.append((score, repo))
matches.sort(key=lambda match: (match[0], match[1]["name"], match[1]["owner_username"]))
- return [repo_search_result(repo) for _, repo in matches[: max(1, min(int(limit), 25))]]
+ return [repo_search_result(repo) for _, repo in matches[:result_limit]]
def text_preview(value, limit=180):
@@ -1019,7 +1098,8 @@ def normalize_activity_action(row):
return action
-def list_activity_repositories():
+def list_activity_repositories(limit=ACTIVITY_REPO_SCAN_LIMIT):
+ limit = max(1, min(int(limit), ACTIVITY_REPO_SCAN_LIMIT))
with db_connect() as conn:
return conn.execute(
"""
@@ -1027,7 +1107,9 @@ def list_activity_repositories():
FROM repositories
JOIN users ON users.id = repositories.owner_id
ORDER BY repositories.created_at DESC
- """
+ LIMIT ?
+ """,
+ (limit,),
).fetchall()
diff --git a/tests/test_app.py b/tests/test_app.py
index 7638fab..e46efa6 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -195,7 +195,6 @@ def login_client(client, username, password="correct horse battery staple", next
def isolated_app(tmp_path, monkeypatch):
monkeypatch.setattr(gitman, "DB_PATH", tmp_path / "gitman.sqlite3")
monkeypatch.setattr(gitman, "REPO_ROOT", tmp_path / "repos")
- gitman.AUTH_FAILURES.clear()
gitman.init_db()
return gitman
@@ -549,6 +548,42 @@ def test_index_public_repo_search_finds_repositories_by_fuzzy_name(isolated_app)
assert json.loads(empty_response.text)["results"] == []
def test_public_repo_search_uses_bounded_candidate_set(isolated_app, monkeypatch):
    """Search only scores the most recently updated candidate repositories."""
    monkeypatch.setattr(gitman, "REPO_SEARCH_CANDIDATE_LIMIT", 1)
    owner = create_user("alice")
    updated_at_by_name = {
        "needle": "2020-01-01T00:00:00Z",
        "newer": "2030-01-01T00:00:00Z",
    }
    for repo_name in updated_at_by_name:
        isolated_app.create_repository(owner, repo_name, "")
    with isolated_app.db_connect() as conn:
        for repo_name, stamp in updated_at_by_name.items():
            conn.execute("UPDATE repositories SET updated_at = ? WHERE name = ?", (stamp, repo_name))

    # With one candidate slot only "newer" is loaded, so "needle" is never scored.
    assert isolated_app.search_public_repos("needle", limit=1) == []

    monkeypatch.setattr(gitman, "REPO_SEARCH_CANDIDATE_LIMIT", 2)
    found = isolated_app.search_public_repos("needle", limit=1)
    assert [result["full_name"] for result in found] == ["alice/needle"]
+
+
def test_commit_activity_scans_bounded_repo_set(isolated_app, monkeypatch):
    """Commit activity only reads the newest repositories, up to the scan limit."""
    monkeypatch.setattr(gitman, "ACTIVITY_REPO_SCAN_LIMIT", 1)
    owner = create_user("alice")
    created_at_by_name = {
        "old": "2020-01-01T00:00:00Z",
        "new": "2030-01-01T00:00:00Z",
    }
    for repo_name in created_at_by_name:
        isolated_app.create_repository(owner, repo_name, "")
    with isolated_app.db_connect() as conn:
        for repo_name, stamp in created_at_by_name.items():
            conn.execute("UPDATE repositories SET created_at = ? WHERE name = ?", (stamp, repo_name))

    visited = []

    def recording_commit_log(path, limit):
        # Stand-in for commit_log that records which repository was read.
        visited.append(path.name)
        return []

    monkeypatch.setattr(gitman, "commit_log", recording_commit_log)

    assert isolated_app.list_commit_activity_actions(50) == []
    assert visited == ["new"]
+
+
def test_csrf_required_for_browser_posts_and_git_is_exempt(isolated_app):
owner = create_user("alice", password="owner-password")
isolated_app.create_repository(owner, "demo", "")
@@ -595,7 +630,6 @@ def test_login_and_git_auth_failures_are_rate_limited(isolated_app, monkeypatch)
assert response.status_code == 429
assert response.header("Retry-After") == "60"
- gitman.AUTH_FAILURES.clear()
owner = gitman.get_user_by_username("alice")
isolated_app.create_repository(owner, "demo", "")
for _ in range(2):
@@ -699,12 +733,111 @@ def test_init_db_creates_expected_tables_and_is_idempotent(isolated_app):
repo_columns = {row["name"] for row in conn.execute("PRAGMA table_info(repositories)")}
pr_columns = {row["name"] for row in conn.execute("PRAGMA table_info(pull_requests)")}
- assert {"users", "repositories", "issues", "pull_requests", "repo_stars", "custom_domains"}.issubset(tables)
+ assert {
+ "users",
+ "repositories",
+ "issues",
+ "pull_requests",
+ "repo_stars",
+ "custom_domains",
+ "auth_rate_limits",
+ }.issubset(tables)
assert {"display_name", "bio", "website"}.issubset(user_columns)
assert {"forked_from_repo_id", "forked_at", "forked_from_node", "pages_docs_enabled"}.issubset(repo_columns)
assert {"target_ref_type", "target_ref_name", "source_ref_type", "source_ref_name"}.issubset(pr_columns)
def test_auth_rate_limit_failures_are_shared_between_workers(isolated_app, monkeypatch):
    """Failures recorded by separate processes add up to one shared block.

    Two child interpreters each record one failure for the same key against
    the test database; with a threshold of two, the parent must then see the
    key as blocked.
    """
    monkeypatch.setattr(gitman, "RATE_LIMIT_MAX_FAILURES", 2)
    # Only affects this process; the workers import the app afresh and are
    # handed the failure threshold on the command line.
    monkeypatch.setattr(gitman, "RATE_LIMIT_COOLDOWN_SECONDS", 60)
    repo_root = Path(__file__).resolve().parents[1]
    env = os.environ.copy()
    existing_pythonpath = env.get("PYTHONPATH")
    env["PYTHONPATH"] = str(repo_root) if not existing_pythonpath else f"{repo_root}{os.pathsep}{existing_pythonpath}"

    worker_script = """
import os
import sys

os.environ["GITMAN_DB"] = sys.argv[1]
os.environ["GITMAN_REPO_ROOT"] = sys.argv[2]
os.environ["SECRET_KEY"] = "test-secret"

import app as gitman

gitman.RATE_LIMIT_MAX_FAILURES = int(sys.argv[3])
gitman.record_auth_failure("login", "alice", remote_addr="127.0.0.1")
"""

    processes = [
        subprocess.Popen(
            [
                sys.executable,
                "-c",
                worker_script,
                str(isolated_app.DB_PATH),
                str(isolated_app.REPO_ROOT),
                str(gitman.RATE_LIMIT_MAX_FAILURES),
            ],
            cwd=repo_root,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        for _ in range(2)
    ]

    results = []
    try:
        for process in processes:
            stdout, stderr = process.communicate(timeout=15)
            results.append((process.returncode, stdout, stderr))
    finally:
        for process in processes:
            if process.poll() is None:
                process.kill()
                # Finish communication after the kill so the child is reaped
                # and its pipes are drained and closed instead of leaking.
                process.communicate()

    for returncode, stdout, stderr in results:
        assert returncode == 0, stdout + stderr

    assert gitman.rate_limit_blocked("login", "alice", remote_addr="127.0.0.1")
+
+
def test_auth_rate_limit_cleanup_is_bounded(isolated_app, monkeypatch):
    """One recorded failure prunes at most a batch of expired rows."""
    monkeypatch.setattr(gitman, "AUTH_RATE_LIMIT_PRUNE_BATCH_SIZE", 2)
    active_until = 10**12
    insert_sql = """
        INSERT INTO auth_rate_limits (rate_key, failure_count, reset_at, blocked_until)
        VALUES (?, ?, ?, ?)
        """
    # Three long-expired rows plus one that stays active far into the future.
    seed_rows = [(f"expired:{index}", 1, 1, 1) for index in range(3)]
    seed_rows.append(("active", 1, active_until, active_until))
    with isolated_app.db_connect() as conn:
        for seed_row in seed_rows:
            conn.execute(insert_sql, seed_row)

    gitman.record_auth_failure("login", "alice", remote_addr="127.0.0.1")

    with isolated_app.db_connect() as conn:
        expired_left = conn.execute(
            "SELECT COUNT(*) FROM auth_rate_limits WHERE rate_key LIKE 'expired:%'"
        ).fetchone()[0]
        active_left = conn.execute(
            "SELECT COUNT(*) FROM auth_rate_limits WHERE rate_key = 'active'"
        ).fetchone()[0]

    # Batch size 2 removes two of the three expired rows and never the active one.
    assert expired_left == 1
    assert active_left == 1
+
+
def test_db_connect_configures_sqlite_for_worker_contention(isolated_app):
isolated_app.init_db()