[+] AI Github Content Moderator

2023-05-01 22:19:34 -04:00 · 2023-05-01 22:19:34 -04:00 · 4225a0c453
commit 4225a0c453
parent 4fe0890f21
2 changed files with 169 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -119,3 +119,4 @@ dmypy.json
 webhook-log
 start_moderation.sh
 gh_moderator.toml
 moderator-data
--- a/tools/gh_moderator.py
+++ b/tools/gh_moderator.py
@ -0,0 +1,168 @@
 # Start the server with:
 #
 # uvicorn tools.gh_moderator:app --reload --port 59523
 import hashlib
 import hmac
 import json
 import time
 import unicodedata
 from datetime import datetime
 from pathlib import Path
 import requests
 import toml
 import openai
 from fastapi import FastAPI, Request, Response
 from github import Github
 from hypy_utils import write, json_stringify
 from hypy_utils.logging_utils import setup_logger
 from openai.openai_object import OpenAIObject
 from hyfetch.color_util import printc
 # Module-level logger from hypy_utils' setup_logger(); the rest of this file reports through printc instead.
 log = setup_logger()
 def read_config():
    """
    Load the moderator settings from ``~/.config/gh_moderator.toml``.

    :return: The parsed TOML document, as returned by ``toml.load``
    :raises FileNotFoundError: If the config file does not exist
    """
    # TOML documents are UTF-8 by specification, so don't depend on the locale's default encoding
    with open(Path.home() / ".config/gh_moderator.toml", encoding="utf-8") as f:
        return toml.load(f)
 # ---- Module-level setup: runs once at import time (reads the config file and logs in to GitHub) ----
 app = FastAPI()
 config = read_config()
 # HMAC key used by verify_signature(); stored as bytes because hmac.new() needs a bytes key
 webhook_secret = bytes(config["webhook_secret"], "utf-8")
 # GitHub client, the account it is logged in as, and the single repository being moderated
 gh = Github(per_page=100, login_or_token=config["gh_token"])
 me = gh.get_user()
 repo = gh.get_repo(config["gh_repo"])
 printc(f"&a[+] Logged in as {me.login}")
 # Settings for an external classifier; only referenced by the commented-out code in process_event()
 harm_classifier_url, harm_classifier_token = config["harm_classifier_url"], config["harm_classifier_token"]
 # Directory containing this file; process_event() reads blacklist_users.csv from here
 script_path = Path(__file__).parent
 # Webhook event names that process_event() accepts; anything else is logged and ignored
 supported_events = ["issue_comment", "issues", "pull_request", "pull_request_review_comment"]
 # Sentence appended to every redaction notice written by redact()
 ai_notice = f"If you think this is a false-positive, please contact the owner of this repo."
 # OpenAI credentials, plus the model name passed to the moderation call in process_event()
 openai.organization = config['OpenAI']['org']
 openai.api_key = config['OpenAI']['key']
 openai_model = config['OpenAI']['model']
 def get_content(event: str, obj: dict) -> str:
    # Get the content of the event
    match event:
        case "issue_comment" | "pull_request_review_comment":
            return obj["comment"]["body"]
        case "issues":
            return obj["issue"]["title"] + "\n\n" + obj["issue"]["body"]
        case "pull_request":
            return obj["pull_request"]["title"] + "\n\n" + obj["pull_request"]["body"]
 def redact(event: str, obj: dict, id: str, reason: str):
    """
    Redact the event
    """
    printc(f"&c[!] Redacting {event} {id} for {reason}.")
    tail = f"\n\n> Reason: {reason}\n> {ai_notice}"
    redact_notice = f"[Redacted by [AI Content Moderator]({me.html_url})]{tail}"
    match event:
        case "issue_comment":
            # Redact the comment
            comment = repo.get_issue(obj["issue"]["number"]).get_comment(obj["comment"]["id"])
            comment.edit(body=redact_notice)
        case "pull_request_review_comment":
            # Redact the comment
            comment = repo.get_pull(obj["pull_request"]["number"]).get_review_comment(obj["comment"]["id"])
            comment.edit(body=redact_notice)
        case "issues" | "pull_request":
            # Close the issue
            iss = repo.get_issue(obj["issue"]["number"])
            iss.edit(title="[Redacted]", body=redact_notice, state="closed")
            iss.create_comment(f"Issue closed for potentially offensive content.{tail}")
            iss.lock("spam")
 async def process_event(event: str, obj: dict, id: str):
    """
    Moderate one webhook delivery.

    Deliveries are skipped when the event is unsupported, when it belongs to another repository,
    or when it was triggered by the moderator account itself. Content from blacklisted users is
    redacted immediately; everything else is sent to the OpenAI moderation endpoint and redacted
    if it comes back flagged.

    :param event: Event name (the value of the X-GitHub-Event header)
    :param obj: Parsed webhook payload
    :param id: Identifier of this delivery, used in log lines and in the saved result's file name
    """
    # Preliminary checks
    if event not in supported_events:
        printc(f"&7[-] Unknown event: {event}")
        return
    if obj['repository']['full_name'] != repo.full_name:
        printc(f"&7[-] Unknown repository: {obj['repository']['full_name']}")
        return
    # One login per line; splitlines() + strip() so a file saved with CRLF line endings still matches
    blacklist_lines = (script_path / "blacklist_users.csv").read_text().splitlines()
    blacklist_users = {line.strip() for line in blacklist_lines if line.strip()}
    actor = obj["sender"]["login"]
    if actor == me.login:
        # Our own redaction edits trigger webhooks too; ignoring them avoids a feedback loop
        printc(f"&7[-] Ignoring event by myself: {id} {event} by {actor}")
        return
    printc(f"&e[+] Received event: {id} {event} by {actor}")
    if actor in blacklist_users:
        # Keyword arguments: id and reason are both strings and were previously passed swapped
        redact(event, obj, id=id, reason="User is blacklisted")
        return
    # Normalize content
    content = unicodedata.normalize("NFKC", get_content(event, obj))
    # Disabled alternative: ask the external harm classifier to predict if it's offensive
    # res: requests.Response = requests.post(harm_classifier_url, headers={"token": harm_classifier_token}, data=content)
    # if res.status_code != 200:
    #     printc(f"&c[x] Error {res.status_code} from classifier: {res.text}")
    #     return
    # clas = res.text
    #
    # # Check if it's offensive
    # if clas == "HARMFUL":
    #     printc(f"\n&c[!] AI classified {event} {id} by {actor} as offensive !!!\n> Content: {content}\n\n")
    #     redact(event, obj, id, "Flagged by a large language model.")
    #     return
    # Ask OpenAI to predict if it's offensive
    res: OpenAIObject = openai.Moderation.create(content, openai_model).results[0]
    # Keep the raw moderation result on disk for later inspection
    write(f"moderator-data/openai/{id}.json", json_stringify(res))
    if res.flagged:
        printc(f"\n&c[!] AI classified {event} {id} by {actor} as offensive !!!\n> Content: {content}\n\n")
        # List only the categories that were flagged, each with its score as a percentage
        reason = " | ".join(f"{k} {res.category_scores.get(k) * 100:.0f}%" for k, v in res.categories.items() if v)
        redact(event, obj, id, f"Flagged by OpenAI : {reason}")
        return
    printc(f"&a[~] AI classified {event} {id} by {actor} as safe.")
@app.post("/")
async def handle_webhook(request: Request, response: Response):
    """
    Entry point for GitHub webhook deliveries.

    Verifies the delivery's HMAC signature, saves the payload under ``moderator-data/webhook/``
    and hands it to process_event().

    :param request: Incoming request; the raw body is needed for signature verification
    :param response: Response object, used to set the 401 status on rejected deliveries
    :return: ``{"message": "OK"}``, or ``{"message": "Invalid signature"}`` with status 401
    """
    # Read headers
    event = request.headers.get("X-GitHub-Event")
    # Prefer the HMAC-SHA256 header and only fall back to the legacy SHA-1 one
    signature = request.headers.get("X-Hub-Signature-256") or request.headers.get("X-Hub-Signature")
    # Verify the signature over the raw bytes, before anything is parsed
    body = await request.body()
    # A delivery without any signature header is unsigned: reject it instead of crashing on None
    if not signature or not verify_signature(signature, body):
        response.status_code = 401
        return {"message": "Invalid signature"}
    # Parse the event body
    obj = json.loads(body.decode())
    # Log the request; the receive timestamp doubles as the delivery's identifier
    id = datetime.now().isoformat()
    write(f"moderator-data/webhook/{id}-{event}.json", json_stringify(obj, indent=4))
    await process_event(event, obj, id)
    return {"message": "OK"}
 # Helper function to verify the signature
 def verify_signature(signature: str, payload: bytes) -> bool:
    hash_type, signature = signature.split("=")
    digest = hmac.new(webhook_secret, msg=payload, digestmod=getattr(hashlib, hash_type)).hexdigest()
    return hmac.compare_digest(digest, signature)