2023-05-01 22:19:34 -04:00
|
|
|
# Start the server with:
|
|
|
|
#
|
|
|
|
# uvicorn tools.gh_moderator:app --reload --port 59523
|
2023-05-09 10:46:23 -04:00
|
|
|
#
|
|
|
|
# pip install openai pygithub fastapi uvicorn hypy_utils
|
2023-05-01 22:19:34 -04:00
|
|
|
|
|
|
|
import hashlib
|
|
|
|
import hmac
|
|
|
|
import json
|
|
|
|
import unicodedata
|
|
|
|
from datetime import datetime
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
import openai
|
2023-05-09 10:46:23 -04:00
|
|
|
import tomllib as toml
|
2023-05-01 22:19:34 -04:00
|
|
|
from fastapi import FastAPI, Request, Response
|
|
|
|
from github import Github
|
|
|
|
from hypy_utils import write, json_stringify
|
|
|
|
from hypy_utils.logging_utils import setup_logger
|
|
|
|
from openai.openai_object import OpenAIObject
|
|
|
|
|
|
|
|
from hyfetch.color_util import printc
|
|
|
|
|
|
|
|
log = setup_logger()
|
|
|
|
|
|
|
|
|
|
|
|
def read_config():
|
2023-05-09 10:46:23 -04:00
|
|
|
with open(Path.home() / ".config/gh_moderator.toml", "rb") as f:
|
2023-05-01 22:19:34 -04:00
|
|
|
return toml.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
# ASGI application object referenced by the uvicorn command line
app = FastAPI()

# Settings are read once, when the module is imported
config = read_config()
webhook_secret = bytes(config["webhook_secret"], "utf-8")

# GitHub client, the authenticated account and the repository being moderated
gh = Github(login_or_token=config["gh_token"], per_page=100)
me = gh.get_user()
repo = gh.get_repo(config["gh_repo"])
printc(f"&a[+] Logged in as {me.login}")

# Harm classifier endpoint and its credential
harm_classifier_url = config["harm_classifier_url"]
harm_classifier_token = config["harm_classifier_token"]

# Directory containing this script (the blacklist file is looked up here)
script_path = Path(__file__).parent
# Webhook event types that carry moderatable content
supported_events = ["issue_comment", "issues", "pull_request", "pull_request_review_comment"]
# Footer appended to every redaction notice
ai_notice = "If you think this is a false-positive, please contact the owner of this repo."

# OpenAI credentials and the moderation model to use
_openai_settings = config['OpenAI']
openai.organization = _openai_settings['org']
openai.api_key = _openai_settings['key']
openai_model = _openai_settings['model']
|
|
|
|
|
|
|
|
|
|
|
|
def get_content(event: str, obj: dict) -> str:
    """
    Extract the user-written text from a webhook payload.

    :param event: Event type, e.g. "issue_comment" or "pull_request"
    :param obj: Parsed webhook payload
    :return: The comment body for comment events; the title, a blank line
        and the body for issues and pull requests; an empty string for any
        other event type
    """
    if event in ("issue_comment", "pull_request_review_comment"):
        return obj["comment"]["body"] or ""

    if event in ("issues", "pull_request"):
        # "issues" payloads nest the item under "issue", while
        # "pull_request" payloads nest it under "pull_request"
        item = obj["issue" if event == "issues" else "pull_request"]
        # The body is null when the author left the description empty, so
        # fall back to "" instead of failing on str + None
        return (item["title"] or "") + "\n\n" + (item["body"] or "")

    # Unsupported event: honour the declared return type
    return ""
|
|
|
|
|
|
|
|
|
|
|
|
def redact(event: str, obj: dict, id: str, reason: str):
|
|
|
|
"""
|
|
|
|
Redact the event
|
|
|
|
"""
|
|
|
|
printc(f"&c[!] Redacting {event} {id} for {reason}.")
|
|
|
|
tail = f"\n\n> Reason: {reason}\n> {ai_notice}"
|
|
|
|
redact_notice = f"[Redacted by [AI Content Moderator]({me.html_url})]{tail}"
|
|
|
|
|
|
|
|
match event:
|
|
|
|
case "issue_comment":
|
|
|
|
# Redact the comment
|
|
|
|
comment = repo.get_issue(obj["issue"]["number"]).get_comment(obj["comment"]["id"])
|
|
|
|
comment.edit(body=redact_notice)
|
|
|
|
|
|
|
|
case "pull_request_review_comment":
|
|
|
|
# Redact the comment
|
|
|
|
comment = repo.get_pull(obj["pull_request"]["number"]).get_review_comment(obj["comment"]["id"])
|
|
|
|
comment.edit(body=redact_notice)
|
|
|
|
|
|
|
|
case "issues" | "pull_request":
|
|
|
|
# Close the issue
|
|
|
|
iss = repo.get_issue(obj["issue"]["number"])
|
|
|
|
iss.edit(title="[Redacted]", body=redact_notice, state="closed")
|
|
|
|
iss.create_comment(f"Issue closed for potentially offensive content.{tail}")
|
|
|
|
iss.lock("spam")
|
|
|
|
|
|
|
|
|
|
|
|
async def process_event(event: str, obj: dict, id: str):
    """
    Moderate one webhook delivery.

    Deliveries are ignored when the event type is unsupported, when they
    belong to another repository, when they were triggered by the
    moderator account itself, or when they report a deletion. Content from
    blacklisted users is redacted immediately; everything else is sent to
    the OpenAI moderation endpoint and redacted when flagged.

    :param event: Event type of the webhook delivery
    :param obj: Parsed webhook payload
    :param id: Identifier of this delivery, used in logs and file names
    """
    # Preliminary checks
    if event not in supported_events:
        printc(f"&7[-] Unknown event: {event}")
        return

    if obj['repository']['full_name'] != repo.full_name:
        printc(f"&7[-] Unknown repository: {obj['repository']['full_name']}")
        return

    actor = obj["sender"]["login"]
    if actor == me.login:
        printc(f"&7[-] Ignoring event by myself: {id} {event} by {actor}")
        return
    printc(f"&e[+] Received event: {id} {event} by {actor}")

    # The object of a "deleted" action no longer exists, so there is nothing
    # left to classify or edit
    if obj.get("action") == "deleted":
        printc(f"&7[-] Ignoring deletion: {id} {event} by {actor}")
        return

    # One login per line. splitlines() + strip() tolerates CRLF line endings
    # and stray whitespace that would otherwise prevent a match.
    blacklist_text = (script_path / "blacklist_users.csv").read_text()
    blacklist_users = {name for line in blacklist_text.splitlines() if (name := line.strip())}
    if actor in blacklist_users:
        redact(event, obj, id, "User is blacklisted")
        return

    # Normalize content
    content = unicodedata.normalize("NFKC", get_content(event, obj))

    # Ask OpenAI to predict if it's offensive
    res: OpenAIObject = openai.Moderation.create(content, openai_model).results[0]
    write(f"moderator-data/openai/{id}.json", json_stringify(res))
    if res.flagged:
        printc(f"\n&c[!] AI classified {event} {id} by {actor} as offensive !!!\n> Content: {content}\n\n")
        # List every flagged category together with its score as a percentage
        reason = " | ".join(f"{k} {res.category_scores.get(k) * 100:.0f}%" for k, v in res.categories.items() if v)
        redact(event, obj, id, f"Flagged by OpenAI : {reason}")
        return

    printc(f"&a[~] AI classified {event} {id} by {actor} as safe.")
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/")
async def handle_webhook(request: Request, response: Response):
    """
    Receive a GitHub webhook delivery.

    The delivery is authenticated with the shared webhook secret, the
    payload is passed to `write` for logging, and the event is handed to
    `process_event`.

    :param request: Incoming HTTP request
    :param response: Response object, used to set the 401 status code
    :return: ``{"message": "OK"}`` on success, or
        ``{"message": "Invalid signature"}`` with status 401
    """
    # Read headers
    event = request.headers.get("X-GitHub-Event")
    # Prefer the SHA-256 signature and fall back to the legacy SHA-1 header
    signature = request.headers.get("X-Hub-Signature-256") or request.headers.get("X-Hub-Signature")

    # Verify the signature. A delivery without any signature header is
    # rejected here instead of failing inside verify_signature.
    body = await request.body()
    if not signature or not verify_signature(signature, body):
        response.status_code = 401
        return {"message": "Invalid signature"}

    # Parse the event body
    obj = json.loads(body.decode())

    # Log the request; the receive timestamp doubles as the delivery id
    event_id = datetime.now().isoformat()
    write(f"moderator-data/webhook/{event_id}-{event}.json", json_stringify(obj, indent=4))

    await process_event(event, obj, event_id)

    return {"message": "OK"}
|
|
|
|
|
|
|
|
|
|
|
|
# Helper function to verify the signature
def verify_signature(signature: str, payload: bytes) -> bool:
    """
    Check a webhook signature header against the HMAC of the payload.

    :param signature: Header value of the form ``<algorithm>=<hex digest>``;
        a missing (None or empty) value is treated as invalid
    :param payload: Raw request body
    :return: True only when the header is well-formed, names a supported
        algorithm and carries the digest computed with the webhook secret
    """
    if not signature:
        return False

    # partition() never raises, unlike unpacking the result of split("=")
    hash_type, separator, received = signature.partition("=")
    # The algorithm name is supplied by the sender, so only accept the two
    # GitHub uses rather than looking up arbitrary hashlib attributes
    if not separator or hash_type not in ("sha1", "sha256"):
        return False

    digest = hmac.new(webhook_secret, msg=payload, digestmod=getattr(hashlib, hash_type)).hexdigest()
    # Compare as bytes: compare_digest raises TypeError for non-ASCII str
    return hmac.compare_digest(digest.encode("utf-8"), received.encode("utf-8"))
|
|
|
|
|