[+] AI Github Content Moderator
This commit is contained in:
parent
4fe0890f21
commit
4225a0c453
2 changed files with 169 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -119,3 +119,4 @@ dmypy.json
|
|||
webhook-log
|
||||
start_moderation.sh
|
||||
gh_moderator.toml
|
||||
moderator-data
|
||||
|
|
168
tools/gh_moderator.py
Normal file
168
tools/gh_moderator.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
# Start the server with:
|
||||
#
|
||||
# uvicorn tools.gh_moderator:app --reload --port 59523
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import time
|
||||
import unicodedata
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
import toml
|
||||
import openai
|
||||
from fastapi import FastAPI, Request, Response
|
||||
from github import Github
|
||||
from hypy_utils import write, json_stringify
|
||||
from hypy_utils.logging_utils import setup_logger
|
||||
from openai.openai_object import OpenAIObject
|
||||
|
||||
from hyfetch.color_util import printc
|
||||
|
||||
log = setup_logger()
|
||||
|
||||
|
||||
def read_config():
|
||||
with open(Path.home() / ".config/gh_moderator.toml") as f:
|
||||
return toml.load(f)
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
config = read_config()
|
||||
webhook_secret = bytes(config["webhook_secret"], "utf-8")
|
||||
|
||||
gh = Github(per_page=100, login_or_token=config["gh_token"])
|
||||
me = gh.get_user()
|
||||
repo = gh.get_repo(config["gh_repo"])
|
||||
printc(f"&a[+] Logged in as {me.login}")
|
||||
|
||||
harm_classifier_url, harm_classifier_token = config["harm_classifier_url"], config["harm_classifier_token"]
|
||||
|
||||
script_path = Path(__file__).parent
|
||||
supported_events = ["issue_comment", "issues", "pull_request", "pull_request_review_comment"]
|
||||
ai_notice = f"If you think this is a false-positive, please contact the owner of this repo."
|
||||
|
||||
openai.organization = config['OpenAI']['org']
|
||||
openai.api_key = config['OpenAI']['key']
|
||||
openai_model = config['OpenAI']['model']
|
||||
|
||||
|
||||
def get_content(event: str, obj: dict) -> str:
|
||||
# Get the content of the event
|
||||
match event:
|
||||
case "issue_comment" | "pull_request_review_comment":
|
||||
return obj["comment"]["body"]
|
||||
case "issues":
|
||||
return obj["issue"]["title"] + "\n\n" + obj["issue"]["body"]
|
||||
case "pull_request":
|
||||
return obj["pull_request"]["title"] + "\n\n" + obj["pull_request"]["body"]
|
||||
|
||||
|
||||
def redact(event: str, obj: dict, id: str, reason: str):
|
||||
"""
|
||||
Redact the event
|
||||
"""
|
||||
printc(f"&c[!] Redacting {event} {id} for {reason}.")
|
||||
tail = f"\n\n> Reason: {reason}\n> {ai_notice}"
|
||||
redact_notice = f"[Redacted by [AI Content Moderator]({me.html_url})]{tail}"
|
||||
|
||||
match event:
|
||||
case "issue_comment":
|
||||
# Redact the comment
|
||||
comment = repo.get_issue(obj["issue"]["number"]).get_comment(obj["comment"]["id"])
|
||||
comment.edit(body=redact_notice)
|
||||
|
||||
case "pull_request_review_comment":
|
||||
# Redact the comment
|
||||
comment = repo.get_pull(obj["pull_request"]["number"]).get_review_comment(obj["comment"]["id"])
|
||||
comment.edit(body=redact_notice)
|
||||
|
||||
case "issues" | "pull_request":
|
||||
# Close the issue
|
||||
iss = repo.get_issue(obj["issue"]["number"])
|
||||
iss.edit(title="[Redacted]", body=redact_notice, state="closed")
|
||||
iss.create_comment(f"Issue closed for potentially offensive content.{tail}")
|
||||
iss.lock("spam")
|
||||
|
||||
|
||||
async def process_event(event: str, obj: dict, id: str):
|
||||
# Preliminary checks
|
||||
if event not in supported_events:
|
||||
printc(f"&7[-] Unknown event: {event}")
|
||||
return
|
||||
|
||||
if obj['repository']['full_name'] != repo.full_name:
|
||||
printc(f"&7[-] Unknown repository: {obj['repository']['full_name']}")
|
||||
return
|
||||
|
||||
blacklist_users = {v for v in (script_path / "blacklist_users.csv").read_text().split("\n") if v}
|
||||
actor = obj["sender"]["login"]
|
||||
if actor == me.login:
|
||||
printc(f"&7[-] Ignoring event by myself: {id} {event} by {actor}")
|
||||
return
|
||||
printc(f"&e[+] Received event: {id} {event} by {actor}")
|
||||
|
||||
if actor in blacklist_users:
|
||||
redact(event, obj, "User is blacklisted", id)
|
||||
return
|
||||
|
||||
# Normalize content
|
||||
content = unicodedata.normalize("NFKC", get_content(event, obj))
|
||||
|
||||
# Ask chatgpt to predict if it's offensive
|
||||
# res: requests.Response = requests.post(harm_classifier_url, headers={"token": harm_classifier_token}, data=content)
|
||||
# if res.status_code != 200:
|
||||
# printc(f"&c[x] Error {res.status_code} from classifier: {res.text}")
|
||||
# return
|
||||
# clas = res.text
|
||||
#
|
||||
# # Check if it's offensive
|
||||
# if clas == "HARMFUL":
|
||||
# printc(f"\n&c[!] AI classified {event} {id} by {actor} as offensive !!!\n> Content: {content}\n\n")
|
||||
# redact(event, obj, id, "Flagged by a large language model.")
|
||||
# return
|
||||
|
||||
# Ask OpenAI to predict if it's offensive
|
||||
res: OpenAIObject = openai.Moderation.create(content, openai_model).results[0]
|
||||
write(f"moderator-data/openai/{id}.json", json_stringify(res))
|
||||
if res.flagged:
|
||||
printc(f"\n&c[!] AI classified {event} {id} by {actor} as offensive !!!\n> Content: {content}\n\n")
|
||||
reason = " | ".join(f"{k} {res.category_scores.get(k) * 100:.0f}%" for k, v in res.categories.items() if v)
|
||||
redact(event, obj, id, f"Flagged by OpenAI : {reason}")
|
||||
return
|
||||
|
||||
printc(f"&a[~] AI classified {event} {id} by {actor} as safe.")
|
||||
|
||||
|
||||
@app.post("/")
|
||||
async def handle_webhook(request: Request, response: Response):
|
||||
# Read headers
|
||||
event = request.headers.get("X-GitHub-Event")
|
||||
signature = request.headers.get("X-Hub-Signature")
|
||||
|
||||
# Verify the signature
|
||||
body = await request.body()
|
||||
if not verify_signature(signature, body):
|
||||
response.status_code = 401
|
||||
return {"message": "Invalid signature"}
|
||||
|
||||
# Parse the event body
|
||||
obj = json.loads(body.decode())
|
||||
|
||||
# Log the request
|
||||
id = datetime.now().isoformat()
|
||||
write(f"moderator-data/webhook/{id}-{event}.json", json_stringify(obj, indent=4))
|
||||
|
||||
await process_event(event, obj, id)
|
||||
|
||||
return {"message": "OK"}
|
||||
|
||||
|
||||
# Helper function to verify the signature
|
||||
def verify_signature(signature: str, payload: bytes) -> bool:
|
||||
hash_type, signature = signature.split("=")
|
||||
digest = hmac.new(webhook_secret, msg=payload, digestmod=getattr(hashlib, hash_type)).hexdigest()
|
||||
return hmac.compare_digest(digest, signature)
|
||||
|
Loading…
Reference in a new issue