♻️ Refactor moderation code into a feature (#1614)

Luke Vella 2025-03-05 12:31:22 +00:00 committed by GitHub
parent 574097710b
commit b1a86769b2
7 changed files with 186 additions and 77 deletions

View file

@@ -69,11 +69,16 @@ export const env = createEnv({
* OpenAI Configuration for AI moderation
*/
OPENAI_API_KEY: z.string().optional(),
/**
* Enable or disable content moderation
* @default "false"
*/
MODERATION_ENABLED: z.enum(["true", "false"]).default("false"),
},
/*
* Environment variables available on the client (and server).
*
* 💡 You'll get type errors if these are not prefixed with NEXT_PUBLIC_.
* You'll get type errors if these are not prefixed with NEXT_PUBLIC_.
*/
client: {
NEXT_PUBLIC_POSTHOG_API_KEY: z.string().optional(),
@@ -84,7 +89,7 @@ export const env = createEnv({
* Due to how Next.js bundles environment variables on Edge and Client,
* we need to manually destructure them to make sure all are included in the bundle.
*
* 💡 You'll get type errors if not all variables from `server` & `client` are included here.
* You'll get type errors if not all variables from `server` & `client` are included here.
*/
runtimeEnv: {
DATABASE_URL: process.env.DATABASE_URL,
@@ -119,6 +124,7 @@ export const env = createEnv({
NOREPLY_EMAIL: process.env.NOREPLY_EMAIL,
NOREPLY_EMAIL_NAME: process.env.NOREPLY_EMAIL_NAME,
OPENAI_API_KEY: process.env.OPENAI_API_KEY,
MODERATION_ENABLED: process.env.MODERATION_ENABLED,
},
skipValidation: !!process.env.SKIP_ENV_VALIDATION,
});
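The flag is off by default: operators opt in by setting MODERATION_ENABLED=true, together with OPENAI_API_KEY for the AI layer. A minimal sketch of how the schema entry above behaves on its own (illustrative values, not part of this diff):

import { z } from "zod";

// Same shape as the MODERATION_ENABLED entry above
const moderationEnabled = z.enum(["true", "false"]).default("false");

moderationEnabled.parse(undefined); // "false" (the default when the variable is unset)
moderationEnabled.parse("true");    // "true" (explicit opt-in)
// moderationEnabled.parse("yes");  // would throw: only "true" or "false" are accepted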

View file

@@ -0,0 +1,86 @@
import { env } from "@/env";
import { moderateContentWithAI } from "./libs/ai-moderation";
import { containsSuspiciousPatterns } from "./libs/pattern-moderation";
/**
* Log the moderation status at initialization
* This function is automatically called when this module is imported
*/
function initModeration() {
if (env.MODERATION_ENABLED === "true") {
if (env.OPENAI_API_KEY) {
console.info("✅ Content moderation is ENABLED with AI support");
} else {
console.info(
"⚠️ Content moderation is ENABLED but missing OPENAI_API_KEY - AI moderation will be skipped",
);
}
} else {
console.info(" Content moderation is DISABLED");
}
}
// Initialize moderation and log status
initModeration();
/**
* Moderates content to detect spam, inappropriate content, or abuse
* Uses a two-layer approach:
* 1. Pattern-based detection for common spam patterns
* 2. AI-based moderation for more sophisticated content analysis
*
* @param content Array of strings to moderate (may include undefined values, which will be filtered out)
* @returns True if the content is flagged as inappropriate, false otherwise
*/
export async function moderateContent(content: Array<string | undefined>) {
// Skip moderation if the feature is disabled via the environment
if (env.MODERATION_ENABLED !== "true") {
return false;
}
// Check if OpenAI API key is available
if (!env.OPENAI_API_KEY) {
console.warn(
"Content moderation is enabled but OPENAI_API_KEY is not set. AI-based moderation will be skipped.",
);
return false;
}
const textToModerate = content.filter(Boolean).join("\n");
// Log that moderation is being performed (without logging the actual content)
console.info(
`🔍 Performing content moderation check (content length: ${textToModerate.length} chars)`,
);
// First check for suspicious patterns (faster)
const hasSuspiciousPatterns = containsSuspiciousPatterns(textToModerate);
// If suspicious patterns are found, perform AI moderation
if (hasSuspiciousPatterns) {
console.info(
"⚠️ Suspicious patterns detected, performing AI moderation check",
);
try {
const isFlagged = await moderateContentWithAI(textToModerate);
if (isFlagged) {
console.warn("🚫 Content flagged by AI moderation");
}
return isFlagged;
} catch (error) {
console.error("Error during AI content moderation:", error);
return false;
}
}
return false;
}
/**
* Helper function to check if moderation is enabled
* @returns True if moderation is enabled, false otherwise
*/
export function isModerationEnabled(): boolean {
return env.MODERATION_ENABLED === "true";
}
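A hypothetical caller sketch (the helper name assertNotFlagged is invented for illustration), mirroring how the polls router later in this diff consumes the feature:

import { moderateContent } from "@/features/moderation";

// Throws when the combined text is flagged; resolves quietly otherwise.
// moderateContent already returns false when moderation is disabled,
// so callers don't need to check isModerationEnabled() themselves.
export async function assertNotFlagged(
  title: string,
  description?: string,
  location?: string,
) {
  const flagged = await moderateContent([title, description, location]);
  if (flagged) {
    throw new Error("Inappropriate content");
  }
}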

View file

@@ -0,0 +1,28 @@
import { openai } from "@ai-sdk/openai";
import { generateText } from "ai";
/**
* Moderates content using OpenAI's GPT-4 to detect inappropriate content
* @param text The text to moderate
* @returns True if the content is flagged as inappropriate, false otherwise
*/
export async function moderateContentWithAI(text: string) {
try {
const result = await generateText({
model: openai("gpt-4-turbo"),
messages: [
{
role: "system",
content:
"You are a content moderator. Analyze the following text and determine if it is attempting to misuse the app to advertise illegal drugs, prostitution, or promote illegal gambling and other illicit activities. Respond with 'FLAGGED' if detected, otherwise 'SAFE'.",
},
{ role: "user", content: text },
],
});
return result.text.includes("FLAGGED");
} catch (err) {
console.error(`❌ AI moderation failed:`, err);
return false;
}
}
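Unlike the old utility removed in the next file, which required an exact trim() === "FLAGGED" match, this version flags any response that contains "FLAGGED", so extra wording from the model still counts as a flag. A minimal, hypothetical call for illustration (the import path is assumed; the default OpenAI provider reads OPENAI_API_KEY from the environment):

import { moderateContentWithAI } from "@/features/moderation/libs/ai-moderation";

async function example() {
  const flagged = await moderateContentWithAI("Example text to screen");
  console.log(flagged ? "flagged" : "safe");
}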

View file

@@ -1,27 +1,10 @@
import { openai } from "@ai-sdk/openai";
import { generateText } from "ai";
import { env } from "@/env";
async function moderateContentWithAI(text: string) {
const result = await generateText({
model: openai("gpt-4-turbo"),
messages: [
{
role: "system",
content:
"You are a content moderator. Analyze the following text and determine if it is attempting to misuse the app to advertise illegal drugs, prostitution, or promote illegal gambling and other illicit activities. Respond with 'FLAGGED' if detected, otherwise 'SAFE'.",
},
{ role: "user", content: text },
],
});
return result.text.trim() === "FLAGGED";
}
// Custom pattern-based checks
function containsSuspiciousPatterns(text: string) {
if (!text) return false;
/**
* Checks if the provided text contains suspicious patterns that might indicate spam or abuse
* @param text The text to check for suspicious patterns
* @returns True if suspicious patterns are detected, false otherwise
*/
export function containsSuspiciousPatterns(text: string) {
if (!text.trim()) return false;
// Define all patterns
const repetitiveCharsPattern = /(.)\1{4,}/;
@@ -59,25 +42,3 @@ function containsSuspiciousPatterns(text: string) {
suspiciousUnicodePattern.test(text)
);
}
export async function moderateContent(...content: Array<string | undefined>) {
if (!env.OPENAI_API_KEY) {
console.info("OPENAI_API_KEY not set, skipping moderation");
return false;
}
const textToModerate = content.filter(Boolean).join("\n");
const hasSuspiciousPatterns = containsSuspiciousPatterns(textToModerate);
if (hasSuspiciousPatterns) {
try {
return moderateContentWithAI(textToModerate);
} catch (error) {
console.error(error);
return false;
}
}
return false;
}
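A few illustrative calls against the exported containsSuspiciousPatterns (the import path is assumed; the last result assumes none of the elided patterns match ordinary text):

import { containsSuspiciousPatterns } from "@/features/moderation/libs/pattern-moderation";

containsSuspiciousPatterns("   ");        // false: blank input is short-circuited by the trim() check
containsSuspiciousPatterns("aaaaaaaa");   // true: matches the repetitive-characters pattern /(.)\1{4,}/
containsSuspiciousPatterns("Lunch poll"); // false: assuming no other pattern applies to plain text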

View file

@@ -8,8 +8,8 @@ import dayjs from "dayjs";
import * as ics from "ics";
import { z } from "zod";
import { moderateContent } from "@/features/moderation";
import { getEmailClient } from "@/utils/emails";
import { moderateContent } from "@/utils/moderation";
import { getTimeZoneAbbreviation } from "../../utils/date";
import {
@@ -131,8 +131,6 @@ export const polls = router({
// START LEGACY ROUTES
create: possiblyPublicProcedure
.use(createRateLimitMiddleware("create_poll", 10, "1 h"))
.use(requireUserMiddleware)
.input(
z.object({
title: z.string().trim().min(1),
@@ -149,29 +147,38 @@ export const polls = router({
endDate: z.string().optional(),
})
.array(),
demo: z.boolean().optional(),
}),
)
.use(requireUserMiddleware)
.use(createRateLimitMiddleware("create_poll", 10, "1 h"))
.use(async ({ ctx, input, next }) => {
const isFlaggedContent = await moderateContent([
input.title,
input.description,
input.location,
]);
if (isFlaggedContent) {
posthog?.capture({
distinctId: ctx.user.id,
event: "flagged_content",
properties: {
action: "create_poll",
},
});
throw new TRPCError({
code: "BAD_REQUEST",
message: "Inappropriate content",
});
}
return next();
})
.mutation(async ({ ctx, input }) => {
const adminToken = nanoid();
const participantUrlId = nanoid();
const pollId = nanoid();
const isFlaggedContent = await moderateContent(
input.title,
input.description,
);
if (isFlaggedContent) {
console.warn(
`User ${ctx.user.id} attempted to create flagged content: ${input.title}`,
);
throw new TRPCError({
code: "BAD_REQUEST",
message: "Content is flagged as spam",
});
}
const poll = await prisma.poll.create({
select: {
adminUrlId: true,
@@ -249,7 +256,6 @@ export const polls = router({
return { id: poll.id };
}),
update: possiblyPublicProcedure
.use(requireUserMiddleware)
.input(
z.object({
urlId: z.string(),
@@ -266,23 +272,34 @@ export const polls = router({
requireParticipantEmail: z.boolean().optional(),
}),
)
.mutation(async ({ ctx, input }) => {
const pollId = await getPollIdFromAdminUrlId(input.urlId);
const isFlaggedContent = await moderateContent(
.use(requireUserMiddleware)
.use(createRateLimitMiddleware("update_poll", 5, "1 m"))
.use(async ({ ctx, input, next }) => {
const isFlaggedContent = await moderateContent([
input.title,
input.description,
);
input.location,
]);
if (isFlaggedContent) {
console.warn(
`User ${ctx.user.id} attempted to create flagged content: ${input.title}`,
);
posthog?.capture({
distinctId: ctx.user.id,
event: "flagged_content",
properties: {
action: "update_poll",
},
});
throw new TRPCError({
code: "BAD_REQUEST",
message: "Content is flagged as spam",
message: "Inappropriate content",
});
}
return next();
})
.mutation(async ({ input }) => {
const pollId = await getPollIdFromAdminUrlId(input.urlId);
if (input.optionsToDelete && input.optionsToDelete.length > 0) {
await prisma.option.deleteMany({
where: {

View file

@@ -1,4 +1,5 @@
import { prisma } from "@rallly/database";
import { posthog } from "@rallly/posthog/server";
import { initTRPC, TRPCError } from "@trpc/server";
import { Ratelimit } from "@upstash/ratelimit";
import { kv } from "@vercel/kv";
@@ -140,6 +141,15 @@ export const createRateLimitMiddleware = (
const res = await ratelimit.limit(`${name}:${ctx.identifier}`);
if (!res.success) {
posthog?.capture({
distinctId: ctx.user?.id ?? "system",
event: "ratelimit_exceeded",
properties: {
name,
requests,
duration,
},
});
throw new TRPCError({
code: "TOO_MANY_REQUESTS",
message: "Too many requests",

View file

@@ -77,6 +77,7 @@
"DATABASE_URL",
"DISABLE_LANDING_PAGE",
"EMAIL_PROVIDER",
"MODERATION_ENABLED",
"GOOGLE_CLIENT_ID",
"GOOGLE_CLIENT_SECRET",
"KV_REST_API_URL",