Turborepo monorepo with npm workspaces: - apps/web: Next.js 14 frontend with Tailwind v4, SSE progress, doc viewer - apps/worker: BullMQ job processor (clone → parse → LLM generate) - packages/shared: TypeScript types - packages/parser: Babel-based AST parser (JS/TS) + regex (Python) - packages/llm: OpenAI/Anthropic provider abstraction + prompt pipeline - packages/diagrams: Mermaid architecture & dependency graph generators - packages/database: Prisma schema (PostgreSQL) - Docker multi-stage build (web + worker targets) All packages compile successfully with tsc and next build.
158 lines
4.6 KiB
TypeScript
158 lines
4.6 KiB
TypeScript
import type {
|
|
FileNode,
|
|
FunctionNode,
|
|
ClassNode,
|
|
ImportNode,
|
|
ExportNode,
|
|
} from "@codeboard/shared";
|
|
import type { LanguageParser } from "./base.js";
|
|
|
|
/**
 * Matches a `def` or `async def` header at any indentation.
 *
 * Groups: 1 = indentation, 2 = function name, 3 = raw parameter text,
 * 4 = return annotation (optional).
 *
 * The indentation group only accepts spaces and tabs. A generic whitespace
 * class would, together with the `m` flag, also consume the blank lines in
 * front of the header, which inflates the captured indent and moves
 * `match.index` onto an earlier line.
 *
 * Limitation: the parameter group stops at the first `)`, so signatures whose
 * defaults contain a call such as `x=f()` are not matched.
 */
const FUNC_RE =
  /^([ \t]*)(?:async[ \t]+)?def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*([^:]+))?\s*:/gm;

/**
 * Matches a `class` header at any indentation.
 *
 * Groups: 1 = indentation (spaces/tabs only, for the same reason as in
 * `FUNC_RE`), 2 = class name, 3 = raw base-class list (optional).
 */
const CLASS_RE = /^([ \t]*)class\s+(\w+)(?:\(([^)]*)\))?\s*:/gm;

/**
 * Matches an unindented `import x` or `from x import y` statement up to the
 * end of its first line.
 *
 * Groups: 1 = module after `from` (absent for plain imports), 2 = the rest of
 * the line after `import`.
 */
const IMPORT_RE = /^(?:from\s+([\w.]+)\s+)?import\s+(.+)$/gm;

/**
 * Matches a triple-quoted string at the very start of the tested text,
 * optionally carrying an `r`/`u` prefix.
 *
 * Groups: 1 = content of a `"""` string, 2 = content of a `'''` string.
 */
const DOCSTRING_RE = /^\s*[rRuU]?(?:"""([\s\S]*?)"""|'''([\s\S]*?)''')/;
|
|
|
|
/**
 * Splits a Python parameter list on the commas that separate parameters.
 *
 * Commas nested inside brackets (`Dict[str, int]`, `(1, 2)`) or inside string
 * literals (`sep=","`) are kept as part of the current piece, and `#` comments
 * are dropped up to the end of their line.
 */
function splitTopLevelCommas(raw: string): string[] {
  const parts: string[] = [];
  let current = "";
  let depth = 0;
  let quote = "";
  for (let i = 0; i < raw.length; i++) {
    const ch = raw.charAt(i);
    if (quote !== "") {
      current += ch;
      if (ch === "\\") {
        // Copy the escaped character too so an escaped quote does not end the string.
        i++;
        current += raw.charAt(i);
      } else if (ch === quote) {
        quote = "";
      }
    } else if (ch === "#") {
      const eol = raw.indexOf("\n", i);
      if (eol === -1) break;
      // Resume at the newline itself so it is kept as ordinary whitespace.
      i = eol - 1;
    } else if (ch === "," && depth === 0) {
      parts.push(current);
      current = "";
    } else {
      if (ch === '"' || ch === "'") quote = ch;
      else if ("([{".includes(ch)) depth++;
      else if (")]}".includes(ch)) depth = Math.max(0, depth - 1);
      current += ch;
    }
  }
  parts.push(current);
  return parts;
}

/**
 * Extracts parameter names from the raw text between the parentheses of a
 * Python `def` signature.
 *
 * Annotations and default values are removed. `self`, `cls`, and the bare `*`
 * and `/` markers are omitted because they are not caller-supplied parameters.
 * Variadic parameters keep their stars (`*args`, `**kwargs`).
 *
 * @param raw Text between the parentheses; may be empty or span several lines.
 * @returns Parameter names in declaration order.
 */
function parseParams(raw: string): string[] {
  if (!raw.trim()) return [];
  return splitTopLevelCommas(raw)
    .map((piece) => {
      // The name always precedes the first `:` (annotation) or `=` (default).
      const cut = piece.search(/[:=]/);
      return (cut === -1 ? piece : piece.slice(0, cut)).trim();
    })
    .filter(
      (name) =>
        name !== "" &&
        name !== "self" &&
        name !== "cls" &&
        name !== "*" &&
        name !== "/",
    );
}
|
|
|
|
/** Position of a `def`/`class` header inside the file. */
interface HeaderLocation {
  /** Number of whitespace characters in front of the keyword on its own line. */
  indent: number;
  /** 1-based line that holds the keyword. */
  lineStart: number;
  /** 1-based line that holds the colon closing the header. */
  headerEndLine: number;
}

/** Returns the 1-based number of the line containing the character at `offset`. */
function lineNumberAt(content: string, offset: number): number {
  let line = 1;
  let newline = content.indexOf("\n");
  while (newline !== -1 && newline < offset) {
    line++;
    newline = content.indexOf("\n", newline + 1);
  }
  return line;
}

/** Counts the leading whitespace characters of a line. */
function indentWidth(text: string): number {
  return text.length - text.trimStart().length;
}

/** Removes a `#` comment from text that is known to contain no string literals. */
function stripComment(text: string): string {
  return text.replace(/#.*/g, "");
}

/**
 * Derives indentation and line numbers for a header match whose first capture
 * group is the whitespace in front of the keyword.
 *
 * The capture may include whole blank lines when the pattern uses a generic
 * whitespace class, so only the text after its last newline is treated as
 * indentation and the start line is measured at the keyword, not at
 * `match.index`.
 */
function locateHeader(content: string, match: RegExpExecArray): HeaderLocation {
  const leading = match[1] ?? "";
  const indentText = leading.slice(leading.lastIndexOf("\n") + 1);
  return {
    indent: indentText.length,
    lineStart: lineNumberAt(content, match.index + leading.length),
    headerEndLine: lineNumberAt(content, match.index + match[0].length - 1),
  };
}

/**
 * Finds the last line of the indented block that follows a header.
 *
 * Scans forward from the line after the header and stops at the first
 * non-blank line indented no deeper than the header. Blank lines never end a
 * block, and neither do comment-only lines at or left of the header's indent.
 * Neither kind extends the block; comment lines indented deeper than the
 * header count as body lines.
 *
 * @param lines File content split on `\n`.
 * @param headerEndLine 1-based line on which the header ends.
 * @param headerIndent Indentation width of the header.
 * @returns 1-based, inclusive line number of the last body line, or
 *   `headerEndLine` when the block has no indented body.
 */
function findBlockEnd(
  lines: readonly string[],
  headerEndLine: number,
  headerIndent: number,
): number {
  let end = headerEndLine;
  // A 1-based line number doubles as the 0-based index of the following line.
  for (let i = headerEndLine; i < lines.length; i++) {
    const text = lines[i] ?? "";
    const trimmed = text.trim();
    if (trimmed === "") continue;
    if (indentWidth(text) <= headerIndent) {
      if (trimmed.startsWith("#")) continue;
      break;
    }
    end = i + 1;
  }
  return end;
}

/**
 * Returns the comment-free name list of an import statement, following a
 * parenthesised list across lines until its closing parenthesis.
 *
 * @param lines File content split on `\n`.
 * @param nextIndex 0-based index of the line after the import statement.
 * @param rest Text after the `import` keyword on the statement's first line.
 */
function readImportClause(
  lines: readonly string[],
  nextIndex: number,
  rest: string,
): string {
  const head = stripComment(rest);
  if (!head.includes("(") || head.includes(")")) return head;
  let clause = head;
  for (let i = nextIndex; i < lines.length; i++) {
    const extra = stripComment(lines[i] ?? "");
    clause += " " + extra;
    if (extra.includes(")")) return clause;
  }
  // Unterminated list: fall back to the first line rather than the whole file.
  return head;
}

/**
 * Regex-based structural parser for Python sources.
 *
 * It recognises `def`/`class` headers, import statements and `__all__`, and
 * uses indentation to decide where blocks end. It does not tokenize the file,
 * so text inside multi-line string literals can be mistaken for code.
 */
export const pythonParser: LanguageParser = {
  extensions: [".py"],

  /**
   * Builds a `FileNode` describing the functions, classes, imports, exports
   * and a keyword-count complexity score of one Python file.
   *
   * @param content Full text of the file.
   * @param filePath Path stored unchanged in the result.
   * @returns The file summary. All line numbers are 1-based and inclusive.
   */
  parse(content: string, filePath: string): FileNode {
    const lines = content.split("\n");
    const classes: ClassNode[] = [];
    const imports: ImportNode[] = [];
    const exports: ExportNode[] = [];

    let match: RegExpExecArray | null;

    // Pass 1: collect every `def` at any depth with its indentation, so that
    // top-level functions and class methods can both be selected from it.
    const defs: { indent: number; node: FunctionNode }[] = [];
    FUNC_RE.lastIndex = 0;
    while ((match = FUNC_RE.exec(content)) !== null) {
      const header = locateHeader(content, match);
      const lineEnd = findBlockEnd(lines, header.headerEndLine, header.indent);
      // The docstring, if any, is the first thing in the body.
      const body = lines.slice(header.headerEndLine, lineEnd).join("\n");
      const docMatch = DOCSTRING_RE.exec(body);
      defs.push({
        indent: header.indent,
        node: {
          name: match[2] ?? "",
          params: parseParams(match[3] ?? ""),
          returnType: match[4]?.trim(),
          lineStart: header.lineStart,
          lineEnd,
          docstring: docMatch
            ? (docMatch[1] ?? docMatch[2] ?? "").trim()
            : undefined,
          calls: [],
        },
      });
    }

    const functions: FunctionNode[] = defs
      .filter((def) => def.indent === 0)
      .map((def) => def.node);

    // Pass 2: classes. A method is a `def` inside the class's own block whose
    // indentation equals that of the first statement in the class body, which
    // excludes functions nested inside methods and methods of nested classes.
    CLASS_RE.lastIndex = 0;
    while ((match = CLASS_RE.exec(content)) !== null) {
      const header = locateHeader(content, match);
      const classEnd = findBlockEnd(lines, header.headerEndLine, header.indent);
      const firstMember = lines
        .slice(header.headerEndLine, classEnd)
        .find((text) => text.trim() !== "" && indentWidth(text) > header.indent);
      const memberIndent =
        firstMember === undefined ? undefined : indentWidth(firstMember);

      const methods: FunctionNode[] = defs
        .filter(
          (def) =>
            def.indent === memberIndent &&
            def.node.lineStart > header.headerEndLine &&
            def.node.lineStart <= classEnd,
        )
        .map((def) => def.node);

      classes.push({ name: match[2] ?? "", methods, properties: [] });
    }

    // Pass 3: imports. `from m import a, b` becomes one entry for `m`;
    // `import a, b` becomes one entry per module.
    IMPORT_RE.lastIndex = 0;
    while ((match = IMPORT_RE.exec(content)) !== null) {
      const fromModule = match[1];
      const clause = readImportClause(
        lines,
        lineNumberAt(content, match.index),
        match[2] ?? "",
      );
      const importedNames = clause
        .replace(/[()]/g, " ")
        .split(",")
        // Keep the imported name, not its local alias.
        .map((piece) => (piece.trim().split(/\s+as\s+/)[0] ?? "").trim())
        .filter(Boolean);

      if (fromModule) {
        imports.push({ source: fromModule, specifiers: importedNames });
      } else {
        for (const name of importedNames) {
          imports.push({ source: name, specifiers: [name] });
        }
      }
    }

    // Exports come from a module-level `__all__` list or tuple literal.
    const allMatch = /^__all__\s*=\s*[[(]([^\])]*)[\])]/m.exec(content);
    if (allMatch) {
      const names = stripComment(allMatch[1] ?? "")
        .split(",")
        .map((piece) => piece.trim().replace(/['"]/g, ""))
        .filter(Boolean);
      for (const name of names) {
        exports.push({ name, isDefault: false });
      }
    }

    // Complexity is a per-line keyword count that starts at zero; comment-only
    // lines are skipped so prose such as "# this and that" is not counted.
    let complexity = 0;
    for (const line of lines) {
      const trimmed = line.trim();
      if (trimmed.startsWith("#")) continue;
      if (trimmed.startsWith("if ") || trimmed.startsWith("elif ")) complexity++;
      if (trimmed.startsWith("for ") || trimmed.startsWith("while ")) complexity++;
      if (trimmed.startsWith("except")) complexity++;
      if (trimmed.includes(" and ") || trimmed.includes(" or ")) complexity++;
    }

    return {
      path: filePath,
      language: "python",
      size: content.length,
      functions,
      classes,
      imports,
      exports,
      complexity,
    };
  },
};
|