// codeboard/packages/parser/src/languages/python.ts

import type {
  FileNode,
  FunctionNode,
  ClassNode,
  ImportNode,
  ExportNode,
} from "@codeboard/shared";
import type { LanguageParser } from "./base.js";

/**
 * Matches a Python function header (`def` or `async def`) at the start of a line.
 *
 * Groups: 1 = leading indentation (spaces/tabs only, never newlines),
 * 2 = function name, 3 = raw parameter text (one level of nested parentheses
 * is allowed so defaults such as `x=(1, 2)` or `y=dict()` do not end the
 * list early), 4 = optional return annotation.
 *
 * The indentation group deliberately uses `[ \t]*` rather than `\s*`: with the
 * `m` flag `\s*` would also swallow preceding blank lines, which inflates the
 * measured indent and shifts `match.index` onto the blank line.
 */
const FUNC_RE =
  /^([ \t]*)(?:async[ \t]+)?def\s+(\w+)\s*\(((?:[^()]|\([^()]*\))*)\)(?:\s*->\s*([^:]+))?\s*:/gm;

/**
 * Matches a Python class header at the start of a line.
 *
 * Groups: 1 = leading indentation (spaces/tabs only), 2 = class name,
 * 3 = optional raw base-class list.
 */
const CLASS_RE = /^([ \t]*)class\s+(\w+)(?:\(([^)]*)\))?\s*:/gm;

/**
 * Matches an unindented `import ...` or `from X import ...` line.
 *
 * Groups: 1 = module named after `from` (undefined for a plain `import`),
 * 2 = the rest of the line after `import`.
 */
const IMPORT_RE = /^(?:from\s+([\w.]+)\s+)?import\s+(.+)$/gm;

/**
 * Matches a triple-quoted string at the very start of the input, ignoring
 * leading whitespace. Group 1 holds the text of a `"""` string, group 2 the
 * text of a `'''` string.
 */
const DOCSTRING_RE = /^\s*(?:"""([\s\S]*?)"""|'''([\s\S]*?)''')/;

/**
 * Extracts parameter names from the raw text between a Python function's
 * parentheses.
 *
 * The text is split on commas that sit at bracket depth zero and outside
 * string literals, so commas inside annotations (`Dict[str, int]`) or default
 * values (`(1, 2)`, `","`) do not produce bogus parameters. `#` comments
 * (possible in multi-line signatures) are skipped. For each piece, everything
 * from the first `:` or `=` onward is dropped.
 *
 * @param raw - Parameter list text without the surrounding parentheses.
 * @returns Parameter names in declaration order, excluding `self`, `cls`, and
 *   the bare `*` / `/` separators. Star prefixes (`*args`, `**kwargs`) are kept.
 */
function parseParams(raw: string): string[] {
  if (!raw.trim()) return [];

  const pieces: string[] = [];
  let current = "";
  let depth = 0;
  let quote: string | null = null;

  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    if (quote !== null) {
      // Inside a string literal: copy escaped characters verbatim so an
      // escaped quote does not terminate the literal.
      if (ch === "\\" && i + 1 < raw.length) {
        current += ch + raw[i + 1];
        i++;
        continue;
      }
      if (ch === quote) quote = null;
    } else if (ch === "#") {
      // Comment: drop everything up to (not including) the end of the line.
      const eol = raw.indexOf("\n", i);
      if (eol === -1) break;
      i = eol - 1;
      continue;
    } else if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === "(" || ch === "[" || ch === "{") {
      depth++;
    } else if (ch === ")" || ch === "]" || ch === "}") {
      depth = Math.max(0, depth - 1);
    } else if (ch === "," && depth === 0) {
      pieces.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  pieces.push(current);

  return pieces
    .map((p) => p.trim().split(":")[0].split("=")[0].trim())
    .filter((p) => p && p !== "self" && p !== "cls" && p !== "*" && p !== "/");
}

/** Matches a module-level `__all__` list or tuple literal; group 1 is the raw element text. */
const ALL_RE = /^__all__\s*=\s*[\[(]([^\])]*)[\])]/m;

/** Matches a line that starts with the `except` keyword (not identifiers such as `exceptions`). */
const EXCEPT_RE = /^except\b/;

/** A parsed `def` paired with the width of its leading indentation. */
interface IndentedFunction {
  indent: number;
  node: FunctionNode;
}

/** Returns the 0-based character offset at which each line of `content` begins. */
function buildLineStarts(content: string): number[] {
  const starts = [0];
  for (let nl = content.indexOf("\n"); nl !== -1; nl = content.indexOf("\n", nl + 1)) {
    starts.push(nl + 1);
  }
  return starts;
}

/** Returns the 1-based number of the line containing character `offset` (binary search). */
function lineNumberAt(lineStarts: readonly number[], offset: number): number {
  let lo = 0;
  let hi = lineStarts.length - 1;
  while (lo < hi) {
    const mid = (lo + hi + 1) >> 1;
    if (lineStarts[mid] <= offset) lo = mid;
    else hi = mid - 1;
  }
  return lo + 1;
}

/** Returns the number of leading whitespace characters on `line`. */
function leadingWidth(line: string): number {
  return line.length - line.trimStart().length;
}

/** Returns `line` with any `#` comment removed. Only safe for text that cannot contain string literals with `#`. */
function stripComment(line: string): string {
  const hash = line.indexOf("#");
  return hash === -1 ? line : line.slice(0, hash);
}

/**
 * Derives position data for a `def`/`class` header match whose group 1 is the
 * leading whitespace. Any newlines captured in group 1 are discarded so the
 * indent and line number always describe the line holding the keyword.
 */
function locateHeader(
  match: RegExpExecArray,
  lineStarts: readonly number[],
): { indent: number; lineStart: number; headerEnd: number } {
  const leading = match[1];
  const indentText = leading.slice(leading.lastIndexOf("\n") + 1);
  return {
    indent: indentText.length,
    // 1-based line of the keyword itself.
    lineStart: lineNumberAt(lineStarts, match.index + leading.length),
    // 1-based line of the terminating ":" (differs for multi-line headers).
    headerEnd: lineNumberAt(lineStarts, match.index + match[0].length - 1),
  };
}

/**
 * Returns the 1-based last line of the block whose header ends on line
 * `headerEnd` and is indented by `indent` characters. The block ends before
 * the first non-blank, non-comment line indented no deeper than the header;
 * trailing blank lines are not included.
 */
function findBlockEnd(lines: readonly string[], headerEnd: number, indent: number): number {
  let last = headerEnd;
  for (let i = headerEnd; i < lines.length; i++) {
    const text = lines[i].trim();
    if (text === "") continue;
    if (leadingWidth(lines[i]) <= indent) {
      // A dedented comment neither closes nor extends the block.
      if (text.startsWith("#")) continue;
      break;
    }
    last = i + 1;
  }
  return last;
}

/** Returns the indentation width of the first non-blank, non-comment body line, or -1 if the body is empty. */
function firstBodyIndent(lines: readonly string[], headerEnd: number, blockEnd: number): number {
  for (let i = headerEnd; i < blockEnd; i++) {
    const text = lines[i].trim();
    if (text !== "" && !text.startsWith("#")) return leadingWidth(lines[i]);
  }
  return -1;
}

/** Returns the trimmed docstring that opens the body spanning lines `headerEnd+1..blockEnd`, if any. */
function extractDocstring(
  lines: readonly string[],
  headerEnd: number,
  blockEnd: number,
): string | undefined {
  if (blockEnd <= headerEnd) return undefined;
  const docMatch = DOCSTRING_RE.exec(lines.slice(headerEnd, blockEnd).join("\n"));
  return docMatch ? (docMatch[1] ?? docMatch[2] ?? "").trim() : undefined;
}

/** Splits an import specifier list into names, dropping parentheses, continuations and `as` aliases. */
function parseImportNames(spec: string): string[] {
  return spec
    .replace(/[()\\]/g, " ")
    .split(",")
    .map((s) => s.trim().split(/\s+as\s+/)[0].trim())
    .filter(Boolean);
}

/**
 * Heuristic branch count: +1 per line starting with `if`/`elif`, +1 per line
 * starting with `for`/`while`, +1 per `except` clause, and +1 per line that
 * contains ` and ` or ` or `. Comment-only lines are ignored.
 */
function estimateComplexity(lines: readonly string[]): number {
  let complexity = 0;
  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed.startsWith("#")) continue;
    if (trimmed.startsWith("if ") || trimmed.startsWith("elif ")) complexity++;
    if (trimmed.startsWith("for ") || trimmed.startsWith("while ")) complexity++;
    if (EXCEPT_RE.test(trimmed)) complexity++;
    if (trimmed.includes(" and ") || trimmed.includes(" or ")) complexity++;
  }
  return complexity;
}

/**
 * Regex-based (non-AST) parser for Python source files.
 *
 * Being line/regex driven, it is a best-effort approximation: text inside
 * string literals that looks like a header or import is not distinguished
 * from real code.
 */
export const pythonParser: LanguageParser = {
  extensions: [".py"],

  /**
   * Extracts top-level functions, classes with their direct methods, imports,
   * `__all__` exports and a heuristic complexity score.
   *
   * @param content - Full text of the Python file.
   * @param filePath - Path recorded verbatim on the returned node.
   * @returns A `FileNode` whose line numbers are 1-based and inclusive.
   */
  parse(content: string, filePath: string): FileNode {
    const lines = content.split("\n");
    const lineStarts = buildLineStarts(content);
    const classes: ClassNode[] = [];
    const imports: ImportNode[] = [];
    const exports: ExportNode[] = [];

    let match: RegExpExecArray | null;

    // Collect every `def` once; top-level functions and class methods are
    // both selected from this list by indentation.
    const defs: IndentedFunction[] = [];
    FUNC_RE.lastIndex = 0;
    while ((match = FUNC_RE.exec(content)) !== null) {
      const header = locateHeader(match, lineStarts);
      const lineEnd = findBlockEnd(lines, header.headerEnd, header.indent);
      defs.push({
        indent: header.indent,
        node: {
          name: match[2],
          params: parseParams(match[3]),
          returnType: match[4]?.trim(),
          lineStart: header.lineStart,
          lineEnd,
          docstring: extractDocstring(lines, header.headerEnd, lineEnd),
          calls: [],
        },
      });
    }
    const functions: FunctionNode[] = defs
      .filter((d) => d.indent === 0)
      .map((d) => d.node);

    CLASS_RE.lastIndex = 0;
    while ((match = CLASS_RE.exec(content)) !== null) {
      const header = locateHeader(match, lineStarts);
      const classEnd = findBlockEnd(lines, header.headerEnd, header.indent);
      // Direct members share the indentation of the first body statement;
      // deeper defs are nested functions or belong to nested classes.
      const memberIndent = firstBodyIndent(lines, header.headerEnd, classEnd);
      const methods = defs
        .filter(
          (d) =>
            d.indent === memberIndent &&
            d.node.lineStart > header.headerEnd &&
            d.node.lineStart <= classEnd,
        )
        .map((d) => d.node);

      classes.push({ name: match[2], methods, properties: [] });
    }

    IMPORT_RE.lastIndex = 0;
    while ((match = IMPORT_RE.exec(content)) !== null) {
      const fromModule = match[1];
      let spec = stripComment(match[2]);
      // 0-based index of the line following the import statement's first line.
      let next = lineNumberAt(lineStarts, match.index);

      if (spec.includes("(") && !spec.includes(")")) {
        // Parenthesised import continued on following lines.
        while (next < lines.length) {
          const part = stripComment(lines[next++]);
          spec += " " + part;
          if (part.includes(")")) break;
        }
      } else {
        // Backslash line continuations.
        while (spec.trimEnd().endsWith("\\") && next < lines.length) {
          spec = spec.trimEnd().slice(0, -1) + " " + stripComment(lines[next++]);
        }
      }

      const importedNames = parseImportNames(spec);
      if (fromModule) {
        imports.push({ source: fromModule, specifiers: importedNames });
      } else {
        for (const name of importedNames) {
          imports.push({ source: name, specifiers: [name] });
        }
      }
    }

    const allMatch = ALL_RE.exec(content);
    if (allMatch) {
      const names = allMatch[1]
        .split("\n")
        .map(stripComment)
        .join(" ")
        .split(",")
        .map((s) => s.trim().replace(/['"]/g, ""))
        .filter(Boolean);
      for (const name of names) {
        exports.push({ name, isDefault: false });
      }
    }

    return {
      path: filePath,
      language: "python",
      size: content.length,
      functions,
      classes,
      imports,
      exports,
      complexity: estimateComplexity(lines),
    };
  },
};