Files
codeboard/packages/parser/src/file-walker.ts
Vectry 79dad6124f feat: initial CodeBoard monorepo scaffold
Turborepo monorepo with npm workspaces:
- apps/web: Next.js 14 frontend with Tailwind v4, SSE progress, doc viewer
- apps/worker: BullMQ job processor (clone → parse → LLM generate)
- packages/shared: TypeScript types
- packages/parser: Babel-based AST parser (JS/TS) + regex (Python)
- packages/llm: OpenAI/Anthropic provider abstraction + prompt pipeline
- packages/diagrams: Mermaid architecture & dependency graph generators
- packages/database: Prisma schema (PostgreSQL)
- Docker multi-stage build (web + worker targets)

All packages compile successfully with tsc and next build.
2026-02-09 15:22:50 +00:00

122 lines
2.5 KiB
TypeScript

import { readdir, stat, readFile } from "node:fs/promises";
import { join, relative, extname, basename } from "node:path";
/**
 * Entry names that `walkDir` skips outright.
 *
 * The check is done against the bare entry name before its type is inspected,
 * so it applies to any directory entry with one of these names, at any depth.
 */
const IGNORED_DIRS = new Set<string>([
  // Dependencies and version control
  "node_modules",
  ".git",
  // Build output
  "dist",
  "build",
  "vendor",
  "__pycache__",
  ".next",
  ".turbo",
  "coverage",
  // Virtual environments and tool state
  ".venv",
  "venv",
  ".tox",
  "target",
  ".cache",
  // Editor settings
  ".idea",
  ".vscode",
]);
/**
 * Maps a file extension (including the leading dot) to a language identifier.
 *
 * Lookups are exact string matches on the extension, so they are
 * case-sensitive. An extension that is not listed here means the file is not
 * treated as source code by this module.
 */
const LANGUAGE_MAP: Record<string, string> = {
  // TypeScript / JavaScript
  ".ts": "typescript",
  ".tsx": "typescript",
  ".js": "javascript",
  ".jsx": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",

  // Other languages
  ".py": "python",
  ".go": "go",
  ".rs": "rust",
  ".java": "java",
  ".rb": "ruby",
  ".php": "php",
  ".cs": "csharp",

  // C family (".h" is classified as C, ".hpp" as C++)
  ".cpp": "cpp",
  ".c": "c",
  ".h": "c",
  ".hpp": "cpp",

  ".swift": "swift",
  ".kt": "kotlin",
};
/**
 * File base names (extension removed) that mark a file as an entry point.
 *
 * `walkDir` compares the name with its extension stripped against this set to
 * fill in `WalkedFile.isEntryPoint`.
 */
const ENTRY_POINT_NAMES = new Set<string>([
  "index",
  "main",
  "app",
  "server",
  "mod",
  "lib",
  "__init__",
  "manage",
]);
/** One source file discovered by the directory walk. */
export interface WalkedFile {
  /** Path built by joining each visited directory with the entry name. */
  absolutePath: string;

  /** `absolutePath` expressed relative to the root directory of the walk. */
  relativePath: string;

  /** Language identifier looked up from the file's extension. */
  language: string;

  /** File size in bytes, as reported by `stat`. */
  size: number;

  /** Whether the file's base name (without extension) is a known entry-point name. */
  isEntryPoint: boolean;
}
/** Largest file size, in bytes, that the walk reports; anything bigger is skipped. */
const MAX_FILE_SIZE_BYTES = 500_000;

/**
 * Recursively collects recognised source files under `dir` into `results`.
 *
 * An entry is skipped when:
 * - its name is listed in `IGNORED_DIRS` or starts with a dot;
 * - it is neither a directory nor a regular file (symlinks, sockets, FIFOs, ...);
 * - its extension has no entry in `LANGUAGE_MAP`;
 * - it is larger than `MAX_FILE_SIZE_BYTES`.
 *
 * Directories are visited depth-first, one at a time, in the order `readdir`
 * returns them.
 *
 * @param dir - Directory to scan on this call.
 * @param rootDir - Root of the walk; each `relativePath` is computed against it.
 * @param results - Accumulator that matching files are appended to (mutated in place).
 * @throws Propagates any error raised by `readdir` or `stat`.
 */
async function walkDir(
  dir: string,
  rootDir: string,
  results: WalkedFile[]
): Promise<void> {
  const entries = await readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    if (IGNORED_DIRS.has(entry.name)) continue;
    if (entry.name.startsWith(".")) continue;

    const fullPath = join(dir, entry.name);

    if (entry.isDirectory()) {
      await walkDir(fullPath, rootDir, results);
      continue;
    }

    // Only regular files are reported. A symlink is neither a directory nor a
    // file as far as the Dirent is concerned, but `stat` below would follow it:
    // a dangling link would then abort the whole walk, and a link pointing
    // outside `rootDir` (or at a directory) would be reported as a source file.
    if (!entry.isFile()) continue;

    const ext = extname(entry.name);
    const language = LANGUAGE_MAP[ext];
    if (!language) continue;

    const fileStat = await stat(fullPath);
    if (fileStat.size > MAX_FILE_SIZE_BYTES) continue;

    const nameWithoutExt = basename(entry.name, ext);

    results.push({
      absolutePath: fullPath,
      relativePath: relative(rootDir, fullPath),
      language,
      size: fileStat.size,
      isEntryPoint: ENTRY_POINT_NAMES.has(nameWithoutExt),
    });
  }
}
/**
 * Walks `repoPath` and returns every recognised source file beneath it.
 *
 * The result is ordered with entry-point files first; within each group the
 * files are ordered by `relativePath` using `String.prototype.localeCompare`.
 *
 * @param repoPath - Directory to walk; also the base for each `relativePath`.
 * @returns The discovered files in the order described above.
 * @throws Propagates any error raised while walking the directory tree.
 */
export async function walkFiles(repoPath: string): Promise<WalkedFile[]> {
  const collected: WalkedFile[] = [];
  await walkDir(repoPath, repoPath, collected);

  // The flag difference is -1 or 1 when exactly one side is an entry point,
  // and 0 when both sides agree, in which case the path comparison decides.
  return collected.sort(
    (left, right) =>
      Number(right.isEntryPoint) - Number(left.isEntryPoint) ||
      left.relativePath.localeCompare(right.relativePath)
  );
}
/**
 * Reads a file and returns its contents decoded as UTF-8 text.
 *
 * @param filePath - Path of the file to read.
 * @returns The decoded file contents.
 * @throws Propagates any error raised by `readFile`.
 */
export async function readFileContent(filePath: string): Promise<string> {
  const text = await readFile(filePath, { encoding: "utf-8" });
  return text;
}
/**
 * Looks up the language for a path from its file extension.
 *
 * @param filePath - Path or file name to classify; only its extension is used.
 * @returns The language identifier from `LANGUAGE_MAP`, or `null` when the
 *   extension has no entry there.
 */
export function detectLanguage(filePath: string): string | null {
  const extension = extname(filePath);
  const language = LANGUAGE_MAP[extension];
  if (language === undefined || language === null) {
    return null;
  }
  return language;
}