You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I added a max-token argument to the CLI because some models have limits on tokens per request — for example, GPT-4o has a 30,000-token limit. It is also worth mentioning that although Google Gemini has a large limit, it charges based on the tokens requested: requests smaller than 250k tokens are billed at one price, while larger requests are more expensive.
Below is the sample code:
# Standard-library imports first, then third-party, then local (PEP 8 grouping).
import argparse
import os

import dotenv

# Import the function that creates the flow
from flow import create_tutorial_flow

# Load environment variables (e.g. GITHUB_TOKEN) from a .env file, if present.
dotenv.load_dotenv()
# Default file patterns: common source-code, docs, and build files included
# when the user does not pass --include explicitly.
DEFAULT_INCLUDE_PATTERNS = {
    "*.py", "*.js", "*.jsx", "*.ts", "*.tsx", "*.go", "*.java", "*.pyi", "*.pyx",
    "*.c", "*.cc", "*.cpp", "*.h", "*.md", "*.rst", "Dockerfile",
    "Makefile", "*.yaml", "*.yml",
}
# Text-only mode default patterns: documentation-oriented files used instead of
# DEFAULT_INCLUDE_PATTERNS when --text-only is set and --include is not given.
TEXT_ONLY_INCLUDE_PATTERNS = {
    "*.md", "*.txt", "*.rst", "*.markdown", "README*", "documentation/*",
    "docs/*", "*.html", "*.mdx",
}
# Default exclude patterns: directories/files that rarely contribute to a
# tutorial (tests, build output, vendored deps, VCS metadata, assets, ...).
# Duplicate entries ("docs/*", "examples/*") from the original literal were
# removed; set semantics are unchanged.
DEFAULT_EXCLUDE_PATTERNS = {
    "assets/*", "data/*", "examples/*", "images/*", "public/*", "static/*",
    "temp/*", "docs/*", "venv/*", ".venv/*", "*test*", "tests/*", "v1/*",
    "dist/*", "build/*", "experimental/*", "deprecated/*", "misc/*",
    "legacy/*", ".git/*", ".github/*", ".next/*", ".vscode/*", "obj/*",
    "bin/*", "node_modules/*", "*.log",
}
# Text-only mode exclude patterns (more permissive with docs): only tooling,
# build-output, and VCS directories are skipped so documentation trees survive.
TEXT_ONLY_EXCLUDE_PATTERNS = {
    "venv/*", ".venv/*", "node_modules/*", ".git/*", ".github/*", ".next/*",
    ".vscode/*", "dist/*", "build/*", "obj/*", "bin/*", "*.log",
}
# --- Main Function ---
def main():
    """Parse CLI arguments and run the tutorial-generation flow.

    Builds the shared state dictionary from the command line (and the
    GITHUB_TOKEN environment variable when --repo is used without --token)
    and hands it to the flow created by ``create_tutorial_flow()``.
    """
    parser = argparse.ArgumentParser(
        description="Generate a tutorial for a GitHub codebase or local directory."
    )

    # Exactly one source is required: a remote GitHub repo or a local directory.
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--repo", help="URL of the public GitHub repository.")
    source_group.add_argument("--dir", help="Path to local directory.")

    parser.add_argument("-n", "--name", help="Project name (optional, derived from repo/directory if omitted).")
    parser.add_argument("-t", "--token", help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).")
    parser.add_argument("-o", "--output", default="output", help="Base directory for output (default: ./output).")
    parser.add_argument("-i", "--include", nargs="+", help="Include file patterns (e.g. '*.py' '*.js'). Defaults to common code files if not specified.")
    parser.add_argument("-e", "--exclude", nargs="+", help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). Defaults to test/build directories if not specified.")
    parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")
    # Language parameter for multi-language support
    parser.add_argument("--language", default="english", help="Language for the generated tutorial (default: english)")
    # use_cache parameter to control LLM caching
    parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
    # max_abstraction_num parameter to control the number of abstractions
    parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)")
    # Text-only mode flag for focusing on text files rather than code
    parser.add_argument("--text-only", action="store_true", help="Enable text-only mode to focus on documentation files (*.md, *.txt) rather than code")
    # max_tokens parameter to control the token limit for LLM requests
    # (some providers cap tokens per request, e.g. GPT-4o at 30,000)
    parser.add_argument("--max-tokens", type=int, default=30000, help="Maximum tokens per LLM request (default: 30000, adjust based on model limits)")

    args = parser.parse_args()

    # Get GitHub token from argument or environment variable if using a repo.
    github_token = None
    if args.repo:
        github_token = args.token or os.environ.get("GITHUB_TOKEN")
        if not github_token:
            print("Warning: No GitHub token provided. You might hit rate limits for public repositories.")

    # Set include/exclude patterns based on text-only mode if not specified by user.
    include_patterns = set(args.include) if args.include else (
        TEXT_ONLY_INCLUDE_PATTERNS if args.text_only else DEFAULT_INCLUDE_PATTERNS
    )
    exclude_patterns = set(args.exclude) if args.exclude else (
        TEXT_ONLY_EXCLUDE_PATTERNS if args.text_only else DEFAULT_EXCLUDE_PATTERNS
    )

    # Initialize the shared dictionary with inputs for the flow.
    shared = {
        "repo_url": args.repo,
        "local_dir": args.dir,
        "project_name": args.name,  # Can be None, FetchRepo will derive it
        "github_token": github_token,
        "output_dir": args.output,  # Base directory for CombineTutorial output

        # Include/exclude patterns and max file size
        "include_patterns": include_patterns,
        "exclude_patterns": exclude_patterns,
        "max_file_size": args.max_size,

        # Language for multi-language support
        "language": args.language,
        # use_cache flag (inverse of the --no-cache flag)
        "use_cache": not args.no_cache,
        # Maximum number of abstractions to identify
        "max_abstraction_num": args.max_abstractions,
        # text_only flag
        "text_only": args.text_only,
        # Token limit for LLM requests
        "max_tokens": args.max_tokens,

        # Outputs will be populated by the nodes
        "files": [],
        "abstractions": [],
        "relationships": {},
        "chapter_order": [],
        "chapters": [],
        "final_output_dir": None,
    }

    # Display starting message with repository/directory and chosen options.
    print(f"Starting tutorial generation for: {args.repo or args.dir} in {args.language.capitalize()} language")
    print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}")
    print(f"Mode: {'Text-only' if args.text_only else 'Code analysis'}")
    print(f"Max tokens per request: {args.max_tokens}")

    # Create the flow instance and run it with the shared state.
    tutorial_flow = create_tutorial_flow()
    tutorial_flow.run(shared)


if __name__ == "__main__":
    main()
The text was updated successfully, but these errors were encountered:
I added a max-token argument to the CLI because some models have limits on tokens per request — for example, GPT-4o has a 30,000-token limit. It is also worth mentioning that although Google Gemini has a large limit, it charges based on the tokens requested: requests smaller than 250k tokens are billed at one price, while larger requests are more expensive.
Below is the sample code:
The text was updated successfully, but these errors were encountered: