Max token CLI argument #111

Open
Fcpacheco opened this issue May 15, 2025 · 0 comments

I added a max-tokens argument to the CLI because some models have limits on the tokens per request, like GPT-4o, which has a 30,000-token limit. It is also worth mentioning that although Google Gemini has a large limit, it charges based on the tokens in the request: below 250k tokens is one price, and above that it is more expensive.
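
For example, with this change the limit can be set on the command line like this (a hypothetical invocation; the script name and repository URL are placeholders):

python main.py --repo https://github.com/example/repo --max-tokens 30000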

Below is the sample code:

import dotenv
import os
import argparse
# Import the function that creates the flow
from flow import create_tutorial_flow

dotenv.load_dotenv()

# Default file patterns
DEFAULT_INCLUDE_PATTERNS = {
    "*.py", "*.js", "*.jsx", "*.ts", "*.tsx", "*.go", "*.java", "*.pyi", "*.pyx",
    "*.c", "*.cc", "*.cpp", "*.h", "*.md", "*.rst", "Dockerfile",
    "Makefile", "*.yaml", "*.yml",
}

# Text-only mode default patterns
TEXT_ONLY_INCLUDE_PATTERNS = {
    "*.md", "*.txt", "*.rst", "*.markdown", "README*", "documentation/*", "docs/*", "*.html", "*.mdx"
}

DEFAULT_EXCLUDE_PATTERNS = {
    "assets/*", "data/*", "examples/*", "images/*", "public/*", "static/*", "temp/*",
    "docs/*", "venv/*", ".venv/*", "*test*", "tests/*", "v1/*",
    "dist/*", "build/*", "experimental/*", "deprecated/*", "misc/*",
    "legacy/*", ".git/*", ".github/*", ".next/*", ".vscode/*", "obj/*", "bin/*", "node_modules/*", "*.log"
}

# Text-only mode exclude patterns (more permissive with docs)
TEXT_ONLY_EXCLUDE_PATTERNS = {
    "venv/*", ".venv/*", "node_modules/*", ".git/*", ".github/*", ".next/*", ".vscode/*",
    "dist/*", "build/*", "obj/*", "bin/*", "*.log"
}

# --- Main Function ---
def main():
    parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase or local directory.")

    # Create mutually exclusive group for source
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--repo", help="URL of the public GitHub repository.")
    source_group.add_argument("--dir", help="Path to local directory.")

    parser.add_argument("-n", "--name", help="Project name (optional, derived from repo/directory if omitted).")
    parser.add_argument("-t", "--token", help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).")
    parser.add_argument("-o", "--output", default="output", help="Base directory for output (default: ./output).")
    parser.add_argument("-i", "--include", nargs="+", help="Include file patterns (e.g. '*.py' '*.js'). Defaults to common code files if not specified.")
    parser.add_argument("-e", "--exclude", nargs="+", help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). Defaults to test/build directories if not specified.")
    parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")
    # Add language parameter for multi-language support
    parser.add_argument("--language", default="english", help="Language for the generated tutorial (default: english)")
    # Add use_cache parameter to control LLM caching
    parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
    # Add max_abstraction_num parameter to control the number of abstractions
    parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)")
    # Add text-only mode flag for focusing on text files rather than code
    parser.add_argument("--text-only", action="store_true", help="Enable text-only mode to focus on documentation files (*.md, *.txt) rather than code")
    # Add max_tokens parameter to control the token limit for LLM requests
    parser.add_argument("--max-tokens", type=int, default=30000, help="Maximum tokens per LLM request (default: 30000, adjust based on model limits)")

    args = parser.parse_args()

    # Get GitHub token from argument or environment variable if using repo
    github_token = None
    if args.repo:
        github_token = args.token or os.environ.get('GITHUB_TOKEN')
        if not github_token:
            print("Warning: No GitHub token provided. You might hit rate limits for public repositories.")

    # Set include/exclude patterns based on text-only mode if not specified by user
    include_patterns = set(args.include) if args.include else (TEXT_ONLY_INCLUDE_PATTERNS if args.text_only else DEFAULT_INCLUDE_PATTERNS)
    exclude_patterns = set(args.exclude) if args.exclude else (TEXT_ONLY_EXCLUDE_PATTERNS if args.text_only else DEFAULT_EXCLUDE_PATTERNS)

    # Initialize the shared dictionary with inputs
    shared = {
        "repo_url": args.repo,
        "local_dir": args.dir,
        "project_name": args.name, # Can be None, FetchRepo will derive it
        "github_token": github_token,
        "output_dir": args.output, # Base directory for CombineTutorial output

        # Add include/exclude patterns and max file size
        "include_patterns": include_patterns,
        "exclude_patterns": exclude_patterns,
        "max_file_size": args.max_size,

        # Add language for multi-language support
        "language": args.language,
        
        # Add use_cache flag (inverse of no-cache flag)
        "use_cache": not args.no_cache,
        
        # Add max_abstraction_num parameter
        "max_abstraction_num": args.max_abstractions,
        
        # Add text_only flag
        "text_only": args.text_only,
        
        # Add max_tokens parameter for LLM requests
        "max_tokens": args.max_tokens,

        # Outputs will be populated by the nodes
        "files": [],
        "abstractions": [],
        "relationships": {},
        "chapter_order": [],
        "chapters": [],
        "final_output_dir": None
    }

    # Display starting message with repository/directory and language
    print(f"Starting tutorial generation for: {args.repo or args.dir} in {args.language.capitalize()} language")
    print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}")
    print(f"Mode: {'Text-only' if args.text_only else 'Code analysis'}")
    print(f"Max tokens per request: {args.max_tokens}")

    # Create the flow instance
    tutorial_flow = create_tutorial_flow()

    # Run the flow
    tutorial_flow.run(shared)

if __name__ == "__main__":
    main()
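
The patch above only threads max_tokens into the shared dictionary; the nodes that build prompts still have to respect it. Below is a minimal sketch of how a node could keep a request under the limit, assuming a rough 4-characters-per-token estimate and a call_llm(prompt, use_cache=...) helper like the one the project already uses; the truncation helper and the variable names are hypothetical.

# Hypothetical sketch: trim a prompt so a request stays under shared["max_tokens"].
# Uses ~4 characters per token as a rough estimate; a real implementation could
# swap in a tokenizer (e.g. tiktoken) for an exact count.
def truncate_to_token_limit(text: str, max_tokens: int, chars_per_token: int = 4) -> str:
    """Return text cut down to roughly max_tokens tokens."""
    max_chars = max_tokens * chars_per_token
    return text if len(text) <= max_chars else text[:max_chars]

# Illustrative use inside a node, before the LLM call:
# prompt = truncate_to_token_limit(prompt, shared["max_tokens"])
# response = call_llm(prompt, use_cache=shared["use_cache"])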