Skip to content

Codegate 844 #931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 34 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6159108
Initial suspicious commands
therealnb Feb 4, 2025
b3a35e9
Update lock file
therealnb Feb 4, 2025
b8a93c3
Well that's worse, in my view
therealnb Feb 4, 2025
ed7c5b8
Yep, the test file looks worse too
therealnb Feb 4, 2025
79f7990
More linting...
therealnb Feb 4, 2025
af85b5d
Merge branch 'main' into codegate-844
therealnb Feb 5, 2025
ced07c8
Merge branch 'main' into codegate-844
therealnb Feb 5, 2025
c842a3d
Pin versions, remove h5py
therealnb Feb 5, 2025
35d928b
Change saving protocol
therealnb Feb 5, 2025
1ec1d83
Merge branch 'main' into codegate-844
therealnb Feb 5, 2025
8782308
try skipping test
therealnb Feb 5, 2025
d205ba0
Unskip test
therealnb Feb 5, 2025
300da89
Merge branch 'main' into codegate-844
therealnb Feb 5, 2025
a87bdeb
Try pip for torch
therealnb Feb 6, 2025
ce9f728
Merge branch 'main' into codegate-844
therealnb Feb 6, 2025
11c2caa
install torch for tests too
therealnb Feb 6, 2025
f6a6101
try installing after poetry
therealnb Feb 6, 2025
d63aa06
don't use cache
therealnb Feb 6, 2025
71c4952
put the command in the right place
therealnb Feb 6, 2025
dc54194
Try removing big file
therealnb Feb 6, 2025
bdd13d2
Put it back
therealnb Feb 6, 2025
05de7b0
Fix weight loading.
therealnb Feb 7, 2025
8cc94fb
Revert pytorch based changes
therealnb Feb 7, 2025
ebd4343
Merge branch 'main' into codegate-844
therealnb Feb 11, 2025
80f895f
remove pandas
therealnb Feb 11, 2025
9842913
Merge branch 'main' into codegate-844
therealnb Feb 11, 2025
f1bdeb8
onnx basically working
therealnb Feb 12, 2025
a436354
Move training to a specific class
therealnb Feb 12, 2025
d8976d2
Merge branch 'main' into codegate-844
therealnb Feb 12, 2025
3077372
Merge branch 'main' into codegate-844
therealnb Feb 12, 2025
35320c7
pin versions
therealnb Feb 12, 2025
8e1bde5
Merge branch 'codegate-844' of github.com:stacklok/codegate into code…
therealnb Feb 12, 2025
d31d080
some more detailed comments
therealnb Feb 12, 2025
7a1a5f8
Merge branch 'main' into codegate-844
therealnb Feb 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
remove pandas
Signed-off-by: nigel brown <nigel@stacklok.com>
  • Loading branch information
therealnb committed Feb 11, 2025
commit 80f895f6b5dea5e7b5118ca26872126a89f1aa8b
136 changes: 1 addition & 135 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ greenlet = "==3.1.1"
cachetools = "==5.5.1"
legacy-cgi = "==2.6.2"
torch = "==2.6.0"
pandas = "==2.2.3"

[tool.poetry.group.dev.dependencies]
pytest = "==8.3.4"
Expand Down
21 changes: 10 additions & 11 deletions src/codegate/pipeline/comment/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
)
from codegate.pipeline.base import PipelineContext
from codegate.pipeline.output import OutputPipelineContext, OutputPipelineStep
from codegate.pipeline.suspicious_commands.suspicious_commands import SuspiciousCommands
from codegate.storage import StorageEngine
from codegate.utils.package_extractor import PackageExtractor

Expand Down Expand Up @@ -51,16 +50,16 @@ def _create_chunk(self, original_chunk: ModelResponse, content: str) -> ModelRes
async def _snippet_comment(self, snippet: CodeSnippet, context: PipelineContext) -> str:
"""Create a comment for a snippet"""
comment = ""
sc = SuspiciousCommands.get_instance()
class_, prob = await sc.classify_phrase(snippet.code)
if class_ == 1:
liklihood = "possibly"
language = "code"
if prob > 0.9:
liklihood = "likely"
if snippet.language is not None:
language = snippet.language
comment = f"{comment}\n\n🛡️ CodeGate: The {language} supplied is {liklihood} unsafe. Please check carefully!\n\n" # noqa: E501
# sc = SuspiciousCommands.get_instance()
# class_, prob = await sc.classify_phrase(snippet.code)
# if class_ == 1:
# liklihood = "possibly"
# language = "code"
# if prob > 0.9:
# liklihood = "likely"
# if snippet.language is not None:
# language = snippet.language
# comment = f"{comment}\n\n🛡️ CodeGate: The {language} supplied is {liklihood} unsafe. Please check carefully!\n\n" # noqa: E501

snippet.libraries = PackageExtractor.extract_packages(snippet.code, snippet.language)

Expand Down
69 changes: 43 additions & 26 deletions tests/test_suspicious_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,56 @@
"""
Testing the suspicious commands
"""
import csv
import os

import pandas as pd
import pytest

from codegate.pipeline.suspicious_commands.suspicious_commands import (
SuspiciousCommands,
)

# Global variables for test data
benign_test_cmds = malicious_test_cmds = pd.DataFrame()
unsafe_commands = safe_commands = train_data = pd.DataFrame()
benign_test_cmds = []
malicious_test_cmds = []
unsafe_commands = []
safe_commands = []
train_data = []

MODEL_FILE = "src/codegate/pipeline/suspicious_commands/simple_nn_model.pt"
TD_PATH = "tests/data/suspicious_commands"


def read_csv(file_path):
    """Read a CSV file and return its rows as a list of dicts keyed by header.

    All values are returned as strings, exactly as ``csv.DictReader`` yields
    them (callers attach integer ``label`` fields afterwards).
    """
    # newline="" is required by the csv module so that embedded newlines in
    # quoted fields are handled correctly and not translated by the platform.
    with open(file_path, mode="r", newline="") as file:
        reader = csv.DictReader(file)
        return list(reader)


def setup_module(module):
    """Initialize the module-level test datasets before any test runs.

    Loads the benign/malicious evaluation sets and the safe/unsafe training
    sets from the CSV fixtures under ``TD_PATH``, attaches an integer
    ``label`` to every row (0 = benign/safe, 1 = malicious/unsafe), and
    builds a shuffled ``train_data`` list from the two training sets.

    Args:
        module: The module object pytest passes to ``setup_module`` (unused).
    """
    # Keep the import local (the original did too), but at the top of the
    # function rather than buried mid-body, per PEP 8.
    import random

    global benign_test_cmds, malicious_test_cmds, safe_commands
    global unsafe_commands, train_data
    benign_test_cmds = read_csv(f"{TD_PATH}/benign_test_cmds.csv")
    malicious_test_cmds = read_csv(f"{TD_PATH}/malicious_test_cmds.csv")
    unsafe_commands = read_csv(f"{TD_PATH}/unsafe_commands.csv")
    safe_commands = read_csv(f"{TD_PATH}/safe_commands.csv")

    # Attach ground-truth labels: 0 for benign/safe, 1 for malicious/unsafe.
    labeled_sets = (
        (benign_test_cmds, 0),
        (malicious_test_cmds, 1),
        (safe_commands, 0),
        (unsafe_commands, 1),
    )
    for rows, label in labeled_sets:
        for cmd in rows:
            cmd["label"] = label

    # Training data is the union of safe and unsafe commands, shuffled so the
    # trainer does not see all of one class before the other.
    # NOTE(review): the shuffle is unseeded, so training order varies between
    # runs — seed it if test flakiness is ever traced here.
    train_data = safe_commands + unsafe_commands
    random.shuffle(train_data)


@pytest.fixture
Expand Down Expand Up @@ -69,8 +86,8 @@ async def test_train():
if os.path.exists(MODEL_FILE):
return
sc2 = SuspiciousCommands()
phrases = train_data["cmd"].tolist()
labels = train_data["label"].tolist()
phrases = [cmd["cmd"] for cmd in train_data]
labels = [cmd["label"] for cmd in train_data]
await sc2.train(phrases, labels)
assert sc2.simple_nn is not None
sc2.save_model(MODEL_FILE)
Expand Down Expand Up @@ -116,15 +133,15 @@ async def test_classify_phrase(sc):
sc (SuspiciousCommands): The instance to test.
"""
tp = tn = fp = fn = 0
for command in benign_test_cmds["cmd"]:
prediction, _ = await sc.classify_phrase(command)
for command in benign_test_cmds:
prediction, _ = await sc.classify_phrase(command["cmd"])
if prediction == 0:
tn += 1
else:
fn += 1

for command in malicious_test_cmds["cmd"]:
prediction, _ = await sc.classify_phrase(command)
for command in malicious_test_cmds:
prediction, _ = await sc.classify_phrase(command["cmd"])
if prediction == 1:
tp += 1
else:
Expand All @@ -143,23 +160,23 @@ async def test_classify_phrase_confident(sc):
"""
confidence = 0.9
tp = tn = fp = fn = 0
for command in benign_test_cmds["cmd"]:
prediction, prob = await sc.classify_phrase(command)
for command in benign_test_cmds:
prediction, prob = await sc.classify_phrase(command["cmd"])
if prob > confidence:
if prediction == 0:
tn += 1
else:
fn += 1
else:
print(f"{command} {prob} {prediction} 0")
print(f"{command['cmd']} {prob} {prediction} 0")

for command in malicious_test_cmds["cmd"]:
prediction, prob = await sc.classify_phrase(command)
for command in malicious_test_cmds:
prediction, prob = await sc.classify_phrase(command["cmd"])
if prob > confidence:
if prediction == 1:
tp += 1
else:
fp += 1
else:
print(f"{command} {prob} {prediction} 1")
print(f"{command['cmd']} {prob} {prediction} 1")
check_results(tp, tn, fp, fn)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy