From bb97d802086b4009a55fcf5dcae8af490f95ff67 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 29 Aug 2023 17:45:46 -0700 Subject: [PATCH] organize python related modules --- pgml-extension/requirements.txt | 1 + pgml-extension/src/api.rs | 70 +++----------- .../src/bindings/{ => langchain}/langchain.py | 0 .../{langchain.rs => langchain/mod.rs} | 4 +- pgml-extension/src/bindings/mod.rs | 4 +- pgml-extension/src/bindings/python/mod.rs | 91 +++++++++++++++++++ .../bindings/{venv.py => python/python.py} | 0 .../bindings/{sklearn.rs => sklearn/mod.rs} | 17 ++-- .../src/bindings/{ => sklearn}/sklearn.py | 0 .../src/bindings/transformers/mod.rs | 12 +-- .../src/bindings/transformers/transformers.py | 2 + pgml-extension/src/bindings/venv.rs | 40 -------- pgml-extension/src/orm/model.rs | 2 +- pgml-extension/src/orm/task.rs | 4 +- 14 files changed, 127 insertions(+), 120 deletions(-) rename pgml-extension/src/bindings/{ => langchain}/langchain.py (100%) rename pgml-extension/src/bindings/{langchain.rs => langchain/mod.rs} (87%) create mode 100644 pgml-extension/src/bindings/python/mod.rs rename pgml-extension/src/bindings/{venv.py => python/python.py} (100%) rename pgml-extension/src/bindings/{sklearn.rs => sklearn/mod.rs} (97%) rename pgml-extension/src/bindings/{ => sklearn}/sklearn.py (100%) delete mode 100644 pgml-extension/src/bindings/venv.rs diff --git a/pgml-extension/requirements.txt b/pgml-extension/requirements.txt index 3fdfeb4b7..db0c5d242 100644 --- a/pgml-extension/requirements.txt +++ b/pgml-extension/requirements.txt @@ -17,6 +17,7 @@ sacremoses==0.0.53 scikit-learn==1.3.0 sentencepiece==0.1.99 sentence-transformers==2.2.2 +tokenizers==0.13.3 torch==2.0.1 torchaudio==2.0.2 torchvision==0.15.2 diff --git a/pgml-extension/src/api.rs b/pgml-extension/src/api.rs index b560ee9d4..e1339d948 100644 --- a/pgml-extension/src/api.rs +++ b/pgml-extension/src/api.rs @@ -6,11 +6,9 @@ use pgrx::iter::{SetOfIterator, TableIterator}; use pgrx::*; #[cfg(feature = "python")] -use pyo3::prelude::*; use serde_json::json; #[cfg(feature = "python")] -use crate::bindings::sklearn::package_version; use crate::orm::*; macro_rules! unwrap_or_error { @@ -25,38 +23,13 @@ macro_rules! unwrap_or_error { #[cfg(feature = "python")] #[pg_extern] pub fn activate_venv(venv: &str) -> bool { - unwrap_or_error!(crate::bindings::venv::activate_venv(venv)) + unwrap_or_error!(crate::bindings::python::activate_venv(venv)) } #[cfg(feature = "python")] #[pg_extern(immutable, parallel_safe)] pub fn validate_python_dependencies() -> bool { - unwrap_or_error!(crate::bindings::venv::activate()); - - Python::with_gil(|py| { - let sys = PyModule::import(py, "sys").unwrap(); - let version: String = sys.getattr("version").unwrap().extract().unwrap(); - info!("Python version: {version}"); - for module in ["xgboost", "lightgbm", "numpy", "sklearn"] { - match py.import(module) { - Ok(_) => (), - Err(e) => { - panic!( - "The {module} package is missing. Install it with `sudo pip3 install {module}`\n{e}" - ); - } - } - } - }); - - let sklearn = unwrap_or_error!(package_version("sklearn")); - let xgboost = unwrap_or_error!(package_version("xgboost")); - let lightgbm = unwrap_or_error!(package_version("lightgbm")); - let numpy = unwrap_or_error!(package_version("numpy")); - - info!("Scikit-learn {sklearn}, XGBoost {xgboost}, LightGBM {lightgbm}, NumPy {numpy}",); - - true + unwrap_or_error!(crate::bindings::python::validate_dependencies()) } #[cfg(not(feature = "python"))] @@ -66,8 +39,7 @@ pub fn validate_python_dependencies() {} #[cfg(feature = "python")] #[pg_extern] pub fn python_package_version(name: &str) -> String { - unwrap_or_error!(crate::bindings::venv::activate()); - unwrap_or_error!(package_version(name)) + unwrap_or_error!(crate::bindings::python::package_version(name)) } #[cfg(not(feature = "python"))] @@ -79,13 +51,19 @@ pub fn python_package_version(name: &str) { #[cfg(feature = "python")] #[pg_extern] pub fn python_pip_freeze() -> TableIterator<'static, (name!(package, String),)> { - unwrap_or_error!(crate::bindings::venv::activate()); + unwrap_or_error!(crate::bindings::python::pip_freeze()) +} - let packages = unwrap_or_error!(crate::bindings::venv::freeze()) - .into_iter() - .map(|package| (package,)); +#[cfg(feature = "python")] +#[pg_extern] +fn python_version() -> String { + unwrap_or_error!(crate::bindings::python::version()) +} - TableIterator::new(packages) +#[cfg(not(feature = "python"))] +#[pg_extern] +pub fn python_version() -> String { + String::from("Python is not installed, recompile with `--features python`") } #[pg_extern] @@ -104,26 +82,6 @@ pub fn validate_shared_library() { } } -#[cfg(feature = "python")] -#[pg_extern] -fn python_version() -> String { - unwrap_or_error!(crate::bindings::venv::activate()); - let mut version = String::new(); - - Python::with_gil(|py| { - let sys = PyModule::import(py, "sys").unwrap(); - version = sys.getattr("version").unwrap().extract().unwrap(); - }); - - version -} - -#[cfg(not(feature = "python"))] -#[pg_extern] -pub fn python_version() -> String { - String::from("Python is not installed, recompile with `--features python`") -} - #[pg_extern(immutable, parallel_safe)] fn version() -> String { crate::VERSION.to_string() diff --git a/pgml-extension/src/bindings/langchain.py b/pgml-extension/src/bindings/langchain/langchain.py similarity index 100% rename from pgml-extension/src/bindings/langchain.py rename to pgml-extension/src/bindings/langchain/langchain.py diff --git a/pgml-extension/src/bindings/langchain.rs b/pgml-extension/src/bindings/langchain/mod.rs similarity index 87% rename from pgml-extension/src/bindings/langchain.rs rename to pgml-extension/src/bindings/langchain/mod.rs index 7ccaab954..00ee593fd 100644 --- a/pgml-extension/src/bindings/langchain.rs +++ b/pgml-extension/src/bindings/langchain/mod.rs @@ -6,10 +6,10 @@ use pyo3::types::PyTuple; use crate::{bindings::TracebackError, create_pymodule}; -create_pymodule!("/src/bindings/langchain.py"); +create_pymodule!("/src/bindings/langchain/langchain.py"); pub fn chunk(splitter: &str, text: &str, kwargs: &serde_json::Value) -> Result> { - crate::bindings::venv::activate()?; + crate::bindings::python::activate()?; let kwargs = serde_json::to_string(kwargs).unwrap(); diff --git a/pgml-extension/src/bindings/mod.rs b/pgml-extension/src/bindings/mod.rs index 94a7668be..2da42b10f 100644 --- a/pgml-extension/src/bindings/mod.rs +++ b/pgml-extension/src/bindings/mod.rs @@ -38,11 +38,11 @@ pub mod langchain; pub mod lightgbm; pub mod linfa; #[cfg(feature = "python")] +pub mod python; +#[cfg(feature = "python")] pub mod sklearn; #[cfg(feature = "python")] pub mod transformers; -#[cfg(feature = "python")] -pub mod venv; pub mod xgboost; pub type Fit = fn(dataset: &Dataset, hyperparams: &Hyperparams) -> Result>; diff --git a/pgml-extension/src/bindings/python/mod.rs b/pgml-extension/src/bindings/python/mod.rs new file mode 100644 index 000000000..7f527b0fc --- /dev/null +++ b/pgml-extension/src/bindings/python/mod.rs @@ -0,0 +1,91 @@ +//! Use virtualenv. + +use anyhow::Result; +use once_cell::sync::Lazy; +use pgrx::iter::TableIterator; +use pgrx::*; +use pyo3::prelude::*; +use pyo3::types::PyTuple; + +use crate::config::get_config; +use crate::{bindings::TracebackError, create_pymodule}; + +static CONFIG_NAME: &str = "pgml.venv"; + +create_pymodule!("/src/bindings/python/python.py"); + +pub fn activate_venv(venv: &str) -> Result { + Python::with_gil(|py| { + let activate_venv: Py = get_module!(PY_MODULE).getattr(py, "activate_venv")?; + let result: Py = + activate_venv.call1(py, PyTuple::new(py, &[venv.to_string().into_py(py)]))?; + + Ok(result.extract(py)?) + }) +} + +pub fn activate() -> Result { + match get_config(CONFIG_NAME) { + Some(venv) => activate_venv(&venv), + None => Ok(false), + } +} + +pub fn pip_freeze() -> Result> { + activate()?; + let packages = Python::with_gil(|py| -> Result> { + let freeze = get_module!(PY_MODULE).getattr(py, "freeze")?; + let result = freeze.call0(py)?; + + Ok(result.extract(py)?) + })?; + + Ok(TableIterator::new( + packages.into_iter().map(|package| (package,)), + )) +} + +pub fn validate_dependencies() -> Result { + activate()?; + Python::with_gil(|py| { + let sys = PyModule::import(py, "sys").unwrap(); + let version: String = sys.getattr("version").unwrap().extract().unwrap(); + info!("Python version: {version}"); + for module in ["xgboost", "lightgbm", "numpy", "sklearn"] { + match py.import(module) { + Ok(_) => (), + Err(e) => { + panic!( + "The {module} package is missing. Install it with `sudo pip3 install {module}`\n{e}" + ); + } + } + } + }); + + let sklearn = package_version("sklearn")?; + let xgboost = package_version("xgboost")?; + let lightgbm = package_version("lightgbm")?; + let numpy = package_version("numpy")?; + + info!("Scikit-learn {sklearn}, XGBoost {xgboost}, LightGBM {lightgbm}, NumPy {numpy}",); + + Ok(true) +} + +pub fn version() -> Result { + activate()?; + Python::with_gil(|py| { + let sys = PyModule::import(py, "sys").unwrap(); + let version: String = sys.getattr("version").unwrap().extract().unwrap(); + Ok(version) + }) +} + +pub fn package_version(name: &str) -> Result { + activate()?; + Python::with_gil(|py| { + let package = py.import(name)?; + Ok(package.getattr("__version__")?.extract()?) + }) +} diff --git a/pgml-extension/src/bindings/venv.py b/pgml-extension/src/bindings/python/python.py similarity index 100% rename from pgml-extension/src/bindings/venv.py rename to pgml-extension/src/bindings/python/python.py diff --git a/pgml-extension/src/bindings/sklearn.rs b/pgml-extension/src/bindings/sklearn/mod.rs similarity index 97% rename from pgml-extension/src/bindings/sklearn.rs rename to pgml-extension/src/bindings/sklearn/mod.rs index 99e9cfe78..05e85d97c 100644 --- a/pgml-extension/src/bindings/sklearn.rs +++ b/pgml-extension/src/bindings/sklearn/mod.rs @@ -15,11 +15,13 @@ use once_cell::sync::Lazy; use pyo3::prelude::*; use pyo3::types::PyTuple; -use crate::bindings::Bindings; +use crate::{ + bindings::{Bindings, TracebackError}, + create_pymodule, + orm::*, +}; -use crate::{bindings::TracebackError, create_pymodule, orm::*}; - -create_pymodule!("/src/bindings/sklearn.py"); +create_pymodule!("/src/bindings/sklearn/sklearn.py"); macro_rules! wrap_fit { ($fn_name:tt, $task:literal) => { @@ -355,10 +357,3 @@ pub fn cluster_metrics( Ok(scores) }) } - -pub fn package_version(name: &str) -> Result { - Python::with_gil(|py| { - let package = py.import(name)?; - Ok(package.getattr("__version__")?.extract()?) - }) -} diff --git a/pgml-extension/src/bindings/sklearn.py b/pgml-extension/src/bindings/sklearn/sklearn.py similarity index 100% rename from pgml-extension/src/bindings/sklearn.py rename to pgml-extension/src/bindings/sklearn/sklearn.py diff --git a/pgml-extension/src/bindings/transformers/mod.rs b/pgml-extension/src/bindings/transformers/mod.rs index 7621a2b3e..91158f860 100644 --- a/pgml-extension/src/bindings/transformers/mod.rs +++ b/pgml-extension/src/bindings/transformers/mod.rs @@ -24,7 +24,7 @@ pub fn transform( args: &serde_json::Value, inputs: Vec<&str>, ) -> Result { - crate::bindings::venv::activate()?; + crate::bindings::python::activate()?; whitelist::verify_task(task)?; @@ -70,7 +70,7 @@ pub fn embed( inputs: Vec<&str>, kwargs: &serde_json::Value, ) -> Result>> { - crate::bindings::venv::activate()?; + crate::bindings::python::activate()?; let kwargs = serde_json::to_string(kwargs)?; Python::with_gil(|py| -> Result>> { @@ -101,7 +101,7 @@ pub fn tune( hyperparams: &JsonB, path: &Path, ) -> Result> { - crate::bindings::venv::activate()?; + crate::bindings::python::activate()?; let task = task.to_string(); let hyperparams = serde_json::to_string(&hyperparams.0)?; @@ -131,7 +131,7 @@ pub fn tune( } pub fn generate(model_id: i64, inputs: Vec<&str>, config: JsonB) -> Result> { - crate::bindings::venv::activate()?; + crate::bindings::python::activate()?; Python::with_gil(|py| -> Result> { let generate = get_module!(PY_MODULE) @@ -219,7 +219,7 @@ pub fn load_dataset( limit: Option, kwargs: &serde_json::Value, ) -> Result { - crate::bindings::venv::activate()?; + crate::bindings::python::activate()?; let kwargs = serde_json::to_string(kwargs)?; @@ -376,7 +376,7 @@ pub fn load_dataset( } pub fn clear_gpu_cache(memory_usage: Option) -> Result { - crate::bindings::venv::activate().unwrap(); + crate::bindings::python::activate().unwrap(); Python::with_gil(|py| -> Result { let clear_gpu_cache: Py = get_module!(PY_MODULE) diff --git a/pgml-extension/src/bindings/transformers/transformers.py b/pgml-extension/src/bindings/transformers/transformers.py index 0359085f5..af948e9ef 100644 --- a/pgml-extension/src/bindings/transformers/transformers.py +++ b/pgml-extension/src/bindings/transformers/transformers.py @@ -34,6 +34,8 @@ DataCollatorWithPadding, DefaultDataCollator, GenerationConfig, + PegasusForConditionalGeneration, + PegasusTokenizer, TrainingArguments, Trainer, ) diff --git a/pgml-extension/src/bindings/venv.rs b/pgml-extension/src/bindings/venv.rs deleted file mode 100644 index 458803a08..000000000 --- a/pgml-extension/src/bindings/venv.rs +++ /dev/null @@ -1,40 +0,0 @@ -//! Use virtualenv. - -use anyhow::Result; -use once_cell::sync::Lazy; -use pgrx::*; -use pyo3::prelude::*; -use pyo3::types::PyTuple; - -use crate::config::get_config; -use crate::{bindings::TracebackError, create_pymodule}; - -static CONFIG_NAME: &str = "pgml.venv"; - -create_pymodule!("/src/bindings/venv.py"); - -pub fn activate_venv(venv: &str) -> Result { - Python::with_gil(|py| { - let activate_venv: Py = get_module!(PY_MODULE).getattr(py, "activate_venv")?; - let result: Py = - activate_venv.call1(py, PyTuple::new(py, &[venv.to_string().into_py(py)]))?; - - Ok(result.extract(py)?) - }) -} - -pub fn activate() -> Result { - match get_config(CONFIG_NAME) { - Some(venv) => activate_venv(&venv), - None => Ok(false), - } -} - -pub fn freeze() -> Result> { - Python::with_gil(|py| { - let freeze = get_module!(PY_MODULE).getattr(py, "freeze")?; - let result = freeze.call0(py)?; - - Ok(result.extract(py)?) - }) -} diff --git a/pgml-extension/src/orm/model.rs b/pgml-extension/src/orm/model.rs index 46ef34821..d847d934d 100644 --- a/pgml-extension/src/orm/model.rs +++ b/pgml-extension/src/orm/model.rs @@ -89,7 +89,7 @@ impl Model { }; if runtime == Runtime::python { - crate::bindings::venv::activate().unwrap(); + crate::bindings::python::activate().unwrap(); } let dataset = snapshot.tabular_dataset(); diff --git a/pgml-extension/src/orm/task.rs b/pgml-extension/src/orm/task.rs index a5b47ea88..f0fe6b02f 100644 --- a/pgml-extension/src/orm/task.rs +++ b/pgml-extension/src/orm/task.rs @@ -48,7 +48,7 @@ impl Task { Task::text_generation => "perplexity", Task::text2text => "perplexity", Task::cluster => "silhouette", - Task::embedding => error!("No default target metric for embedding task") + Task::embedding => error!("No default target metric for embedding task"), } .to_string() } @@ -64,7 +64,7 @@ impl Task { Task::text_generation => false, Task::text2text => false, Task::cluster => true, - Task::embedding => error!("No default target metric positive for embedding task") + Task::embedding => error!("No default target metric positive for embedding task"), } } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy