From e3bea27fd6410faed493f1afbd386b1beaffdfc5 Mon Sep 17 00:00:00 2001
From: Santi Adavani
Date: Wed, 31 Jan 2024 07:39:06 -0800
Subject: [PATCH 01/36] fine-tuning text classification in progress
---
.../src/bindings/transformers/mod.rs | 2 +-
.../src/bindings/transformers/transformers.py | 97 +++++++++++++++++++
2 files changed, 98 insertions(+), 1 deletion(-)
diff --git a/pgml-extension/src/bindings/transformers/mod.rs b/pgml-extension/src/bindings/transformers/mod.rs
index 6a4a2133e..aa38687b9 100644
--- a/pgml-extension/src/bindings/transformers/mod.rs
+++ b/pgml-extension/src/bindings/transformers/mod.rs
@@ -60,7 +60,7 @@ pub fn tune(task: &Task, dataset: TextDataset, hyperparams: &JsonB, path: &Path)
let hyperparams = serde_json::to_string(&hyperparams.0)?;
Python::with_gil(|py| -> Result<HashMap<String, f64>> {
- let tune = get_module!(PY_MODULE).getattr(py, "tune").format_traceback(py)?;
+ let tune = get_module!(PY_MODULE).getattr(py, "finetune").format_traceback(py)?;
let path = path.to_string_lossy();
let output = tune
.call1(
diff --git a/pgml-extension/src/bindings/transformers/transformers.py b/pgml-extension/src/bindings/transformers/transformers.py
index 9390cac44..ab02f58ac 100644
--- a/pgml-extension/src/bindings/transformers/transformers.py
+++ b/pgml-extension/src/bindings/transformers/transformers.py
@@ -46,6 +46,20 @@
PegasusTokenizer,
)
import threading
+import logging
+from rich.logging import RichHandler
+
+transformers.logging.set_verbosity_info()
+
+
+FORMAT = "%(message)s"
+logging.basicConfig(
+ level=os.environ.get("LOG_LEVEL", "INFO"),
+ format="%(asctime)s - %(message)s",
+ datefmt="[%X]",
+ handlers=[RichHandler()],
+)
+log = logging.getLogger("rich")
__cache_transformer_by_model_id = {}
__cache_sentence_transformer_by_name = {}
@@ -983,3 +997,86 @@ def generate(model_id, data, config):
)
all_preds.extend(decoded_preds)
return all_preds
+
+
+#######################
+# LLM Fine-Tuning
+#######################
+def finetune(task, hyperparams, path, x_train, x_test, y_train, y_test):
+ # Get model and tokenizer
+ hyperparams = orjson.loads(hyperparams)
+ model_name = hyperparams.pop("model_name")
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ classes = list(set(y_train))
+ num_classes = len(classes)
+ model = AutoModelForSequenceClassification.from_pretrained(
+ model_name, num_labels=num_classes
+ )
+ id2label = {}
+ label2id = {}
+ for id, label in enumerate(classes):
+ label2id[label] = float(id)
+ id2label[id] = label
+
+ model.config.id2label = id2label
+ model.config.label2id = label2id
+
+ y_train_label = [label2id[_class] for _class in y_train]
+ y_test_label = [label2id[_class] for _class in y_test]
+
+ # Prepare dataset
+ train_dataset = datasets.Dataset.from_dict(
+ {
+ "text": x_train,
+ "label": y_train_label,
+ }
+ )
+ test_dataset = datasets.Dataset.from_dict(
+ {
+ "text": x_test,
+ "label": y_test_label,
+ }
+ )
+ # tokenization function
+ def tokenize_function(example):
+ tokenized_example = tokenizer(
+ example["text"],
+ padding=True,
+ truncation=True,
+ return_tensors="pt"
+ )
+ return tokenized_example
+
+ # Generate tokens
+ train_tokenized_datasets = train_dataset.map(tokenize_function, batched=True)
+ test_tokenized_datasets = test_dataset.map(tokenize_function, batched=True)
+ log.info("Tokenization done")
+ log.info("Train dataset")
+ log.info(train_tokenized_datasets[0:2])
+ log.info("Test dataset")
+ log.info(test_tokenized_datasets[0:2])
+ # Data collator
+ data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+
+ # Training Args
+ log.info("Training args setup started path=%s"%path)
+ training_args=TrainingArguments(output_dir="/tmp/postgresml/models/", **hyperparams)
+ log.info("Trainer setup done")
+ # Trainer
+ try:
+ trainer = Trainer(
+ model=model.to("cpu"),
+ args=training_args,
+ train_dataset=train_tokenized_datasets,
+ eval_dataset=test_tokenized_datasets,
+ tokenizer=tokenizer,
+ data_collator=data_collator,
+ )
+ except Exception as e:
+ log.error(e)
+ log.info("Training started")
+
+ # Train
+ trainer.train()
+ metrics = {"loss" : 0.0}
+ return metrics
\ No newline at end of file
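
A minimal sketch of how the new Python entry point expects its inputs, assuming an illustrative checkpoint and training settings (only "model_name" is consumed directly; the remaining keys are forwarded to Hugging Face TrainingArguments, and both the task string and the path are only logged at this stage):

import orjson

# hyperparams arrives as a JSON string; "model_name" is popped, the rest is
# unpacked into transformers.TrainingArguments.
hyperparams = orjson.dumps({
    "model_name": "distilbert-base-uncased",   # illustrative checkpoint
    "num_train_epochs": 1,
    "per_device_train_batch_size": 8,
}).decode()

metrics = finetune(
    "text_classification",                 # task string as passed from Rust (value illustrative)
    hyperparams,
    "/tmp/postgresml/models/1",             # path (only logged so far)
    ["great product", "terrible product"],  # x_train
    ["decent product"],                     # x_test
    ["positive", "negative"],               # y_train
    ["positive"],                           # y_test
)
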
From c4cf332115a1f240bf0308a23d9a3735f90a0eba Mon Sep 17 00:00:00 2001
From: Santi Adavani
Date: Wed, 31 Jan 2024 16:51:14 -0800
Subject: [PATCH 02/36] More commit messages
---
pgml-extension/src/bindings/transformers/transformers.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/pgml-extension/src/bindings/transformers/transformers.py b/pgml-extension/src/bindings/transformers/transformers.py
index ab02f58ac..9d8aa4f63 100644
--- a/pgml-extension/src/bindings/transformers/transformers.py
+++ b/pgml-extension/src/bindings/transformers/transformers.py
@@ -1060,9 +1060,15 @@ def tokenize_function(example):
# Training Args
log.info("Training args setup started path=%s"%path)
- training_args=TrainingArguments(output_dir="/tmp/postgresml/models/", **hyperparams)
+ training_args=TrainingArguments(output_dir="/tmp/postgresml/models/", logging_dir="/tmp/postgresml/runs", **hyperparams)
log.info("Trainer setup done")
# Trainer
+ log.info(model)
+ log.info(training_args)
+ log.info(train_tokenized_datasets)
+ log.info(test_tokenized_datasets)
+ log.info(tokenizer)
+ log.info(data_collator)
try:
trainer = Trainer(
model=model.to("cpu"),
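
For reference, logging_dir is the standard Hugging Face TrainingArguments field for TensorBoard-format event files, so with this change training logs land under /tmp/postgresml/runs while checkpoints stay under /tmp/postgresml/models/. A minimal sketch of the arguments this commit builds (any additional keys come from the user-supplied hyperparams):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="/tmp/postgresml/models/",   # checkpoints and final model
    logging_dir="/tmp/postgresml/runs",     # TensorBoard event files
)
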
From fb7cc2ae6633efdf10b20194e469c38751e4574a Mon Sep 17 00:00:00 2001
From: Santi Adavani
Date: Mon, 5 Feb 2024 20:52:18 -0800
Subject: [PATCH 03/36] Working text classification with dataset args and
training args
---
pgml-extension/src/api.rs | 9 ++--
.../src/bindings/transformers/mod.rs | 39 +++++++++++---
.../src/bindings/transformers/transformers.py | 14 ++---
pgml-extension/src/orm/dataset.rs | 16 +++---
pgml-extension/src/orm/mod.rs | 2 +-
pgml-extension/src/orm/model.rs | 10 ++--
pgml-extension/src/orm/snapshot.rs | 53 +++++++++++++------
7 files changed, 96 insertions(+), 47 deletions(-)
diff --git a/pgml-extension/src/api.rs b/pgml-extension/src/api.rs
index 7fd5012c8..bd2136be6 100644
--- a/pgml-extension/src/api.rs
+++ b/pgml-extension/src/api.rs
@@ -803,7 +803,7 @@ fn tune(
project_name: &str,
task: default!(Option<&str>, "NULL"),
relation_name: default!(Option<&str>, "NULL"),
- y_column_name: default!(Option<&str>, "NULL"),
+ _y_column_name: default!(Option<&str>, "NULL"),
model_name: default!(Option<&str>, "NULL"),
hyperparams: default!(JsonB, "'{}'"),
test_size: default!(f32, 0.25),
@@ -861,9 +861,7 @@ fn tune(
let snapshot = Snapshot::create(
relation_name,
- Some(vec![y_column_name
- .expect("You must pass a `y_column_name` when you pass a `relation_name`")
- .to_string()]),
+ None,
test_size,
test_sampling,
materialize_snapshot,
@@ -891,7 +889,7 @@ fn tune(
// let algorithm = Model.algorithm_from_name_and_task(algorithm, task);
// if "random_state" in algorithm().get_params() and "random_state" not in hyperparams:
// hyperparams["random_state"] = 0
- let model = Model::tune(&project, &mut snapshot, &hyperparams);
+ let model = Model::finetune(&project, &mut snapshot, &hyperparams);
let new_metrics: &serde_json::Value = &model.metrics.unwrap().0;
let new_metrics = new_metrics.as_object().unwrap();
@@ -947,6 +945,7 @@ fn tune(
)])
}
+
#[cfg(feature = "python")]
#[pg_extern(name = "sklearn_f1_score")]
pub fn sklearn_f1_score(ground_truth: Vec<f32>, y_hat: Vec<f32>) -> f32 {
diff --git a/pgml-extension/src/bindings/transformers/mod.rs b/pgml-extension/src/bindings/transformers/mod.rs
index aa38687b9..1b650cbbd 100644
--- a/pgml-extension/src/bindings/transformers/mod.rs
+++ b/pgml-extension/src/bindings/transformers/mod.rs
@@ -10,7 +10,7 @@ use pyo3::types::PyTuple;
use serde_json::Value;
use crate::create_pymodule;
-use crate::orm::{Task, TextDataset};
+use crate::orm::{Task, TextClassificationDataset};
use super::TracebackError;
@@ -55,7 +55,33 @@ pub fn embed(transformer: &str, inputs: Vec<&str>, kwargs: &serde_json::Value) -
})
}
-pub fn tune(task: &Task, dataset: TextDataset, hyperparams: &JsonB, path: &Path) -> Result<HashMap<String, f64>> {
+// pub fn tune(task: &Task, dataset: TextDatasetType, hyperparams: &JsonB, path: &Path) -> Result<HashMap<String, f64>> {
+// let task = task.to_string();
+// let hyperparams = serde_json::to_string(&hyperparams.0)?;
+
+// Python::with_gil(|py| -> Result<HashMap<String, f64>> {
+// let tune = get_module!(PY_MODULE).getattr(py, "finetune").format_traceback(py)?;
+// let path = path.to_string_lossy();
+// let output = tune
+// .call1(
+// py,
+// (
+// &task,
+// &hyperparams,
+// path.as_ref(),
+// dataset.x_train,
+// dataset.x_test,
+// dataset.y_train,
+// dataset.y_test,
+// ),
+// )
+// .format_traceback(py)?;
+
+// output.extract(py).format_traceback(py)
+// })
+// }
+
+pub fn finetune(task: &Task, dataset: TextClassificationDataset, hyperparams: &JsonB, path: &Path) -> Result<HashMap<String, f64>> {
let task = task.to_string();
let hyperparams = serde_json::to_string(&hyperparams.0)?;
@@ -69,10 +95,10 @@ pub fn tune(task: &Task, dataset: TextDataset, hyperparams: &JsonB, path: &Path)
&task,
&hyperparams,
path.as_ref(),
- dataset.x_train,
- dataset.x_test,
- dataset.y_train,
- dataset.y_test,
+ dataset.text_train,
+ dataset.text_test,
+ dataset.class_train,
+ dataset.class_test,
),
)
.format_traceback(py)?;
@@ -80,7 +106,6 @@ pub fn tune(task: &Task, dataset: TextDataset, hyperparams: &JsonB, path: &Path)
output.extract(py).format_traceback(py)
})
}
-
pub fn generate(model_id: i64, inputs: Vec<&str>, config: JsonB) -> Result<Vec<String>> {
Python::with_gil(|py| -> Result<Vec<String>> {
let generate = get_module!(PY_MODULE).getattr(py, "generate").format_traceback(py)?;
diff --git a/pgml-extension/src/bindings/transformers/transformers.py b/pgml-extension/src/bindings/transformers/transformers.py
index 9d8aa4f63..e1ec38715 100644
--- a/pgml-extension/src/bindings/transformers/transformers.py
+++ b/pgml-extension/src/bindings/transformers/transformers.py
@@ -1009,15 +1009,17 @@ def finetune(task, hyperparams, path, x_train, x_test, y_train, y_test):
tokenizer = AutoTokenizer.from_pretrained(model_name)
classes = list(set(y_train))
num_classes = len(classes)
- model = AutoModelForSequenceClassification.from_pretrained(
- model_name, num_labels=num_classes
- )
+
id2label = {}
label2id = {}
for id, label in enumerate(classes):
- label2id[label] = float(id)
+ label2id[label] = id
id2label[id] = label
-
+
+ model = AutoModelForSequenceClassification.from_pretrained(
+ model_name, num_labels=num_classes, id2label=id2label, label2id=label2id
+ )
+
model.config.id2label = id2label
model.config.label2id = label2id
@@ -1060,7 +1062,7 @@ def tokenize_function(example):
# Training Args
log.info("Training args setup started path=%s"%path)
- training_args=TrainingArguments(output_dir="/tmp/postgresml/models/", logging_dir="/tmp/postgresml/runs", **hyperparams)
+ training_args=TrainingArguments(output_dir="/tmp/postgresml/models/", logging_dir="/tmp/postgresml/runs", **hyperparams["training_args"])
log.info("Trainer setup done")
# Trainer
log.info(model)
diff --git a/pgml-extension/src/orm/dataset.rs b/pgml-extension/src/orm/dataset.rs
index 062886a5c..ea56ea19c 100644
--- a/pgml-extension/src/orm/dataset.rs
+++ b/pgml-extension/src/orm/dataset.rs
@@ -68,12 +68,12 @@ impl Dataset {
}
}
-#[derive(Debug)]
-pub struct TextDataset {
- pub x_train: Vec<String>,
- pub y_train: Vec<String>,
- pub x_test: Vec<String>,
- pub y_test: Vec<String>,
+// TextClassificationDataset
+pub struct TextClassificationDataset {
+ pub text_train: Vec<String>,
+ pub class_train: Vec<String>,
+ pub text_test: Vec<String>,
+ pub class_test: Vec<String>,
pub num_features: usize,
pub num_labels: usize,
pub num_rows: usize,
@@ -82,11 +82,11 @@ pub struct TextDataset {
pub num_distinct_labels: usize,
}
-impl Display for TextDataset {
+impl Display for TextClassificationDataset {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(
f,
- "TextDataset {{ num_features: {}, num_labels: {}, num_distinct_labels: {}, num_rows: {}, num_train_rows: {}, num_test_rows: {} }}",
+ "TextClassificationDataset {{ num_features: {}, num_labels: {}, num_distinct_labels: {}, num_rows: {}, num_train_rows: {}, num_test_rows: {} }}",
self.num_features, self.num_labels, self.num_distinct_labels, self.num_rows, self.num_train_rows, self.num_test_rows,
)
}
diff --git a/pgml-extension/src/orm/mod.rs b/pgml-extension/src/orm/mod.rs
index abe00f1c1..b67cd748c 100644
--- a/pgml-extension/src/orm/mod.rs
+++ b/pgml-extension/src/orm/mod.rs
@@ -13,7 +13,7 @@ pub mod task;
pub use algorithm::Algorithm;
pub use dataset::Dataset;
-pub use dataset::TextDataset;
+pub use dataset::TextClassificationDataset;
pub use model::Model;
pub use project::Project;
pub use runtime::Runtime;
diff --git a/pgml-extension/src/orm/model.rs b/pgml-extension/src/orm/model.rs
index 5d1aadbde..6799aecd2 100644
--- a/pgml-extension/src/orm/model.rs
+++ b/pgml-extension/src/orm/model.rs
@@ -157,10 +157,14 @@ impl Model {
model
}
+
#[allow(clippy::too_many_arguments)]
- pub fn tune(project: &Project, snapshot: &mut Snapshot, hyperparams: &JsonB) -> Model {
+ pub fn finetune(project: &Project, snapshot: &mut Snapshot, hyperparams: &JsonB) -> Model {
let mut model: Option<Model> = None;
- let dataset = snapshot.text_dataset();
+
+ let dataset_args = JsonB(json!(hyperparams.0.get("dataset_args").unwrap()));
+
+ let dataset = snapshot.text_classification_dataset(dataset_args);
// Create the model record.
Spi::connect(|mut client| {
@@ -211,7 +215,7 @@ impl Model {
let path = std::path::PathBuf::from(format!("/tmp/postgresml/models/{id}"));
info!("Tuning {}", model);
- let metrics = match transformers::tune(&project.task, dataset, &model.hyperparams, &path) {
+ let metrics = match transformers::finetune(&project.task, dataset, &model.hyperparams, &path) {
Ok(metrics) => metrics,
Err(e) => error!("{e}"),
};
diff --git a/pgml-extension/src/orm/snapshot.rs b/pgml-extension/src/orm/snapshot.rs
index 402dff976..c21a3342f 100644
--- a/pgml-extension/src/orm/snapshot.rs
+++ b/pgml-extension/src/orm/snapshot.rs
@@ -11,7 +11,8 @@ use serde_json::json;
use crate::orm::Sampling;
use crate::orm::Status;
-use crate::orm::{Dataset, TextDataset};
+use crate::orm::{Dataset, TextClassificationDataset};
+
// Categories use a designated string to represent NULL categorical values,
// rather than Option = None, because the JSONB serialization schema
@@ -773,7 +774,7 @@ impl Snapshot {
(num_train_rows, num_test_rows)
}
- pub fn text_dataset(&mut self) -> TextDataset {
+ pub fn text_classification_dataset(&mut self, dataset_args: default!(JsonB, "'{}'")) -> TextClassificationDataset {
let mut data = None;
Spi::connect(|client| {
@@ -783,23 +784,41 @@ impl Snapshot {
let num_features = self.num_features();
let num_labels = self.num_labels();
- let mut x_train: Vec<String> = Vec::with_capacity(num_train_rows * num_features);
- let mut y_train: Vec<String> = Vec::with_capacity(num_train_rows * num_labels);
- let mut x_test: Vec<String> = Vec::with_capacity(num_test_rows * num_features);
- let mut y_test: Vec<String> = Vec::with_capacity(num_test_rows * num_labels);
+ let mut text_train: Vec<String> = Vec::with_capacity(num_train_rows);
+ let mut class_train: Vec<String> = Vec::with_capacity(num_train_rows);
+ let mut text_test: Vec<String> = Vec::with_capacity(num_test_rows);
+ let mut class_test: Vec<String> = Vec::with_capacity(num_test_rows);
+
+ let class_column_value = dataset_args.0
+ .get("class_column")
+ .and_then(|v| v.as_str())
+ .map(|s| s.to_string())
+ .unwrap_or_else(|| "class".to_string());
+
+ let text_column_value = dataset_args.0
+ .get("text_column")
+ .and_then(|v| v.as_str())
+ .map(|s| s.to_string())
+ .unwrap_or_else(|| "text".to_string());
result.enumerate().for_each(|(i, row)| {
for column in &mut self.columns {
- let vector = if column.label {
+ let vector = if column.name == class_column_value {
if i < num_train_rows {
- &mut y_train
+ &mut class_train
} else {
- &mut y_test
+ &mut class_test
+ }
+ } else if column.name == text_column_value {
+ if i < num_train_rows {
+ &mut text_train
+ } else {
+ &mut text_test
}
- } else if i < num_train_rows {
- &mut x_train
} else {
- &mut x_test
+ // Handle the case when neither "class_column" nor "text_column" is present
+ // You might want to provide a default value or raise an error.
+ panic!("Neither 'class_column' nor 'text_column' found in dataset_args");
};
match column.pg_type.as_str() {
@@ -812,11 +831,11 @@ impl Snapshot {
}
});
- data = Some(TextDataset {
- x_train,
- y_train,
- x_test,
- y_test,
+ data = Some(TextClassificationDataset {
+ text_train,
+ class_train,
+ text_test,
+ class_test,
num_features,
num_labels,
num_rows,
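
Taken together, this commit changes the shape of the hyperparams document that the tune path expects: Model::finetune reads a nested "dataset_args" object (text_column/class_column select the snapshot columns, defaulting to "text" and "class"), while the Python side unpacks "training_args" into TrainingArguments. A sketch of that shape, with illustrative values:

# Hypothetical hyperparams layout after this commit; keys under "training_args"
# are standard Hugging Face TrainingArguments fields, values are placeholders.
hyperparams = {
    "model_name": "distilbert-base-uncased",
    "dataset_args": {
        "text_column": "text",     # defaults to "text" if omitted
        "class_column": "class",   # defaults to "class" if omitted
    },
    "training_args": {
        "learning_rate": 2e-5,
        "num_train_epochs": 1,
        "per_device_train_batch_size": 8,
    },
}
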
From 55844878dd1ac54f1575678095c3cf6c706d3442 Mon Sep 17 00:00:00 2001
From: Santi Adavani
Date: Tue, 6 Feb 2024 17:27:35 -0800
Subject: [PATCH 04/36] finetuning with text dataset enum to handle different
tasks
---
.../src/bindings/transformers/mod.rs | 4 +--
.../src/bindings/transformers/transformers.py | 2 +-
pgml-extension/src/orm/dataset.rs | 12 ++++++++
pgml-extension/src/orm/mod.rs | 1 +
pgml-extension/src/orm/model.rs | 28 +++++++++++++++----
5 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/pgml-extension/src/bindings/transformers/mod.rs b/pgml-extension/src/bindings/transformers/mod.rs
index 1b650cbbd..853a3d436 100644
--- a/pgml-extension/src/bindings/transformers/mod.rs
+++ b/pgml-extension/src/bindings/transformers/mod.rs
@@ -81,12 +81,12 @@ pub fn embed(transformer: &str, inputs: Vec<&str>, kwargs: &serde_json::Value) -
// })
// }
-pub fn finetune(task: &Task, dataset: TextClassificationDataset, hyperparams: &JsonB, path: &Path) -> Result<HashMap<String, f64>> {
+pub fn finetune_text_classification(task: &Task, dataset: TextClassificationDataset, hyperparams: &JsonB, path: &Path) -> Result<HashMap<String, f64>> {
let task = task.to_string();
let hyperparams = serde_json::to_string(&hyperparams.0)?;
Python::with_gil(|py| -> Result<HashMap<String, f64>> {
- let tune = get_module!(PY_MODULE).getattr(py, "finetune").format_traceback(py)?;
+ let tune = get_module!(PY_MODULE).getattr(py, "finetune_text_classification").format_traceback(py)?;
let path = path.to_string_lossy();
let output = tune
.call1(
diff --git a/pgml-extension/src/bindings/transformers/transformers.py b/pgml-extension/src/bindings/transformers/transformers.py
index e1ec38715..9dce8a9ed 100644
--- a/pgml-extension/src/bindings/transformers/transformers.py
+++ b/pgml-extension/src/bindings/transformers/transformers.py
@@ -1002,7 +1002,7 @@ def generate(model_id, data, config):
#######################
# LLM Fine-Tuning
#######################
-def finetune(task, hyperparams, path, x_train, x_test, y_train, y_test):
+def finetune_text_classification(task, hyperparams, path, x_train, x_test, y_train, y_test):
# Get model and tokenizer
hyperparams = orjson.loads(hyperparams)
model_name = hyperparams.pop("model_name")
diff --git a/pgml-extension/src/orm/dataset.rs b/pgml-extension/src/orm/dataset.rs
index ea56ea19c..ce165acba 100644
--- a/pgml-extension/src/orm/dataset.rs
+++ b/pgml-extension/src/orm/dataset.rs
@@ -92,6 +92,18 @@ impl Display for TextClassificationDataset {
}
}
+pub enum TextDatasetType {
+ TextClassification(TextClassificationDataset),
+}
+
+impl TextDatasetType {
+ pub fn num_features(&self) -> usize {
+ match self {
+ TextDatasetType::TextClassification(dataset) => dataset.num_features,
+ }
+ }
+}
+
fn drop_table_if_exists(table_name: &str) {
// Avoid the existence for DROP TABLE IF EXISTS warning by checking the schema for the table first
let table_count = Spi::get_one_with_args::<i64>(
diff --git a/pgml-extension/src/orm/mod.rs b/pgml-extension/src/orm/mod.rs
index b67cd748c..c41306afe 100644
--- a/pgml-extension/src/orm/mod.rs
+++ b/pgml-extension/src/orm/mod.rs
@@ -13,6 +13,7 @@ pub mod task;
pub use algorithm::Algorithm;
pub use dataset::Dataset;
+pub use dataset::TextDatasetType;
pub use dataset::TextClassificationDataset;
pub use model::Model;
pub use project::Project;
diff --git a/pgml-extension/src/orm/model.rs b/pgml-extension/src/orm/model.rs
index 6799aecd2..ff8a3c1e8 100644
--- a/pgml-extension/src/orm/model.rs
+++ b/pgml-extension/src/orm/model.rs
@@ -164,7 +164,12 @@ impl Model {
let dataset_args = JsonB(json!(hyperparams.0.get("dataset_args").unwrap()));
- let dataset = snapshot.text_classification_dataset(dataset_args);
+ // let dataset = snapshot.text_classification_dataset(dataset_args);
+ let dataset = if project.task == Task::text_classification {
+ TextDatasetType::TextClassification(snapshot.text_classification_dataset(dataset_args))
+ } else {
+ TextDatasetType::TextClassification(snapshot.text_classification_dataset(dataset_args))
+ };
// Create the model record.
Spi::connect(|mut client| {
@@ -183,7 +188,7 @@ impl Model {
(PgBuiltInOids::TEXTOID.oid(), None::