From 2cf260aa046230fac27ffe0258568043744985d5 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 8 Nov 2023 13:09:36 -0800 Subject: [PATCH 1/6] Updated to support streaming --- pgml-sdks/pgml/src/languages/javascript.rs | 67 ++++++- pgml-sdks/pgml/src/languages/python.rs | 76 ++++++-- pgml-sdks/pgml/src/transformer_pipeline.rs | 168 +++++++++++++++++- .../rust-bridge-macros/src/javascript.rs | 3 - .../rust-bridge-macros/src/python.rs | 27 ++- 5 files changed, 298 insertions(+), 43 deletions(-) diff --git a/pgml-sdks/pgml/src/languages/javascript.rs b/pgml-sdks/pgml/src/languages/javascript.rs index 2830ff8a1..f668180b6 100644 --- a/pgml-sdks/pgml/src/languages/javascript.rs +++ b/pgml-sdks/pgml/src/languages/javascript.rs @@ -1,8 +1,11 @@ +use futures::StreamExt; use neon::prelude::*; use rust_bridge::javascript::{FromJsType, IntoJsResult}; +use std::sync::Arc; use crate::{ pipeline::PipelineSyncData, + transformer_pipeline::TransformerStream, types::{DateTime, Json}, }; @@ -16,8 +19,9 @@ impl IntoJsResult for DateTime { self, cx: &mut C, ) -> JsResult<'b, Self::Output> { - let date = neon::types::JsDate::new(cx, self.0.assume_utc().unix_timestamp() as f64 * 1000.0) - .expect("Error converting to JS Date"); + let date = + neon::types::JsDate::new(cx, self.0.assume_utc().unix_timestamp() as f64 * 1000.0) + .expect("Error converting to JS Date"); Ok(date) } } @@ -69,6 +73,65 @@ impl IntoJsResult for PipelineSyncData { } } +#[derive(Clone)] +struct TransformerStreamArcMutex(Arc>); + +impl Finalize for TransformerStreamArcMutex {} + +fn transform_stream_iterate_next(mut cx: FunctionContext) -> JsResult { + let this = cx.this(); + let s: Handle> = this + .get(&mut cx, "s") + .expect("Error getting self in transformer_stream_iterate_next"); + let mut b: &JsBox = &s; + let ts: &TransformerStreamArcMutex = &s; + let ts: TransformerStreamArcMutex = ts.clone(); + + let channel = cx.channel(); + let (deferred, promise) = 
cx.promise(); + crate::get_or_set_runtime().spawn(async move { + let mut ts = ts.0.lock().await; + let v = ts.next().await; + deferred + .try_settle_with(&channel, move |mut cx| { + let o = cx.empty_object(); + if let Some(v) = v { + let v: String = v.expect("Error calling next on TransformerStream"); + let v = cx.string(v); + let d = cx.boolean(false); + o.set(&mut cx, "value", v) + .expect("Error setting object value in transformer_sream_iterate_next"); + o.set(&mut cx, "done", d) + .expect("Error setting object value in transformer_sream_iterate_next"); + } else { + let d = cx.boolean(true); + o.set(&mut cx, "done", d) + .expect("Error setting object value in transformer_sream_iterate_next"); + } + Ok(o) + }) + .expect("Error sending js"); + }); + Ok(promise) +} + +impl IntoJsResult for TransformerStream { + type Output = JsObject; + fn into_js_result<'a, 'b, 'c: 'b, C: Context<'c>>( + self, + cx: &mut C, + ) -> JsResult<'b, Self::Output> { + let o = cx.empty_object(); + let f: Handle = JsFunction::new(cx, transform_stream_iterate_next)?; + o.set(cx, "next", f)?; + let s = cx.boxed(TransformerStreamArcMutex(Arc::new( + tokio::sync::Mutex::new(self), + ))); + o.set(cx, "s", s)?; + Ok(o) + } +} + //////////////////////////////////////////////////////////////////////////////// // JS To Rust ////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// diff --git a/pgml-sdks/pgml/src/languages/python.rs b/pgml-sdks/pgml/src/languages/python.rs index 3d81c9377..77a7a6328 100644 --- a/pgml-sdks/pgml/src/languages/python.rs +++ b/pgml-sdks/pgml/src/languages/python.rs @@ -1,65 +1,99 @@ +use futures::StreamExt; use pyo3::conversion::IntoPy; use pyo3::types::{PyDict, PyFloat, PyInt, PyList, PyString}; use pyo3::{prelude::*, types::PyBool}; +use std::sync::Arc; use rust_bridge::python::CustomInto; -use crate::{pipeline::PipelineSyncData, types::Json}; +use 
crate::{pipeline::PipelineSyncData, transformer_pipeline::TransformerStream, types::Json}; //////////////////////////////////////////////////////////////////////////////// // Rust to PY ////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -impl ToPyObject for Json { - fn to_object(&self, py: Python) -> PyObject { +impl IntoPy for Json { + fn into_py(self, py: Python) -> PyObject { match &self.0 { - serde_json::Value::Bool(x) => x.to_object(py), + serde_json::Value::Bool(x) => x.into_py(py), serde_json::Value::Number(x) => { if x.is_f64() { x.as_f64() .expect("Error converting to f64 in impl ToPyObject for Json") - .to_object(py) + .into_py(py) } else { x.as_i64() .expect("Error converting to i64 in impl ToPyObject for Json") - .to_object(py) + .into_py(py) } } - serde_json::Value::String(x) => x.to_object(py), + serde_json::Value::String(x) => x.into_py(py), serde_json::Value::Array(x) => { let list = PyList::empty(py); for v in x.iter() { - list.append(Json(v.clone()).to_object(py)).unwrap(); + list.append(Json(v.clone()).into_py(py)).unwrap(); } - list.to_object(py) + list.into_py(py) } serde_json::Value::Object(x) => { let dict = PyDict::new(py); for (k, v) in x.iter() { - dict.set_item(k, Json(v.clone()).to_object(py)).unwrap(); + dict.set_item(k, Json(v.clone()).into_py(py)).unwrap(); } - dict.to_object(py) + dict.into_py(py) } serde_json::Value::Null => py.None(), } } } -impl IntoPy for Json { +impl IntoPy for PipelineSyncData { fn into_py(self, py: Python) -> PyObject { - self.to_object(py) + Json::from(self).into_py(py) } } -impl ToPyObject for PipelineSyncData { - fn to_object(&self, py: Python) -> PyObject { - Json::from(self.clone()).to_object(py) +#[pyclass] +#[derive(Clone)] +struct TransformerStreamPython { + wrapped: Arc>, +} + +#[pymethods] +impl TransformerStreamPython { + fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn 
__anext__<'p>(mut slf: PyRefMut<'_, Self>, py: Python<'p>) -> PyResult> { + let ts = slf.wrapped.clone(); + let fut = pyo3_asyncio::tokio::future_into_py(py, async move { + let mut ts = ts.lock().await; + if let Some(o) = ts.next().await { + Ok(Some(Python::with_gil(|py| { + o.expect("Error calling next on TransformerStream") + .to_object(py) + }))) + } else { + Err(pyo3::exceptions::PyStopAsyncIteration::new_err( + "stream exhausted", + )) + } + })?; + Ok(Some(fut.into())) } } -impl IntoPy for PipelineSyncData { +impl IntoPy for TransformerStream { fn into_py(self, py: Python) -> PyObject { - self.to_object(py) + let f: Py = Py::new( + py, + TransformerStreamPython { + wrapped: Arc::new(tokio::sync::Mutex::new(self)), + }, + ) + .expect("Error converting TransformerStream to TransformerStreamPython"); + f.to_object(py) } } @@ -115,6 +149,12 @@ impl FromPyObject<'_> for PipelineSyncData { } } +impl FromPyObject<'_> for TransformerStream { + fn extract(ob: &PyAny) -> PyResult { + panic!("We must implement this, but this is impossible to be reached") + } +} + //////////////////////////////////////////////////////////////////////////////// // Rust to Rust ////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index f28e3106b..0c3d31049 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -1,5 +1,11 @@ -use rust_bridge::{alias, alias_methods}; -use sqlx::Row; +use futures::Stream; +use rust_bridge::{alias, alias_manual, alias_methods}; +use sqlx::{postgres::PgRow, Row}; +use sqlx::{Postgres, Transaction}; +use std::collections::VecDeque; +use std::future::Future; +use std::pin::Pin; +use std::task::Poll; use tracing::instrument; /// Provides access to builtin database methods @@ -14,7 +20,106 @@ use 
crate::{get_or_initialize_pool, types::Json}; #[cfg(feature = "python")] use crate::types::JsonPython; -#[alias_methods(new, transform)] +#[derive(alias_manual)] +pub struct TransformerStream { + transaction: Option>, + future: Option, sqlx::Error>> + Send + 'static>>>, + commit: Option> + Send + 'static>>>, + done: bool, + query: String, + db_batch_size: i32, + results: VecDeque, +} + +impl std::fmt::Debug for TransformerStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TransformerStream").finish() + } +} + +impl Clone for TransformerStream { + fn clone(&self) -> Self { + panic!("We shouldn't be doing this"); + } +} + +impl TransformerStream { + fn new(transaction: Transaction<'static, Postgres>, db_batch_size: i32) -> Self { + let query = format!("FETCH {} FROM c", db_batch_size); + Self { + transaction: Some(transaction), + future: None, + commit: None, + done: false, + query, + db_batch_size, + results: VecDeque::new(), + } + } +} + +impl Stream for TransformerStream { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + if self.done { + if let Some(c) = self.commit.as_mut() { + if let Poll::Ready(_) = c.as_mut().poll(cx) { + self.commit = None; + } + } + } else { + if self.future.is_none() { + unsafe { + let s = self.as_mut().get_unchecked_mut(); + let s: *mut Self = s; + let s = Box::leak(Box::from_raw(s)); + s.future = Some(Box::pin( + sqlx::query(&s.query).fetch_all(s.transaction.as_mut().unwrap()), + )); + } + } + + if let Poll::Ready(o) = self.as_mut().future.as_mut().unwrap().as_mut().poll(cx) { + let rows = o?; + if rows.len() < self.db_batch_size as usize { + self.done = true; + unsafe { + let s = self.as_mut().get_unchecked_mut(); + let transaction = std::mem::take(&mut s.transaction).unwrap(); + s.commit = Some(Box::pin(transaction.commit())); + } + } else { + unsafe { + let s = self.as_mut().get_unchecked_mut(); + let s: *mut Self = s; 
+ let s = Box::leak(Box::from_raw(s)); + s.future = Some(Box::pin( + sqlx::query(&s.query).fetch_all(s.transaction.as_mut().unwrap()), + )); + } + } + for r in rows.into_iter() { + self.results.push_back(r) + } + } + } + + if !self.results.is_empty() { + let r = self.results.pop_front().unwrap(); + Poll::Ready(Some(Ok(r.get::(0)))) + } else if self.done { + Poll::Ready(None) + } else { + Poll::Pending + } + } +} + +#[alias_methods(new, transform, transform_stream)] impl TransformerPipeline { pub fn new( task: &str, @@ -54,12 +159,37 @@ impl TransformerPipeline { let results = results.get(0).unwrap().get::(0); Ok(Json(results)) } + + #[instrument(skip(self))] + pub async fn transform_stream( + &self, + inputs: Vec, + args: Option, + batch_size: Option, + ) -> anyhow::Result { + let pool = get_or_initialize_pool(&self.database_url).await?; + let args = args.unwrap_or_default(); + let batch_size = batch_size.unwrap_or(10); + + let mut transaction = pool.begin().await?; + sqlx::query( + "DECLARE c CURSOR FOR SELECT pgml.transform_stream(task => $1, inputs => $2, args => $3)", + ) + .bind(&self.task) + .bind(inputs) + .bind(&args) + .execute(&mut *transaction) + .await?; + + Ok(TransformerStream::new(transaction, batch_size)) + } } #[cfg(test)] mod tests { use super::*; use crate::internal_init_logger; + use futures::StreamExt; #[sqlx::test] async fn transformer_pipeline_can_transform() -> anyhow::Result<()> { @@ -99,4 +229,36 @@ mod tests { assert!(results.as_array().is_some()); Ok(()) } + + #[sqlx::test] + async fn transformer_can_transform_stream() -> anyhow::Result<()> { + internal_init_logger(None, None).ok(); + let t = TransformerPipeline::new( + "text-generation", + Some("TheBloke/zephyr-7B-beta-GGUF".to_string()), + Some( + serde_json::json!({ + "model_file": "zephyr-7b-beta.Q5_K_M.gguf", "model_type": "mistral" + }) + .into(), + ), + None, + ); + let mut stream = t + .transform_stream( + vec!["AI is going to".to_string()], + Some( + serde_json::json!({ + 
"max_new_tokens": 10 + }) + .into(), + ), + None, + ) + .await?; + while let Some(o) = stream.next().await { + o?; + } + Ok(()) + } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs index 5d7f76b02..b38fe2dfc 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs @@ -235,10 +235,7 @@ pub fn generate_javascript_methods( let middle = if method.is_async { quote! { - // let runtime = crate::get_or_set_runtime(); - // let x = runtime.block_on(#wrapped_call); let x = #wrapped_call.await; - } } else { quote! { diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs index 3060656d0..3d34c9a7c 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs @@ -14,22 +14,22 @@ from typing import List, Dict, Optional, Self, Any /// This function assumes the user has already impliemented: /// - `FromPyObject` for the wrapped type -/// - `ToPyObject` for the wrapped type /// - `IntoPy` for the wrapped type pub fn generate_alias_manual(parsed: DeriveInput) -> proc_macro::TokenStream { let name_ident = format_ident!("{}Python", parsed.ident); let wrapped_type_ident = parsed.ident; + let (impl_generics, ty_generics, where_clause) = parsed.generics.split_for_impl(); let expanded = quote! 
{ #[cfg(feature = "python")] #[derive(Clone, Debug)] - pub struct #name_ident { - pub wrapped: #wrapped_type_ident + pub struct #name_ident #ty_generics { + pub wrapped: #wrapped_type_ident #ty_generics } #[cfg(feature = "python")] - impl rust_bridge::python::CustomInto<#name_ident> for #wrapped_type_ident { - fn custom_into(self) -> #name_ident { + impl #impl_generics rust_bridge::python::CustomInto<#name_ident #ty_generics> for #wrapped_type_ident #ty_generics #where_clause { + fn custom_into(self) -> #name_ident #ty_generics { #name_ident { wrapped: self, } @@ -37,15 +37,15 @@ pub fn generate_alias_manual(parsed: DeriveInput) -> proc_macro::TokenStream { } #[cfg(feature = "python")] - impl rust_bridge::python::CustomInto<#wrapped_type_ident> for #name_ident { - fn custom_into(self) -> #wrapped_type_ident { + impl #impl_generics rust_bridge::python::CustomInto<#wrapped_type_ident #ty_generics> for #name_ident #ty_generics #where_clause { + fn custom_into(self) -> #wrapped_type_ident #ty_generics { self.wrapped } } // From Python to Rust #[cfg(feature = "python")] - impl pyo3::conversion::FromPyObject<'_> for #name_ident { + impl #impl_generics pyo3::conversion::FromPyObject<'_> for #name_ident #ty_generics #where_clause { fn extract(obj: &pyo3::PyAny) -> pyo3::PyResult { Ok(Self { wrapped: obj.extract()? 
@@ -55,17 +55,10 @@ pub fn generate_alias_manual(parsed: DeriveInput) -> proc_macro::TokenStream { // From Rust to Python #[cfg(feature = "python")] - impl pyo3::conversion::ToPyObject for #name_ident { - fn to_object(&self, py: pyo3::Python) -> pyo3::PyObject { - use pyo3::conversion::ToPyObject; - self.wrapped.to_object(py) - } - } - #[cfg(feature = "python")] - impl pyo3::conversion::IntoPy for #name_ident { + impl #impl_generics pyo3::conversion::IntoPy for #name_ident #ty_generics #where_clause { fn into_py(self, py: pyo3::Python) -> pyo3::PyObject { use pyo3::conversion::ToPyObject; - self.wrapped.to_object(py) + self.wrapped.into_py(py) } } }; From 231e2292e18deb867febc59b5427f39bd0ac4fdd Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 8 Nov 2023 15:11:34 -0800 Subject: [PATCH 2/6] Updated some signatures added some very basic tests and cleaned some stuff up --- .../javascript/tests/typescript-tests/test.ts | 22 +++++++++++++++++++ pgml-sdks/pgml/python/tests/test.py | 21 ++++++++++++++++++ pgml-sdks/pgml/src/languages/javascript.rs | 1 - pgml-sdks/pgml/src/languages/python.rs | 4 ++-- pgml-sdks/pgml/src/transformer_pipeline.rs | 8 +++---- 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts b/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts index 07ce62093..affb314fa 100644 --- a/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts +++ b/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts @@ -280,6 +280,28 @@ it("can order documents", async () => { await collection.archive(); }); +/////////////////////////////////////////////////// +// Transformer Pipeline Tests ///////////////////// +/////////////////////////////////////////////////// + +it("can transformer pipeline", async () => { + const t = pgml.newTransformerPipeline("text-generation"); + const it = await t.transform(["AI is going to"], {max_new_tokens: 5}); + 
expect(it.length).toBeGreaterThan(0) +}); + +it("can transformer pipeline stream", async () => { + const t = pgml.newTransformerPipeline("text-generation"); + const it = await t.transform_stream("AI is going to", {max_new_tokens: 5}); + let result = await it.next(); + let output = []; + while (!result.done) { + output.push(result.value); + result = await it.next(); + } + expect(output.length).toBeGreaterThan(0) +}); + /////////////////////////////////////////////////// // Test migrations //////////////////////////////// /////////////////////////////////////////////////// diff --git a/pgml-sdks/pgml/python/tests/test.py b/pgml-sdks/pgml/python/tests/test.py index 673b2b876..97ca155f5 100644 --- a/pgml-sdks/pgml/python/tests/test.py +++ b/pgml-sdks/pgml/python/tests/test.py @@ -298,6 +298,27 @@ async def test_order_documents(): await collection.archive() +################################################### +## Transformer Pipeline Tests ##################### +################################################### + + +@pytest.mark.asyncio +async def test_transformer_pipeline(): + t = pgml.TransformerPipeline("text-generation") + it = await t.transform(["AI is going to"], {"max_new_tokens": 5}) + assert (len(it)) > 0 + +@pytest.mark.asyncio +async def test_transformer_pipeline_stream(): + t = pgml.TransformerPipeline("text-generation") + it = await t.transform_stream("AI is going to", {"max_new_tokens": 5}) + total = [] + async for c in it: + total.append(c) + assert (len(total)) > 0 + + ################################################### ## Migration tests ################################ ################################################### diff --git a/pgml-sdks/pgml/src/languages/javascript.rs b/pgml-sdks/pgml/src/languages/javascript.rs index f668180b6..1aafd654b 100644 --- a/pgml-sdks/pgml/src/languages/javascript.rs +++ b/pgml-sdks/pgml/src/languages/javascript.rs @@ -83,7 +83,6 @@ fn transform_stream_iterate_next(mut cx: FunctionContext) -> JsResult let s: Handle> 
= this .get(&mut cx, "s") .expect("Error getting self in transformer_stream_iterate_next"); - let mut b: &JsBox = &s; let ts: &TransformerStreamArcMutex = &s; let ts: TransformerStreamArcMutex = ts.clone(); diff --git a/pgml-sdks/pgml/src/languages/python.rs b/pgml-sdks/pgml/src/languages/python.rs index 77a7a6328..2cf1bcf9c 100644 --- a/pgml-sdks/pgml/src/languages/python.rs +++ b/pgml-sdks/pgml/src/languages/python.rs @@ -65,7 +65,7 @@ impl TransformerStreamPython { slf } - fn __anext__<'p>(mut slf: PyRefMut<'_, Self>, py: Python<'p>) -> PyResult> { + fn __anext__<'p>(slf: PyRefMut<'_, Self>, py: Python<'p>) -> PyResult> { let ts = slf.wrapped.clone(); let fut = pyo3_asyncio::tokio::future_into_py(py, async move { let mut ts = ts.lock().await; @@ -150,7 +150,7 @@ impl FromPyObject<'_> for PipelineSyncData { } impl FromPyObject<'_> for TransformerStream { - fn extract(ob: &PyAny) -> PyResult { + fn extract(_ob: &PyAny) -> PyResult { panic!("We must implement this, but this is impossible to be reached") } } diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index 0c3d31049..af553fe75 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -163,7 +163,7 @@ impl TransformerPipeline { #[instrument(skip(self))] pub async fn transform_stream( &self, - inputs: Vec, + input: &str, args: Option, batch_size: Option, ) -> anyhow::Result { @@ -173,10 +173,10 @@ impl TransformerPipeline { let mut transaction = pool.begin().await?; sqlx::query( - "DECLARE c CURSOR FOR SELECT pgml.transform_stream(task => $1, inputs => $2, args => $3)", + "DECLARE c CURSOR FOR SELECT pgml.transform_stream(task => $1, input => $2, args => $3)", ) .bind(&self.task) - .bind(inputs) + .bind(input) .bind(&args) .execute(&mut *transaction) .await?; @@ -246,7 +246,7 @@ mod tests { ); let mut stream = t .transform_stream( - vec!["AI is going to".to_string()], + "AI is going to", Some( 
serde_json::json!({ "max_new_tokens": 10 From bd7f9dc6740f6f2515e1dfa7681e563d3ed4a504 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 8 Nov 2023 15:18:59 -0800 Subject: [PATCH 3/6] Cleaned up some stuff --- pgml-sdks/pgml/src/transformer_pipeline.rs | 6 ------ pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs | 1 - 2 files changed, 7 deletions(-) diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index af553fe75..f7b5f417f 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -37,12 +37,6 @@ impl std::fmt::Debug for TransformerStream { } } -impl Clone for TransformerStream { - fn clone(&self) -> Self { - panic!("We shouldn't be doing this"); - } -} - impl TransformerStream { fn new(transaction: Transaction<'static, Postgres>, db_batch_size: i32) -> Self { let query = format!("FETCH {} FROM c", db_batch_size); diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs index 3d34c9a7c..73b0b01c2 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs @@ -22,7 +22,6 @@ pub fn generate_alias_manual(parsed: DeriveInput) -> proc_macro::TokenStream { let expanded = quote! 
{ #[cfg(feature = "python")] - #[derive(Clone, Debug)] pub struct #name_ident #ty_generics { pub wrapped: #wrapped_type_ident #ty_generics } From 43a779b34928b8c69a0860ef24828da85a30cca2 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 9 Nov 2023 08:51:05 -0800 Subject: [PATCH 4/6] Removed python stub file which does not need to be in the repo --- pgml-sdks/pgml/python/pgml/pgml.pyi | 96 ----------------------------- 1 file changed, 96 deletions(-) delete mode 100644 pgml-sdks/pgml/python/pgml/pgml.pyi diff --git a/pgml-sdks/pgml/python/pgml/pgml.pyi b/pgml-sdks/pgml/python/pgml/pgml.pyi deleted file mode 100644 index 5352132a9..000000000 --- a/pgml-sdks/pgml/python/pgml/pgml.pyi +++ /dev/null @@ -1,96 +0,0 @@ - -def init_logger(level: Optional[str] = "", format: Optional[str] = "") -> None -async def migrate() -> None - -Json = Any -DateTime = int - -# Top of file key: A12BECOD! -from typing import List, Dict, Optional, Self, Any - - -class Builtins: - def __init__(self, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self - ... - def query(self, query: str) -> QueryRunner - ... - async def transform(self, task: Json, inputs: List[str], args: Optional[Json] = Any) -> Json - ... - -class Collection: - def __init__(self, name: str, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self - ... - async def add_pipeline(self, pipeline: Pipeline) -> None - ... - async def remove_pipeline(self, pipeline: Pipeline) -> None - ... - async def enable_pipeline(self, pipeline: Pipeline) -> None - ... - async def disable_pipeline(self, pipeline: Pipeline) -> None - ... - async def upsert_documents(self, documents: List[Json], args: Optional[Json] = Any) -> None - ... - async def get_documents(self, args: Optional[Json] = Any) -> List[Json] - ... - async def delete_documents(self, filter: Json) -> None - ... 
- async def vector_search(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = Any, top_k: Optional[int] = 1) -> List[tuple[float, str, Json]] - ... - async def archive(self) -> None - ... - def query(self) -> QueryBuilder - ... - async def get_pipelines(self) -> List[Pipeline] - ... - async def get_pipeline(self, name: str) -> Pipeline - ... - async def exists(self) -> bool - ... - async def upsert_directory(self, path: str, args: Json) -> None - ... - async def upsert_file(self, path: str) -> None - ... - -class Model: - def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", source: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = Any, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self - ... - -class Pipeline: - def __init__(self, name: str, model: Optional[Model] = Any, splitter: Optional[Splitter] = Any, parameters: Optional[Json] = Any) -> Self - ... - async def get_status(self) -> PipelineSyncData - ... - async def to_dict(self) -> Json - ... - -class QueryBuilder: - def limit(self, limit: int) -> Self - ... - def filter(self, filter: Json) -> Self - ... - def vector_recall(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = Any) -> Self - ... - async def fetch_all(self) -> List[tuple[float, str, Json]] - ... - def to_full_string(self) -> str - ... - -class QueryRunner: - async def fetch_all(self) -> Json - ... - async def execute(self) -> None - ... - def bind_string(self, bind_value: str) -> Self - ... - def bind_int(self, bind_value: int) -> Self - ... - def bind_float(self, bind_value: float) -> Self - ... - def bind_bool(self, bind_value: bool) -> Self - ... - def bind_json(self, bind_value: Json) -> Self - ... - -class Splitter: - def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = Any) -> Self - ... 
From 7322c70e7fb366909896e0c8cbdc7dbf45ff04db Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 9 Nov 2023 08:54:29 -0800 Subject: [PATCH 5/6] Cleaned up rust bridge macros --- .../rust-bridge-macros/src/python.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs index 73b0b01c2..b0df89c51 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs @@ -18,17 +18,16 @@ from typing import List, Dict, Optional, Self, Any pub fn generate_alias_manual(parsed: DeriveInput) -> proc_macro::TokenStream { let name_ident = format_ident!("{}Python", parsed.ident); let wrapped_type_ident = parsed.ident; - let (impl_generics, ty_generics, where_clause) = parsed.generics.split_for_impl(); let expanded = quote! { #[cfg(feature = "python")] - pub struct #name_ident #ty_generics { - pub wrapped: #wrapped_type_ident #ty_generics + pub struct #name_ident { + pub wrapped: #wrapped_type_ident } #[cfg(feature = "python")] - impl #impl_generics rust_bridge::python::CustomInto<#name_ident #ty_generics> for #wrapped_type_ident #ty_generics #where_clause { - fn custom_into(self) -> #name_ident #ty_generics { + impl rust_bridge::python::CustomInto<#name_ident> for #wrapped_type_ident { + fn custom_into(self) -> #name_ident { #name_ident { wrapped: self, } @@ -36,15 +35,15 @@ pub fn generate_alias_manual(parsed: DeriveInput) -> proc_macro::TokenStream { } #[cfg(feature = "python")] - impl #impl_generics rust_bridge::python::CustomInto<#wrapped_type_ident #ty_generics> for #name_ident #ty_generics #where_clause { - fn custom_into(self) -> #wrapped_type_ident #ty_generics { + impl rust_bridge::python::CustomInto<#wrapped_type_ident> for #name_ident { + fn custom_into(self) -> #wrapped_type_ident { self.wrapped } } // From Python to 
Rust #[cfg(feature = "python")] - impl #impl_generics pyo3::conversion::FromPyObject<'_> for #name_ident #ty_generics #where_clause { + impl pyo3::conversion::FromPyObject<'_> for #name_ident { fn extract(obj: &pyo3::PyAny) -> pyo3::PyResult { Ok(Self { wrapped: obj.extract()? @@ -54,7 +53,7 @@ pub fn generate_alias_manual(parsed: DeriveInput) -> proc_macro::TokenStream { // From Rust to Python #[cfg(feature = "python")] - impl #impl_generics pyo3::conversion::IntoPy for #name_ident #ty_generics #where_clause { + impl pyo3::conversion::IntoPy for #name_ident { fn into_py(self, py: pyo3::Python) -> pyo3::PyObject { use pyo3::conversion::ToPyObject; self.wrapped.into_py(py) From 01b67b2c9d21f3ddacf49fce2f7d1dd33532934d Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 9 Nov 2023 10:08:43 -0800 Subject: [PATCH 6/6] Bumped package versions --- pgml-sdks/pgml/Cargo.toml | 2 +- pgml-sdks/pgml/javascript/package.json | 2 +- pgml-sdks/pgml/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml index 7404acc8d..5db2bbefd 100644 --- a/pgml-sdks/pgml/Cargo.toml +++ b/pgml-sdks/pgml/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pgml" -version = "0.9.5" +version = "0.9.6" edition = "2021" authors = ["PosgresML "] homepage = "https://postgresml.org/" diff --git a/pgml-sdks/pgml/javascript/package.json b/pgml-sdks/pgml/javascript/package.json index 1126b1782..93d41f9ab 100644 --- a/pgml-sdks/pgml/javascript/package.json +++ b/pgml-sdks/pgml/javascript/package.json @@ -1,6 +1,6 @@ { "name": "pgml", - "version": "0.9.5", + "version": "0.9.6", "description": "Open Source Alternative for Building End-to-End Vector Search Applications without OpenAI & Pinecone", "keywords": [ "postgres", diff --git a/pgml-sdks/pgml/pyproject.toml b/pgml-sdks/pgml/pyproject.toml index ffd3b959d..df80ecb74 100644 --- a/pgml-sdks/pgml/pyproject.toml +++ 
b/pgml-sdks/pgml/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "pgml" requires-python = ">=3.7" -version = "0.9.5" +version = "0.9.6" description = "Python SDK is designed to facilitate the development of scalable vector search applications on PostgreSQL databases." authors = [ {name = "PostgresML", email = "team@postgresml.org"}, pFad - Phonifier reborn
