Content-Length: 40394 | pFad | http://github.com/postgresml/postgresml/pull/1201.patch
thub.com
From e4ae88620fbb23545383f68215adae394f9ba607 Mon Sep 17 00:00:00 2001
From: Lev
Date: Tue, 28 Nov 2023 13:43:11 -0800
Subject: [PATCH 1/9] Python CLI
---
pgml-sdks/pgml/Cargo.lock | 276 ++++++++++++++++++++++-
pgml-sdks/pgml/Cargo.toml | 5 +
pgml-sdks/pgml/python/pgml/__main__.py | 8 +
pgml-sdks/pgml/src/builtins.rs | 2 +-
pgml-sdks/pgml/src/cli.rs | 295 +++++++++++++++++++++++++
pgml-sdks/pgml/src/lib.rs | 3 +
pgml-sdks/pgml/src/sql/fdw.sql | 22 ++
pgml-sdks/pgml/src/sql/fdw_drop.sql | 9 +
pgml-sdks/pgml/src/utils.rs | 4 +-
9 files changed, 614 insertions(+), 10 deletions(-)
create mode 100644 pgml-sdks/pgml/python/pgml/__main__.py
create mode 100644 pgml-sdks/pgml/src/cli.rs
create mode 100644 pgml-sdks/pgml/src/sql/fdw.sql
create mode 100644 pgml-sdks/pgml/src/sql/fdw_drop.sql
diff --git a/pgml-sdks/pgml/Cargo.lock b/pgml-sdks/pgml/Cargo.lock
index c9bff6f4d..93babebf0 100644
--- a/pgml-sdks/pgml/Cargo.lock
+++ b/pgml-sdks/pgml/Cargo.lock
@@ -60,6 +60,54 @@ dependencies = [
"libc",
]
+[[package]]
+name = "anstream"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
+dependencies = [
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628"
+dependencies = [
+ "anstyle",
+ "windows-sys 0.48.0",
+]
+
[[package]]
name = "anyhow"
version = "1.0.71"
@@ -110,6 +158,12 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+[[package]]
+name = "bitflags"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
+
[[package]]
name = "block-buffer"
version = "0.10.4"
@@ -164,6 +218,63 @@ dependencies = [
"winapi",
]
+[[package]]
+name = "clap"
+version = "4.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.28",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
+
+[[package]]
+name = "colored"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6"
+dependencies = [
+ "is-terminal",
+ "lazy_static",
+ "windows-sys 0.48.0",
+]
+
[[package]]
name = "console"
version = "0.15.7"
@@ -269,6 +380,31 @@ dependencies = [
"cfg-if",
]
+[[package]]
+name = "crossterm"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
+dependencies = [
+ "bitflags 1.3.2",
+ "crossterm_winapi",
+ "libc",
+ "mio",
+ "parking_lot 0.12.1",
+ "signal-hook",
+ "signal-hook-mio",
+ "winapi",
+]
+
+[[package]]
+name = "crossterm_winapi"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
+dependencies = [
+ "winapi",
+]
+
[[package]]
name = "crypto-common"
version = "0.1.6"
@@ -279,6 +415,16 @@ dependencies = [
"typenum",
]
+[[package]]
+name = "ctrlc"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a011bbe2c35ce9c1f143b7af6f94f29a167beb4cd1d29e6740ce836f723120e"
+dependencies = [
+ "nix",
+ "windows-sys 0.48.0",
+]
+
[[package]]
name = "darling"
version = "0.14.4"
@@ -351,6 +497,12 @@ version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
+[[package]]
+name = "dyn-clone"
+version = "1.0.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d"
+
[[package]]
name = "either"
version = "1.8.1"
@@ -811,6 +963,22 @@ dependencies = [
"syn 2.0.28",
]
+[[package]]
+name = "inquire"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33e7c1ddeb15c9abcbfef6029d8e29f69b52b6d6c891031b88ed91b5065803b"
+dependencies = [
+ "bitflags 1.3.2",
+ "crossterm",
+ "dyn-clone",
+ "lazy_static",
+ "newline-converter",
+ "thiserror",
+ "unicode-segmentation",
+ "unicode-width",
+]
+
[[package]]
name = "instant"
version = "0.1.12"
@@ -837,6 +1005,17 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
+[[package]]
+name = "is-terminal"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
+dependencies = [
+ "hermit-abi 0.3.2",
+ "rustix 0.38.3",
+ "windows-sys 0.48.0",
+]
+
[[package]]
name = "itertools"
version = "0.10.5"
@@ -895,6 +1074,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829"
+
[[package]]
name = "lock_api"
version = "0.4.10"
@@ -997,6 +1182,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
dependencies = [
"libc",
+ "log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.48.0",
]
@@ -1060,6 +1246,26 @@ dependencies = [
"smallvec",
]
+[[package]]
+name = "newline-converter"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f71d09d5c87634207f894c6b31b6a2b2c64ea3bdcf71bd5599fdbbe1600c00f"
+dependencies = [
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "nix"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
+dependencies = [
+ "bitflags 1.3.2",
+ "cfg-if",
+ "libc",
+]
+
[[package]]
name = "nom"
version = "7.1.3"
@@ -1117,7 +1323,7 @@ version = "0.10.55"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d"
dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
"cfg-if",
"foreign-types",
"libc",
@@ -1238,8 +1444,13 @@ dependencies = [
"anyhow",
"async-trait",
"chrono",
+ "clap",
+ "colored",
+ "ctrlc",
"futures",
"indicatif",
+ "inquire",
+ "is-terminal",
"itertools",
"lopdf",
"md5",
@@ -1451,7 +1662,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
]
[[package]]
@@ -1460,7 +1671,7 @@ version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
]
[[package]]
@@ -1574,11 +1785,24 @@ version = "0.37.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84f3f8f960ed3b5a59055428714943298bf3fa2d4a1d53135084e0544829d995"
dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
"errno",
"io-lifetimes",
"libc",
- "linux-raw-sys",
+ "linux-raw-sys 0.3.8",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "rustix"
+version = "0.38.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4"
+dependencies = [
+ "bitflags 2.4.1",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.4.11",
"windows-sys 0.48.0",
]
@@ -1697,7 +1921,7 @@ version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8"
dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
"core-foundation",
"core-foundation-sys",
"libc",
@@ -1803,6 +2027,36 @@ dependencies = [
"lazy_static",
]
+[[package]]
+name = "signal-hook"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801"
+dependencies = [
+ "libc",
+ "signal-hook-registry",
+]
+
+[[package]]
+name = "signal-hook-mio"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
+dependencies = [
+ "libc",
+ "mio",
+ "signal-hook",
+]
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "slab"
version = "0.4.8"
@@ -1864,7 +2118,7 @@ dependencies = [
"ahash 0.7.6",
"atoi",
"base64 0.13.1",
- "bitflags",
+ "bitflags 1.3.2",
"byteorder",
"bytes",
"crc",
@@ -2012,7 +2266,7 @@ dependencies = [
"cfg-if",
"fastrand",
"redox_syscall 0.3.5",
- "rustix",
+ "rustix 0.37.26",
"windows-sys 0.48.0",
]
@@ -2324,6 +2578,12 @@ dependencies = [
"percent-encoding",
]
+[[package]]
+name = "utf8parse"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
+
[[package]]
name = "uuid"
version = "1.3.4"
diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml
index 5db2bbefd..d5f685973 100644
--- a/pgml-sdks/pgml/Cargo.toml
+++ b/pgml-sdks/pgml/Cargo.toml
@@ -38,6 +38,11 @@ serde = "1.0.181"
futures = "0.3.28"
walkdir = "2.4.0"
lopdf = { version = "0.31.0", features = ["nom_parser"] }
+clap = { version = "4", features = ["derive"]}
+is-terminal = "0.4"
+colored = "2"
+ctrlc = "3"
+inquire = "0.6"
[features]
default = []
diff --git a/pgml-sdks/pgml/python/pgml/__main__.py b/pgml-sdks/pgml/python/pgml/__main__.py
new file mode 100644
index 000000000..f3f2ec981
--- /dev/null
+++ b/pgml-sdks/pgml/python/pgml/__main__.py
@@ -0,0 +1,8 @@
+import asyncio
+from pgml import cli
+
+async def main():
+ await cli()
+
+asyncio.run(main())
+
diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs
index 188948c72..db023b951 100644
--- a/pgml-sdks/pgml/src/builtins.rs
+++ b/pgml-sdks/pgml/src/builtins.rs
@@ -101,7 +101,7 @@ mod tests {
let query = "SELECT * from pgml.collections";
let results = builtins.query(query).fetch_all().await?;
assert!(results.as_array().is_some());
- Ok(())
+ Ok(())
}
#[sqlx::test]
diff --git a/pgml-sdks/pgml/src/cli.rs b/pgml-sdks/pgml/src/cli.rs
new file mode 100644
index 000000000..9cc4171cf
--- /dev/null
+++ b/pgml-sdks/pgml/src/cli.rs
@@ -0,0 +1,295 @@
+use clap::{Parser, Subcommand};
+use colored::Colorize;
+use inquire::Text;
+use is_terminal::IsTerminal;
+use itertools::Itertools;
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::prelude::*;
+use sqlx::{Acquire, Executor};
+use std::io::Write;
+
+/// PostgresML CLI
+#[derive(Parser, Debug, Clone)]
+#[command(author, version, about, long_about = None, name = "pgml", bin_name = "pgml")]
+struct Python {
+ /// We're running this as `python -m`, this argument is ignored
+ #[arg(short)]
+ module: Option,
+
+ #[command(subcommand)]
+ subcomand: Subcommands,
+}
+
+#[derive(Subcommand, Debug, Clone)]
+enum Subcommands {
+ /// Connect your PostgresML database to another PostgreSQL database.
+ Connect {
+ /// Name for this connection. Allows to configure multiple connections
+ /// from PostgresML to any number of databases.
+ #[arg(long)]
+ name: Option,
+
+ /// Host name or IP address of your database.
+ /// The database must be reachable from our cloud via a private link
+ /// or the Internet.
+ #[arg(long)]
+ host: Option,
+
+ /// The port on which the database server is running.
+ #[arg(long)]
+ port: Option,
+
+ /// A user that has read permissions to your schemas and tables.
+ #[arg(long)]
+ user: Option,
+
+ /// The password for the user.
+ #[arg(long)]
+ password: Option,
+
+ /// The name of the Postgres database.
+ #[arg(long)]
+ database_name: Option,
+
+ /// If you're using another schema that's not public,
+ /// you can specify it here.
+ #[arg(long)]
+ schema: Option,
+
+ /// Don't do anything, just print the commands.
+ #[arg(long, default_value = "false")]
+ dry_run: bool,
+
+ /// Drop the connection before creating it.
+ #[arg(long, default_value = "false")]
+ drop: bool,
+
+ /// DATABASE_URL for your PostgresML database.
+ #[arg(long)]
+ database_url: Option,
+ },
+}
+
+enum Level {
+ Happy,
+ Sad,
+ #[allow(dead_code)]
+ Concerned,
+}
+
+#[pyfunction]
+pub fn cli(py: pyo3::Python) -> pyo3::PyResult<&pyo3::PyAny> {
+ ctrlc::set_handler(move || {
+ println!("");
+ std::process::exit(1);
+ })
+ .expect("failed to set ctrl-c handler");
+
+ pyo3_asyncio::tokio::future_into_py(py, async move {
+ match cli_internal().await {
+ Ok(_) => Ok(()),
+ Err(err) => Err(PyRuntimeError::new_err(format!("{}", err))),
+ }
+ })
+}
+
+async fn cli_internal() -> anyhow::Result<()> {
+ let args = Python::parse();
+
+ match args.subcomand {
+ Subcommands::Connect {
+ name,
+ host,
+ port,
+ user,
+ password,
+ database_name,
+ dry_run,
+ schema,
+ drop,
+ database_url,
+ } => {
+ connect(
+ name,
+ host,
+ port,
+ user,
+ password,
+ database_name,
+ schema,
+ dry_run,
+ drop,
+ database_url,
+ )
+ .await?;
+ }
+ };
+
+ Ok(())
+}
+
+async fn execute_sql(sql: &str) -> anyhow::Result<()> {
+ let pool = crate::get_or_initialize_pool(&None).await?;
+ let mut connection = pool.acquire().await?;
+ let mut transaction = connection.begin().await?;
+
+ for query in sql.split(";") {
+ transaction.execute(query).await?;
+ }
+
+ transaction.commit().await?;
+
+ Ok(())
+}
+
+async fn connect(
+ name: Option,
+ host: Option,
+ port: Option,
+ user: Option,
+ password: Option,
+ database_name: Option,
+ schema: Option,
+ dry_run: bool,
+ drop: bool,
+ database_url: Option,
+) -> anyhow::Result<()> {
+ println!("");
+ println!("The connector will configure a Postgres Foreign Data Wrapper connection");
+ println!("from PostgresML to your Postgres database of choice. If we're missing any details,");
+ println!("we'll ask for them now.");
+ println!("");
+
+ if std::env::var("DATABASE_URL").is_err() && database_url.is_none() {
+ println!("Looks like the DATABASE_URL environment variable is not set.");
+ println!("We need it to be able to connect to your PostgresML database.");
+ println!("");
+ let database_url = user_input!(None::, "DATABASE_URL");
+ std::env::set_var("DATABASE_URL", database_url);
+ println!("");
+ } else if let Some(database_url) = database_url {
+ std::env::set_var("DATABASE_URL", database_url);
+ }
+
+ let name = user_input!(name, "Connection name", Some("production"));
+ let host = user_input!(host, "PostgreSQL host");
+ let port = user_input!(port, "PostgreSQL port", Some("5432"));
+ let user = user_input!(user, "PostgreSQL user", Some("postgres"));
+ let password = user_input!(password, "Password");
+ let database_name = user_input!(database_name, "PostgreSQL database", Some("postgres"));
+ let schema = user_input!(schema, "PostgreSQL schema", Some("public"));
+
+ let sql = include_str!("sql/fdw.sql")
+ .replace("{host}", &host)
+ .replace("{port}", &port)
+ .replace("{user}", &user)
+ .replace("{password}", &password)
+ .replace("{database_name}", &database_name)
+ .replace("{db_name}", &name)
+ .replace("{schema}", &schema);
+ let drop_sql = include_str!("sql/fdw_drop.sql")
+ .replace("{db_name}", &name)
+ .replace("{schema}", &schema);
+
+ if dry_run {
+ println!("");
+ if drop {
+ println!("{}", syntax_highlight(&drop_sql));
+ }
+ println!("{}", syntax_highlight(&sql));
+ println!("");
+ } else {
+ println!("");
+ print!("Everything looks good, creating connection...");
+ std::io::stdout().flush().unwrap();
+
+ if drop {
+ match execute_sql(&drop_sql).await {
+ Ok(_) => (),
+ Err(err) => {
+ println!("{}", colorize("error", Level::Sad));
+ println!("{}", err);
+ std::process::exit(1);
+ }
+ };
+ }
+
+ match execute_sql(&sql).await {
+ Ok(_) => {
+ println!("{}", colorize("done", Level::Happy));
+ println!("");
+ println!("You can now use your PostgreSQL tables inside your PostgresML database.");
+ println!("If you connect with psql, you can view your tables by updating your search_path:");
+ println!("");
+ println!(
+ "{}",
+ syntax_highlight(&format!("SET search_path TO {}_public, public;", name))
+ );
+ println!("");
+ }
+ Err(err) => {
+ println!("{}", colorize("error", Level::Sad));
+ println!("{}", err);
+ }
+ };
+ }
+
+ Ok(())
+}
+
+fn syntax_highlight(text: &str) -> String {
+ if !std::io::stdout().is_terminal() {
+ return text.to_owned();
+ }
+
+ text.split(" ")
+ .into_iter()
+ .map(|word| {
+ let uppercase = word.chars().all(|c| c.is_ascii_uppercase());
+
+ if uppercase {
+ word.cyan().to_string()
+ } else {
+ word.to_owned()
+ }
+ })
+ .join(" ")
+}
+
+fn colorize(text: &str, level: Level) -> String {
+ if !std::io::stdout().is_terminal() {
+ return text.to_owned();
+ }
+
+ match level {
+ Level::Happy => text.green().to_string(),
+ Level::Sad => text.red().to_string(),
+ Level::Concerned => text.yellow().to_string(),
+ }
+}
+
+macro_rules! user_input {
+ ($var:expr, $prompt:expr, $default:expr) => {{
+ if $var.is_none() {
+ let prompt = format!("{}:", $prompt);
+ let prompt = if let Some(default) = $default {
+ Text::new(&prompt).with_default(default).prompt()?
+ } else {
+ Text::new(&prompt).prompt()?
+ };
+ prompt.to_string()
+ } else {
+ $var.unwrap()
+ }
+ }};
+
+ ($var:expr, $prompt:expr) => {{
+ user_input!($var, $prompt, None)
+ }};
+
+ ($var:expr) => {{
+ user_input!($var, strginfy!($var))
+ }};
+}
+
+use user_input;
diff --git a/pgml-sdks/pgml/src/lib.rs b/pgml-sdks/pgml/src/lib.rs
index cd0eaaeef..5da2c3f1f 100644
--- a/pgml-sdks/pgml/src/lib.rs
+++ b/pgml-sdks/pgml/src/lib.rs
@@ -13,6 +13,8 @@ use tracing::Level;
use tracing_subscriber::FmtSubscriber;
mod builtins;
+#[cfg(feature = "python")]
+mod cli;
mod collection;
mod filter_builder;
mod languages;
@@ -146,6 +148,7 @@ fn migrate(py: pyo3::Python) -> pyo3::PyResult<&pyo3::PyAny> {
fn pgml(_py: pyo3::Python, m: &pyo3::types::PyModule) -> pyo3::PyResult<()> {
m.add_function(pyo3::wrap_pyfunction!(init_logger, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(migrate, m)?)?;
+ m.add_function(pyo3::wrap_pyfunction!(cli::cli, m)?)?;
m.add_class::()?;
m.add_class::()?;
m.add_class::()?;
diff --git a/pgml-sdks/pgml/src/sql/fdw.sql b/pgml-sdks/pgml/src/sql/fdw.sql
new file mode 100644
index 000000000..059336bc4
--- /dev/null
+++ b/pgml-sdks/pgml/src/sql/fdw.sql
@@ -0,0 +1,22 @@
+
+ CREATE SERVER "{db_name}"
+ FOREIGN DATA WRAPPER postgres_fdw
+ OPTIONS (
+ host '{host}',
+ port '{port}',
+ dbname '{database_name}'
+ );
+
+ CREATE USER MAPPING
+ FOR CURRENT_USER
+ SERVER "{db_name}"
+ OPTIONS (
+ user '{user}',
+ password '{password}'
+ );
+
+ CREATE SCHEMA "{db_name}_{schema}";
+
+ IMPORT FOREIGN SCHEMA "{schema}"
+ FROM SERVER "{db_name}"
+ INTO "{db_name}_{schema}";
diff --git a/pgml-sdks/pgml/src/sql/fdw_drop.sql b/pgml-sdks/pgml/src/sql/fdw_drop.sql
new file mode 100644
index 000000000..52631e002
--- /dev/null
+++ b/pgml-sdks/pgml/src/sql/fdw_drop.sql
@@ -0,0 +1,9 @@
+
+ DROP SCHEMA IF EXISTS "{db_name}_{schema}" CASCADE;
+
+ DROP USER MAPPING IF EXISTS
+ FOR CURRENT_USER
+ SERVER "{db_name}"
+ CASCADE;
+
+ DROP SERVER IF EXISTS "{db_name}" CASCADE;
diff --git a/pgml-sdks/pgml/src/utils.rs b/pgml-sdks/pgml/src/utils.rs
index c8a2f8039..a8c040bc9 100644
--- a/pgml-sdks/pgml/src/utils.rs
+++ b/pgml-sdks/pgml/src/utils.rs
@@ -49,7 +49,9 @@ pub fn get_file_contents(path: &Path) -> anyhow::Result {
"pdf" => {
let doc = Document::load(path)
.with_context(|| format!("Error reading PDF file: {}", path.display()))?;
- doc.get_pages().into_keys().map(|page_number| {
+ doc.get_pages()
+ .into_keys()
+ .map(|page_number| {
doc.extract_text(&[page_number]).with_context(|| {
format!("Error extracting content from PDF file: {}", path.display())
})
From b5c2af39c379ef7aaeea1724b29aa13be72148e5 Mon Sep 17 00:00:00 2001
From: Lev
Date: Tue, 28 Nov 2023 13:46:21 -0800
Subject: [PATCH 2/9] version
---
pgml-sdks/pgml/Cargo.lock | 2 +-
pgml-sdks/pgml/Cargo.toml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pgml-sdks/pgml/Cargo.lock b/pgml-sdks/pgml/Cargo.lock
index 93babebf0..b377c4167 100644
--- a/pgml-sdks/pgml/Cargo.lock
+++ b/pgml-sdks/pgml/Cargo.lock
@@ -1439,7 +1439,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "pgml"
-version = "0.9.6"
+version = "0.10.0"
dependencies = [
"anyhow",
"async-trait",
diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml
index d5f685973..ac7dc23ed 100644
--- a/pgml-sdks/pgml/Cargo.toml
+++ b/pgml-sdks/pgml/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "pgml"
-version = "0.9.6"
+version = "0.10.0"
edition = "2021"
authors = ["PosgresML "]
homepage = "https://postgresml.org/"
From dd054832d19ec5e9f2d7e0ba6375dad621ec03a7 Mon Sep 17 00:00:00 2001
From: Lev Kokotov
Date: Wed, 29 Nov 2023 15:55:17 -0800
Subject: [PATCH 3/9] JS CLI
---
pgml-sdks/pgml/javascript/package-lock.json | 4 +-
pgml-sdks/pgml/src/cli.rs | 74 +++++++++++++++++++--
pgml-sdks/pgml/src/lib.rs | 14 +++-
pgml-sdks/pgml/src/sql/fdw_drop.sql | 3 +-
4 files changed, 83 insertions(+), 12 deletions(-)
diff --git a/pgml-sdks/pgml/javascript/package-lock.json b/pgml-sdks/pgml/javascript/package-lock.json
index 08aedb865..9ab5f611e 100644
--- a/pgml-sdks/pgml/javascript/package-lock.json
+++ b/pgml-sdks/pgml/javascript/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "pgml",
- "version": "0.8.1",
+ "version": "0.9.6",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "pgml",
- "version": "0.8.1",
+ "version": "0.9.6",
"license": "MIT",
"devDependencies": {
"@types/node": "^20.3.1",
diff --git a/pgml-sdks/pgml/src/cli.rs b/pgml-sdks/pgml/src/cli.rs
index 9cc4171cf..9c1733347 100644
--- a/pgml-sdks/pgml/src/cli.rs
+++ b/pgml-sdks/pgml/src/cli.rs
@@ -3,12 +3,15 @@ use colored::Colorize;
use inquire::Text;
use is_terminal::IsTerminal;
use itertools::Itertools;
+#[cfg(feature = "python")]
use pyo3::exceptions::PyRuntimeError;
+#[cfg(feature = "python")]
use pyo3::prelude::*;
use sqlx::{Acquire, Executor};
use std::io::Write;
/// PostgresML CLI
+// #[cfg(feature = "python")]
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None, name = "pgml", bin_name = "pgml")]
struct Python {
@@ -17,7 +20,20 @@ struct Python {
module: Option,
#[command(subcommand)]
- subcomand: Subcommands,
+ subcommand: Subcommands,
+}
+
+/// PostgresML CLI
+// #[cfg(feature = "python")]
+#[derive(Parser, Debug, Clone)]
+#[command(author, version, about, long_about = None, name = "pgml", bin_name = "pgml")]
+struct Javascript {
+ /// Ignore this argument, we're running as `node`.
+ #[arg(name = "pgmlcli")]
+ pgmlcli: Option,
+
+ #[command(subcommand)]
+ subcommand: Subcommands,
}
#[derive(Subcommand, Debug, Clone)]
@@ -77,6 +93,7 @@ enum Level {
Concerned,
}
+#[cfg(feature = "python")]
#[pyfunction]
pub fn cli(py: pyo3::Python) -> pyo3::PyResult<&pyo3::PyAny> {
ctrlc::set_handler(move || {
@@ -93,10 +110,57 @@ pub fn cli(py: pyo3::Python) -> pyo3::PyResult<&pyo3::PyAny> {
})
}
+#[cfg(feature = "javascript")]
+pub fn cli(
+ mut cx: neon::context::FunctionContext,
+) -> neon::result::JsResult {
+ ctrlc::set_handler(move || {
+ println!("");
+ std::process::exit(1);
+ })
+ .expect("failed to set ctrl-c handler");
+
+ use neon::prelude::*;
+ use rust_bridge::javascript::IntoJsResult;
+ let channel = cx.channel();
+ let (deferred, promise) = cx.promise();
+ deferred
+ .try_settle_with(&channel, move |mut cx| {
+ let runtime = crate::get_or_set_runtime();
+ let x = runtime.block_on(cli_internal());
+ let x = match x {
+ Ok(x) => x,
+ Err(e) => {
+ // Node has its own ctrl-c handler, so we need to handle it here.
+ if e.to_string()
+ .contains("Operation was interrupted by the user")
+ {
+ std::process::exit(1);
+ } else {
+ panic!("{e}");
+ }
+ }
+ };
+ x.into_js_result(&mut cx)
+ })
+ .expect("Error sending js");
+ Ok(promise)
+}
+
async fn cli_internal() -> anyhow::Result<()> {
- let args = Python::parse();
+ #[cfg(feature = "python")]
+ let subcommand = {
+ let args = Python::parse();
+ args.subcommand
+ };
+
+ #[cfg(feature = "javascript")]
+ let subcommand = {
+ let args = Javascript::parse();
+ args.subcommand
+ };
- match args.subcomand {
+ match subcommand {
Subcommands::Connect {
name,
host,
@@ -161,8 +225,8 @@ async fn connect(
println!("");
if std::env::var("DATABASE_URL").is_err() && database_url.is_none() {
- println!("Looks like the DATABASE_URL environment variable is not set.");
- println!("We need it to be able to connect to your PostgresML database.");
+ println!("Required DATABASE_URL environment variable is not set.");
+ println!("We need it to connect to your PostgresML database.");
println!("");
let database_url = user_input!(None::, "DATABASE_URL");
std::env::set_var("DATABASE_URL", database_url);
diff --git a/pgml-sdks/pgml/src/lib.rs b/pgml-sdks/pgml/src/lib.rs
index 5da2c3f1f..e8056dd9f 100644
--- a/pgml-sdks/pgml/src/lib.rs
+++ b/pgml-sdks/pgml/src/lib.rs
@@ -4,7 +4,7 @@
//!
//! With this SDK, you can seamlessly manage various database tables related to documents, text chunks, text splitters, LLM (Language Model) models, and embeddings. By leveraging the SDK's capabilities, you can efficiently index LLM embeddings using PgVector for fast and accurate queries.
-use sqlx::PgPool;
+use sqlx::{postgres::PgPoolOptions, PgPool};
use std::collections::HashMap;
use std::env;
use std::sync::RwLock;
@@ -13,7 +13,6 @@ use tracing::Level;
use tracing_subscriber::FmtSubscriber;
mod builtins;
-#[cfg(feature = "python")]
mod cli;
mod collection;
mod filter_builder;
@@ -62,7 +61,15 @@ async fn get_or_initialize_pool(database_url: &Option) -> anyhow::Result
if let Some(pool) = pools.get(url) {
Ok(pool.clone())
} else {
- let pool = PgPool::connect_lazy(url)?;
+ let timeout = std::env::var("PGML_CHECKOUT_TIMEOUT")
+ .unwrap_or_else(|_| "5000".to_string())
+ .parse::()
+ .expect("Error parsing PGML_CHECKOUT_TIMEOUT, expected an integer");
+
+ let pool = PgPoolOptions::new()
+ .acquire_timeout(std::time::Duration::from_millis(timeout))
+ .connect_lazy(&url)?;
+
pools.insert(url.to_string(), pool.clone());
Ok(pool)
}
@@ -195,6 +202,7 @@ fn migrate(
fn main(mut cx: neon::context::ModuleContext) -> neon::result::NeonResult<()> {
cx.export_function("init_logger", init_logger)?;
cx.export_function("migrate", migrate)?;
+ cx.export_function("cli", cli::cli)?;
cx.export_function("newCollection", collection::CollectionJavascript::new)?;
cx.export_function("newModel", model::ModelJavascript::new)?;
cx.export_function("newSplitter", splitter::SplitterJavascript::new)?;
diff --git a/pgml-sdks/pgml/src/sql/fdw_drop.sql b/pgml-sdks/pgml/src/sql/fdw_drop.sql
index 52631e002..d1f0deb9f 100644
--- a/pgml-sdks/pgml/src/sql/fdw_drop.sql
+++ b/pgml-sdks/pgml/src/sql/fdw_drop.sql
@@ -3,7 +3,6 @@
DROP USER MAPPING IF EXISTS
FOR CURRENT_USER
- SERVER "{db_name}"
- CASCADE;
+ SERVER "{db_name}";
DROP SERVER IF EXISTS "{db_name}" CASCADE;
From d95e1ee2554bf9f7fc03fdeb940bde7804288d6c Mon Sep 17 00:00:00 2001
From: Lev Kokotov
Date: Wed, 29 Nov 2023 15:58:20 -0800
Subject: [PATCH 4/9] Javascript CLI
---
pgml-sdks/pgml/javascript-cli/index.js | 3 ++
.../pgml/javascript-cli/package-lock.json | 32 +++++++++++++++++++
pgml-sdks/pgml/javascript-cli/package.json | 27 ++++++++++++++++
3 files changed, 62 insertions(+)
create mode 100755 pgml-sdks/pgml/javascript-cli/index.js
create mode 100644 pgml-sdks/pgml/javascript-cli/package-lock.json
create mode 100644 pgml-sdks/pgml/javascript-cli/package.json
diff --git a/pgml-sdks/pgml/javascript-cli/index.js b/pgml-sdks/pgml/javascript-cli/index.js
new file mode 100755
index 000000000..165c4f768
--- /dev/null
+++ b/pgml-sdks/pgml/javascript-cli/index.js
@@ -0,0 +1,3 @@
+#!/usr/bin/env node
+const pgml = require("pgml");
+pgml.cli().then(() => {});
diff --git a/pgml-sdks/pgml/javascript-cli/package-lock.json b/pgml-sdks/pgml/javascript-cli/package-lock.json
new file mode 100644
index 000000000..650c18fc4
--- /dev/null
+++ b/pgml-sdks/pgml/javascript-cli/package-lock.json
@@ -0,0 +1,32 @@
+{
+ "name": "pgml-cli",
+ "version": "0.10.0",
+ "lockfileVersion": 3,
+ "requires": true,
+ "packages": {
+ "": {
+ "name": "pgml-cli",
+ "version": "0.10.0",
+ "license": "MIT",
+ "dependencies": {
+ "pgml": "file:../javascript"
+ },
+ "bin": {
+ "pgml": "index.js"
+ }
+ },
+ "../javascript": {
+ "name": "pgml",
+ "version": "0.9.6",
+ "license": "MIT",
+ "devDependencies": {
+ "@types/node": "^20.3.1",
+ "cargo-cp-artifact": "^0.1"
+ }
+ },
+ "node_modules/pgml": {
+ "resolved": "../javascript",
+ "link": true
+ }
+ }
+}
diff --git a/pgml-sdks/pgml/javascript-cli/package.json b/pgml-sdks/pgml/javascript-cli/package.json
new file mode 100644
index 000000000..26f8c03db
--- /dev/null
+++ b/pgml-sdks/pgml/javascript-cli/package.json
@@ -0,0 +1,27 @@
+{
+ "name": "pgml-cli",
+ "version": "0.10.0",
+ "description": "Open Source Alternative for Building End-to-End Vector Search Applications without OpenAI & Pinecone",
+ "keywords": [
+ "postgres",
+ "machine learning",
+ "vector databases",
+ "embeddings"
+ ],
+ "bin": {
+ "pgml": "index.js"
+ },
+ "author": {
+ "name": "PostgresML",
+ "email": "team@postgresml.org",
+ "url": "https://postgresml.org"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/postgresml/postgresml"
+ },
+ "license": "MIT",
+ "dependencies": {
+ "pgml": "file:../javascript"
+ }
+}
From b9c4eb5caf80daf70fe282d32002dd2b7b9f58f4 Mon Sep 17 00:00:00 2001
From: Lev Kokotov
Date: Wed, 29 Nov 2023 16:01:37 -0800
Subject: [PATCH 5/9] fix deps
---
pgml-sdks/pgml/javascript-cli/package.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pgml-sdks/pgml/javascript-cli/package.json b/pgml-sdks/pgml/javascript-cli/package.json
index 26f8c03db..e4df2a5e5 100644
--- a/pgml-sdks/pgml/javascript-cli/package.json
+++ b/pgml-sdks/pgml/javascript-cli/package.json
@@ -22,6 +22,6 @@
},
"license": "MIT",
"dependencies": {
- "pgml": "file:../javascript"
+ "pgml": "0.10.0"
}
}
From f3c78e13d30f59a9863f2b2a68c97c5c9f8901ba Mon Sep 17 00:00:00 2001
From: Lev Kokotov
Date: Wed, 29 Nov 2023 16:02:48 -0800
Subject: [PATCH 6/9] desc
---
pgml-sdks/pgml/javascript-cli/package.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pgml-sdks/pgml/javascript-cli/package.json b/pgml-sdks/pgml/javascript-cli/package.json
index e4df2a5e5..791b52e6a 100644
--- a/pgml-sdks/pgml/javascript-cli/package.json
+++ b/pgml-sdks/pgml/javascript-cli/package.json
@@ -1,7 +1,7 @@
{
"name": "pgml-cli",
"version": "0.10.0",
- "description": "Open Source Alternative for Building End-to-End Vector Search Applications without OpenAI & Pinecone",
+ "description": "CLI for PostgresML, the GPU-powered AI application database.",
"keywords": [
"postgres",
"machine learning",
From 2500645de926b4938f0da390689e68eed30454f8 Mon Sep 17 00:00:00 2001
From: Lev Kokotov
Date: Wed, 29 Nov 2023 16:05:50 -0800
Subject: [PATCH 7/9] cleanup
---
pgml-sdks/pgml/python/pgml/__main__.py | 3 +--
pgml-sdks/pgml/src/cli.rs | 4 ++--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/pgml-sdks/pgml/python/pgml/__main__.py b/pgml-sdks/pgml/python/pgml/__main__.py
index f3f2ec981..9bbdae756 100644
--- a/pgml-sdks/pgml/python/pgml/__main__.py
+++ b/pgml-sdks/pgml/python/pgml/__main__.py
@@ -2,7 +2,6 @@
from pgml import cli
async def main():
- await cli()
+ await cli()
asyncio.run(main())
-
diff --git a/pgml-sdks/pgml/src/cli.rs b/pgml-sdks/pgml/src/cli.rs
index 9c1733347..359006c9f 100644
--- a/pgml-sdks/pgml/src/cli.rs
+++ b/pgml-sdks/pgml/src/cli.rs
@@ -11,7 +11,7 @@ use sqlx::{Acquire, Executor};
use std::io::Write;
/// PostgresML CLI
-// #[cfg(feature = "python")]
+#[cfg(feature = "python")]
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None, name = "pgml", bin_name = "pgml")]
struct Python {
@@ -24,7 +24,7 @@ struct Python {
}
/// PostgresML CLI
-// #[cfg(feature = "python")]
+#[cfg(feature = "javascript")]
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None, name = "pgml", bin_name = "pgml")]
struct Javascript {
From bbef1b8835439839fa239805b65ebc12dc475145 Mon Sep 17 00:00:00 2001
From: Lev Kokotov
Date: Wed, 29 Nov 2023 16:12:25 -0800
Subject: [PATCH 8/9] syntax highlight hah
---
pgml-sdks/pgml/src/sql/fdw.sql | 4 ++--
pgml-sdks/pgml/src/sql/fdw_drop.sql | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/pgml-sdks/pgml/src/sql/fdw.sql b/pgml-sdks/pgml/src/sql/fdw.sql
index 059336bc4..75ae64f00 100644
--- a/pgml-sdks/pgml/src/sql/fdw.sql
+++ b/pgml-sdks/pgml/src/sql/fdw.sql
@@ -7,8 +7,8 @@
dbname '{database_name}'
);
- CREATE USER MAPPING
- FOR CURRENT_USER
+ CREATE USER MAPPING
+ FOR CURRENT_USER
SERVER "{db_name}"
OPTIONS (
user '{user}',
diff --git a/pgml-sdks/pgml/src/sql/fdw_drop.sql b/pgml-sdks/pgml/src/sql/fdw_drop.sql
index d1f0deb9f..8735489ee 100644
--- a/pgml-sdks/pgml/src/sql/fdw_drop.sql
+++ b/pgml-sdks/pgml/src/sql/fdw_drop.sql
@@ -1,7 +1,7 @@
DROP SCHEMA IF EXISTS "{db_name}_{schema}" CASCADE;
- DROP USER MAPPING IF EXISTS
+ DROP USER MAPPING IF EXISTS
FOR CURRENT_USER
SERVER "{db_name}";
From c4c1c50fdcc69871412a2ec2be37aae5f0c24197 Mon Sep 17 00:00:00 2001
From: Lev
Date: Fri, 1 Dec 2023 16:48:24 -0800
Subject: [PATCH 9/9] version bump
---
pgml-sdks/pgml/pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pgml-sdks/pgml/pyproject.toml b/pgml-sdks/pgml/pyproject.toml
index df80ecb74..0d1843cbb 100644
--- a/pgml-sdks/pgml/pyproject.toml
+++ b/pgml-sdks/pgml/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "pgml"
requires-python = ">=3.7"
-version = "0.9.6"
+version = "0.10.0"
description = "Python SDK is designed to facilitate the development of scalable vector search applications on PostgreSQL databases."
authors = [
{name = "PostgresML", email = "team@postgresml.org"},
--- a PPN by Garber Painting Akron. With Image Size Reduction included!Fetched URL: http://github.com/postgresml/postgresml/pull/1201.patch
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy