From 73331fcc5d78766462609f4cce89d684dfae5469 Mon Sep 17 00:00:00 2001
From: Dan <daniel@datafold.com>
Date: Wed, 22 Nov 2023 15:32:37 -0700
Subject: [PATCH] revert databricks information_schema

Keeping the information_schema approach would also require updating _process_table_schema.
---
 data_diff/databases/databricks.py | 69 ++++++++++++++-----------------
 1 file changed, 30 insertions(+), 39 deletions(-)

diff --git a/data_diff/databases/databricks.py b/data_diff/databases/databricks.py
index de5ea8b7..c755cfa9 100644
--- a/data_diff/databases/databricks.py
+++ b/data_diff/databases/databricks.py
@@ -139,47 +139,38 @@ def create_connection(self):
             raise ConnectionError(*e.args) from e
 
     def query_table_schema(self, path: DbPath) -> Dict[str, tuple]:
+        # Databricks has INFORMATION_SCHEMA only for Databricks Runtime, not for Databricks SQL.
+        # https://docs.databricks.com/spark/latest/spark-sql/language-manual/information-schema/columns.html
+        # So, to obtain the table schema, we use the cursor's columns() metadata call instead.
+
         conn = self.create_connection()
-        table_schema = {}
 
-        try:
-            table_schema = super().query_table_schema(path)
-        except:
-            logging.warning("Failed to get schema from information_schema, falling back to legacy approach.")
-
-        if not table_schema:
-            # This legacy approach can cause bugs. e.g. VARCHAR(255) -> VARCHAR(255)
-            # and not the expected VARCHAR
-
-            # I don't think we'll fall back to this approach, but if so, see above
-            catalog, schema, table = self._normalize_table_path(path)
-            with conn.cursor() as cursor:
-                cursor.columns(catalog_name=catalog, schema_name=schema, table_name=table)
-                try:
-                    rows = cursor.fetchall()
-                finally:
-                    conn.close()
-                if not rows:
-                    raise RuntimeError(f"{self.name}: Table '{'.'.join(path)}' does not exist, or has no columns")
-
-                table_schema = {r.COLUMN_NAME: (r.COLUMN_NAME, r.TYPE_NAME, r.DECIMAL_DIGITS, None, None) for r in rows}
-                assert len(table_schema) == len(rows)
-                return table_schema
-        else:
-            return table_schema
-
-    def select_table_schema(self, path: DbPath) -> str:
-        """Provide SQL for selecting the table schema as (name, type, date_prec, num_prec)"""
-        database, schema, name = self._normalize_table_path(path)
-        info_schema_path = ["information_schema", "columns"]
-        if database:
-            info_schema_path.insert(0, database)
-
-        return (
-            "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale "
-            f"FROM {'.'.join(info_schema_path)} "
-            f"WHERE table_name = '{name}' AND table_schema = '{schema}'"
-        )
+        catalog, schema, table = self._normalize_table_path(path)
+        with conn.cursor() as cursor:
+            cursor.columns(catalog_name=catalog, schema_name=schema, table_name=table)
+            try:
+                rows = cursor.fetchall()
+            finally:
+                conn.close()
+            if not rows:
+                raise RuntimeError(f"{self.name}: Table '{'.'.join(path)}' does not exist, or has no columns")
+
+            d = {r.COLUMN_NAME: (r.COLUMN_NAME, r.TYPE_NAME, r.DECIMAL_DIGITS, None, None) for r in rows}
+            assert len(d) == len(rows)
+            return d
+
+    # def select_table_schema(self, path: DbPath) -> str:
+    #     """Provide SQL for selecting the table schema as (name, type, date_prec, num_prec)"""
+    #     database, schema, name = self._normalize_table_path(path)
+    #     info_schema_path = ["information_schema", "columns"]
+    #     if database:
+    #         info_schema_path.insert(0, database)
+
+    #     return (
+    #         "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale "
+    #         f"FROM {'.'.join(info_schema_path)} "
+    #         f"WHERE table_name = '{name}' AND table_schema = '{schema}'"
+    #     )
 
     def _process_table_schema(
         self, path: DbPath, raw_schema: Dict[str, tuple], filter_columns: Sequence[str], where: str = None

<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>pFad - Phonifier reborn</title>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
</head>
<body>
<h1>Pfad - The Proxy pFad of &#169; 2024 Garber Painting. All rights reserved.</h1>


<!-- Disclaimer -->
<p>Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.</p>
<br>
<p>Alternative Proxies:</p><p><a href="http://clevelandohioweatherforecast.com/php-proxy/index.php?q=https://patch-diff.githubusercontent.com/raw/datafold/data-diff/pull/782.patch" target="_blank">Alternative Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/index.php?u=https://patch-diff.githubusercontent.com/raw/datafold/data-diff/pull/782.patch" target="_blank">pFad Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/v3index.php?u=https://patch-diff.githubusercontent.com/raw/datafold/data-diff/pull/782.patch" target="_blank">pFad v3 Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/v4index.php?u=https://patch-diff.githubusercontent.com/raw/datafold/data-diff/pull/782.patch" target="_blank">pFad v4 Proxy</a></p></body>
</html>