HTML Code
HTML Code
Python Commands
print(): Outputs data to console
    print("Welcome to Python!")  # Prints: Welcome to Python!
range(): Generates a sequence of numbers
    for i in range(3):
        print(i)  # Prints: 0, 1, 2
import: Imports a module or library
    import math
    print(math.pi)  # Prints: 3.141592653589793
if/elif/else: Conditional logic
    x = 10
    if x > 5:
        print("Big")
    else:
        print("Small")
    # Prints: Big
open(): Opens a file for reading/writing
    with open("example.txt", "w") as f:
        f.write("Hello")  # Creates file with text
list.append(): Adds an item to a list
    my_list = []
    my_list.append(5)
    print(my_list)  # Prints: [5]
dict.get(): Retrieves value from dictionary
    my_dict = {"key": "value"}
    print(my_dict.get("key"))  # Prints: value
PySpark Commands
df.show(): Displays first n rows of DataFrame
    df.show(3)  # Shows first 3 rows
df.printSchema(): Displays DataFrame schema
    df.printSchema()  # Shows column names and types
df.withColumnRenamed(): Renames a column
    df.withColumnRenamed("old_name", "new_name").show()  # Renames column
df.drop(): Drops specified columns
    df.drop("salary").show()  # Drops salary column
df.createOrReplaceTempView(): Registers DataFrame as SQL table
    df.createOrReplaceTempView("temp_table")  # Creates SQL view
spark.sql(): Runs SQL query on DataFrame
    spark.sql("SELECT name FROM temp_table WHERE age > 30").show()  # Runs SQL query
SQL Commands
GROUP BY: Groups rows for aggregation
    SELECT department, AVG(salary)
    FROM employees
    GROUP BY department  -- Avg salary per dept
HAVING: Filters grouped results
    SELECT department, COUNT(*)
    FROM employees
    GROUP BY department
    HAVING COUNT(*) > 5  -- Depts with > 5 employees
JOIN: Combines rows from multiple tables
    SELECT e.name, d.dept_name
    FROM employees e
    JOIN departments d ON e.dept_id = d.id  -- Joins tables
LEFT JOIN: Includes all rows from left table
    SELECT e.name, d.dept_name
    FROM employees e
    LEFT JOIN departments d ON e.dept_id = d.id  -- Left join
LIMIT: Restricts number of returned rows
    SELECT * FROM employees
    LIMIT 5  -- Returns first 5 rows