0% found this document useful (0 votes)

8 views6 pages

Cleaning Data

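The document contains a Jupyter notebook that processes a sales dataset using pandas. It includes reading a CSV file, checking data types, cleaning the 'Revenue' column (stripping the '$' sign and converting to integer), and computing descriptive statistics. Additionally, it attempts to plot a histogram of a separate 'movies' dataset but fails with a NameError because 'plt' (matplotlib.pyplot) was never imported; the final cell sketches duplicate-handling snippets for a 'height_weight' dataset.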

Uploaded by

juanfe86

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

8 views6 pages

Cleaning Data

Uploaded by

juanfe86

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 6

{

"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"sales = pd.read_csv('C:\\\\Users\\\\juanf\\\\OneDrive\\\\Escritorio\\\\sales.csv')\n",
"sales.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SalesOrderID int64\n",
"Revenue object\n",
"Quantity int64\n",
"dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 2 entries, 0 to 1\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 SalesOrderID 2 non-null int64 \n",
" 1 Revenue 2 non-null object\n",
" 2 Quantity 2 non-null int64 \n",
"dtypes: int64(2), object(1)\n",
"memory usage: 180.0+ bytes\n"
]
}
],
"source": [
"sales.info()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'23153$1457$'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales['Revenue'].sum()\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"sales['Revenue']=sales['Revenue'].str.strip('$')\n",
"sales['Revenue']=sales['Revenue'].astype('int')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"assert sales['Revenue'].dtype == 'int'\n",
"assert 1+1==2"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SalesOrderID</th>\n",
" <th>Revenue</th>\n",
" <th>Quantity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.000000</td>\n",
" <td>2.000000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>43659.500000</td>\n",
" <td>12305.000000</td>\n",
" <td>7.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.707107</td>\n",
" <td>15341.388725</td>\n",
" <td>7.071068</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>43659.000000</td>\n",
" <td>1457.000000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>43659.250000</td>\n",
" <td>6881.000000</td>\n",
" <td>4.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>43659.500000</td>\n",
" <td>12305.000000</td>\n",
" <td>7.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>43659.750000</td>\n",
" <td>17729.000000</td>\n",
" <td>9.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>43660.000000</td>\n",
" <td>23153.000000</td>\n",
" <td>12.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" SalesOrderID Revenue Quantity\n",
"count 2.000000 2.000000 2.000000\n",
"mean 43659.500000 12305.000000 7.000000\n",
"std 0.707107 15341.388725 7.071068\n",
"min 43659.000000 1457.000000 2.000000\n",
"25% 43659.250000 6881.000000 4.500000\n",
"50% 43659.500000 12305.000000 7.000000\n",
"75% 43659.750000 17729.000000 9.500000\n",
"max 43660.000000 23153.000000 12.000000"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'plt' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[17], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mplt\u001b[49m\u001b[38;5;241m.\u001b[39mhist(movies[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mavg_rating\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwhatever\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[1;31mNameError\u001b[0m: name 'plt' is not defined"
]
}
],
"source": [
"plt.hist(movies['avg_rating'])\n",
"plt.title('whatever')\n",
"> dt.date.today()\n",
"movies.drop(movies[movies['avg_rating']>5].index, inplace = True)\n",
"assert movies['avg_rating'].max()<=5\n",
"movies.loc[movies['avg_rating']>5,'avg_rating']=5\n",
"user_signups['subscription_date'] = pd.to_datetime(user_signups['subscription_date']).dt.date\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Uniqueness constraints (chapter)\n",
"duplicates = height_weight.duplicated()\n",
"height_weight[duplicates]\n",
"\n",
"column_names = ['first_name','last_name','address']\n",
"duplicates = height_weight.duplicated(subset = column_names, keep = False)\n",
"height_weight[duplicates].sort_values(by = 'first_name')\n",
"\n",
"height_weight.drop_duplicates(inplace = True)\n",
"\n",
"column_names = ['first_name','last_name','address']\n",
"summaries = {'height':'max','weight':'mean'}\n",
"height_weight = height_weight.groupby(by = column_names).agg(summaries).reset_index()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Origin",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "8ab9798c9dbdc377a44780d03c1df422753fd5bdb3f454154c68fcd412ec471b"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Essential n8n Playbook
From Everand
Essential n8n Playbook
Leandro Calado
No ratings yet
Honda Monkey z50j Workshop Manual
100% (5)
Honda Monkey z50j Workshop Manual
276 pages
Pathogen Inctivation in Donated Blood
No ratings yet
Pathogen Inctivation in Donated Blood
30 pages
Gold Exp A1 U1to3 Review Lang Test B
No ratings yet
Gold Exp A1 U1to3 Review Lang Test B
3 pages
2021-08 Structural Loads A New and Unexpected Roof Snow Drift
No ratings yet
2021-08 Structural Loads A New and Unexpected Roof Snow Drift
3 pages
Surveying Instruments
100% (1)
Surveying Instruments
21 pages
Sales - Project - v3 (2) .Ipynb
No ratings yet
Sales - Project - v3 (2) .Ipynb
1,230 pages
Exploratory Data Analysis BCG - Ipynb
No ratings yet
Exploratory Data Analysis BCG - Ipynb
273 pages
Exploratory Data Analysis BCG - Ipynb
No ratings yet
Exploratory Data Analysis BCG - Ipynb
260 pages
Simple Linear Regression PDF
No ratings yet
Simple Linear Regression PDF
40 pages
1 4-EDA Ipynb
No ratings yet
1 4-EDA Ipynb
12 pages
TCS Stock Data - Live and Latest-Checkpoint - Ipynb
No ratings yet
TCS Stock Data - Live and Latest-Checkpoint - Ipynb
172 pages
Time Series Forecasting Jupyter Code - Ipynb
No ratings yet
Time Series Forecasting Jupyter Code - Ipynb
2,484 pages
Stock Price Prediction - Ipynb
No ratings yet
Stock Price Prediction - Ipynb
62 pages
Az4 Ipynb
No ratings yet
Az4 Ipynb
17 pages
Stoxydom Prediction - Ipynb
No ratings yet
Stoxydom Prediction - Ipynb
112 pages
1 Linear Regression - Ipynb
No ratings yet
1 Linear Regression - Ipynb
16 pages
Vecinos Mas Cercanos Ejercicio Propuesto PDF
No ratings yet
Vecinos Mas Cercanos Ejercicio Propuesto PDF
945 pages
KnnImputer Ipynb
No ratings yet
KnnImputer Ipynb
6 pages
1 Introduction To Statsmodels
No ratings yet
1 Introduction To Statsmodels
28 pages
Arima Text
No ratings yet
Arima Text
49 pages
1.vecinos Mas Cercanos Ejercicio Propuesto PDF
No ratings yet
1.vecinos Mas Cercanos Ejercicio Propuesto PDF
1,053 pages
House Prices - Ipynb
No ratings yet
House Prices - Ipynb
23 pages
Stock Price Prediction Project Using TensorFlow - Ipynb
No ratings yet
Stock Price Prediction Project Using TensorFlow - Ipynb
186 pages
RecommendationSystem - R5 - Project7 - Amazon Product - Ipynb
No ratings yet
RecommendationSystem - R5 - Project7 - Amazon Product - Ipynb
112 pages
Sesion 01b Pandas V1.ipynb
No ratings yet
Sesion 01b Pandas V1.ipynb
197 pages
Market Prediction - Ipynb
No ratings yet
Market Prediction - Ipynb
42 pages
01 Quantopian Research Basics
No ratings yet
01 Quantopian Research Basics
25 pages
Task 1
No ratings yet
Task 1
5 pages
Heart Disease Prediction - Ipynb
No ratings yet
Heart Disease Prediction - Ipynb
207 pages
Supply Chain Analytics
No ratings yet
Supply Chain Analytics
20 pages
Codigo Phyton
No ratings yet
Codigo Phyton
8 pages
Coca Cola Stock Analysis - Ipynb
No ratings yet
Coca Cola Stock Analysis - Ipynb
197 pages
Linear Regression With Python - Part 1
No ratings yet
Linear Regression With Python - Part 1
167 pages
ML Lab-1
No ratings yet
ML Lab-1
5 pages
Import As Import As Import As Import: Pandas PD Numpy NP Matplotlib - Pyplot PLT Sklearn DF PD - Read - CSV DF
No ratings yet
Import As Import As Import As Import: Pandas PD Numpy NP Matplotlib - Pyplot PLT Sklearn DF PD - Read - CSV DF
9 pages
2 3-SVM Ipynb
No ratings yet
2 3-SVM Ipynb
111 pages
Untitled 0
No ratings yet
Untitled 0
537 pages
MLT Ann Lab 2
No ratings yet
MLT Ann Lab 2
7 pages
Credit Card Fraud Detection V29.Ipynb
No ratings yet
Credit Card Fraud Detection V29.Ipynb
976 pages
Heart Attack - Ipynb
No ratings yet
Heart Attack - Ipynb
162 pages
信用卡欺诈数据分析 01分类样本不平衡
No ratings yet
信用卡欺诈数据分析 01分类样本不平衡
16 pages
Projet 2 Classification Des Crédits
No ratings yet
Projet 2 Classification Des Crédits
24 pages
VoThaiThaoNhi ECON209 F2024 Lab 2
No ratings yet
VoThaiThaoNhi ECON209 F2024 Lab 2
10 pages
Data Science Cohort 1 Assignment 1.ipynb
No ratings yet
Data Science Cohort 1 Assignment 1.ipynb
53 pages
DACLUSTER
No ratings yet
DACLUSTER
9 pages
Week 4
No ratings yet
Week 4
13 pages
Project 2
No ratings yet
Project 2
40 pages
Regresión Lineal Con Python - Ipynb
No ratings yet
Regresión Lineal Con Python - Ipynb
83 pages
Shared Bike Demand Analysis - Ipynb
No ratings yet
Shared Bike Demand Analysis - Ipynb
390 pages
02-Pandas Data Visualization Exercises
No ratings yet
02-Pandas Data Visualization Exercises
53 pages
Krishna
No ratings yet
Krishna
278 pages
Ejercicio Bayes - Ipynb
No ratings yet
Ejercicio Bayes - Ipynb
148 pages
Code
No ratings yet
Code
7 pages
Marketing Campaigns Analysis - Ipynb
No ratings yet
Marketing Campaigns Analysis - Ipynb
138 pages
Copy of Final Project
No ratings yet
Copy of Final Project
16 pages
DS Task 1.ipynb
No ratings yet
DS Task 1.ipynb
92 pages
2
No ratings yet
2
2 pages
BD WPS2
No ratings yet
BD WPS2
23 pages
Pandas Tutorial - Top 40 Useful Tricks - Ipynb
No ratings yet
Pandas Tutorial - Top 40 Useful Tricks - Ipynb
316 pages
Diabetes Prediction - Ipynb
No ratings yet
Diabetes Prediction - Ipynb
69 pages
Data Mining - Project
100% (2)
Data Mining - Project
11 pages
Pandas
No ratings yet
Pandas
91 pages
No Ph.D. Game Design With Three.js
From Everand
No Ph.D. Game Design With Three.js
Nikiforos Kontopoulos
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet
Api 5l Grade B Pipe
No ratings yet
Api 5l Grade B Pipe
3 pages
Cambridge International AS & A Level: Biology 9700/41 October/November 2022
No ratings yet
Cambridge International AS & A Level: Biology 9700/41 October/November 2022
19 pages
Dalmatiaquarnero01jackson PDF
No ratings yet
Dalmatiaquarnero01jackson PDF
488 pages
Class 6 Sample Paper
No ratings yet
Class 6 Sample Paper
5 pages
The Phantom Airmanrter
100% (1)
The Phantom Airmanrter
29 pages
EN - User Manual - SmartGuard 63A S0
No ratings yet
EN - User Manual - SmartGuard 63A S0
105 pages
Analytical Chemistry
No ratings yet
Analytical Chemistry
7 pages
Assignment1 - Google PDF
No ratings yet
Assignment1 - Google PDF
4 pages
Mastercam 2017 Instructor Guides
No ratings yet
Mastercam 2017 Instructor Guides
530 pages
Product Manual: 2-Wire HART 7 Temperature Transmitter
No ratings yet
Product Manual: 2-Wire HART 7 Temperature Transmitter
57 pages
HMDS Mechanism Kinetics
No ratings yet
HMDS Mechanism Kinetics
14 pages
KTU-S5 To S8-draftsyllabus-CIVILENGG-Final PDF
No ratings yet
KTU-S5 To S8-draftsyllabus-CIVILENGG-Final PDF
130 pages
Mr. Faisal Zia 5kW System
No ratings yet
Mr. Faisal Zia 5kW System
11 pages
Empowerment Technologies Q1 - Module 1
No ratings yet
Empowerment Technologies Q1 - Module 1
4 pages
Cbse Class 10 Science Assertion - Reason Questions For Term 2 Exam 2022
0% (1)
Cbse Class 10 Science Assertion - Reason Questions For Term 2 Exam 2022
2 pages
2 Memory Storage
No ratings yet
2 Memory Storage
26 pages
Effect Sizes Means
No ratings yet
Effect Sizes Means
10 pages
7 Asia Location, Extent, Political and Physical Features
No ratings yet
7 Asia Location, Extent, Political and Physical Features
11 pages
Thermal Analysis of Perforated Pin-Fins Heat Sink
No ratings yet
Thermal Analysis of Perforated Pin-Fins Heat Sink
9 pages
Rand
No ratings yet
Rand
2 pages
The DDS Tutorial: Release
No ratings yet
The DDS Tutorial: Release
35 pages
Year 7 Formative Assessment 4 - CHN
No ratings yet
Year 7 Formative Assessment 4 - CHN
8 pages
Piagetian Lesson Plan
No ratings yet
Piagetian Lesson Plan
6 pages
HR Practices in Google
100% (6)
HR Practices in Google
12 pages
Concepts of Language Learning
No ratings yet
Concepts of Language Learning
36 pages

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Cleaning Data

Uploaded by

Cleaning Data

Uploaded by

{

You might also like

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.