IMDB
IMDB
import pandas as pd
import matplotlib.pyplot as plt
Out[3]:
IMDB
Title Genre Premiere Runtime Language
Score
August 5,
0 Enter the Anime Documentary 58 2.5 English/Japanese
2019
August 21,
1 Dark Forces Thriller 81 2.6 Spanish
2020
Science December
2 The App 79 2.6 Italian
fiction/Drama 26, 2019
January 19,
3 The Open House Horror thriller 94 3.2 English
2018
October 30,
4 Kaali Khuhi Mystery 90 3.4 Hindi
2020
Springsteen on December
581 One-man show 153 8.5 English
Broadway 16, 2018
In [4]: df.info()  # call the method; a bare `df.info` only shows the bound-method repr, not the summary
In [6]: df.isna().sum()  # per-column count of missing values
Out[6]: Title 0
Genre 0
Premiere 0
Runtime 0
IMDB Score 0
Language 0
dtype: int64
In [7]: df.columns  # display the column labels of the frame
In [23]: language = (
    df['Language']
    .value_counts()                # occurrences of each distinct Language value
    .sort_values(ascending=False)  # largest counts first
    .head(10)                      # keep only the ten most frequent
)
language
Out[23]: Language
English 401
Hindi 33
Spanish 31
French 20
Italian 14
Portuguese 12
Indonesian 9
Korean 6
Japanese 6
German 5
Name: count, dtype: int64
In [36]: Genre = (
    df["Genre"]
    .value_counts()                # occurrences of each distinct Genre value
    .sort_values(ascending=False)  # largest counts first
    .head(10)                      # keep only the ten most frequent
)
Genre
Out[36]: Genre
Documentary 159
Drama 77
Comedy 49
Romantic comedy 39
Thriller 33
Comedy-drama 14
Crime drama 11
Biopic 9
Horror 9
Action 7
Name: count, dtype: int64
In [37]: plt.plot(language,Genre)
# x = top-10 language counts, y = top-10 genre counts. The two series are
# paired purely by rank position (1st vs 1st, 2nd vs 2nd, ...).
# NOTE(review): the points do not relate a language to a genre - confirm this
# is the intended comparison.
plt.xlabel("Language Count")
plt.ylabel("Genre Count")
plt.title("Language vs Genre")
plt.show()  # render the figure instead of echoing the Text(...) repr of the last call