Demo entry 6646136

Starting code

   

Submitted by anonymous on Oct 15, 2017 at 13:30
Language: Python. Code size: 3.3 kB.

# -*- coding: utf-8 -*-
"""
Created on Sun Aug 30 10:50:43 2015

@author: jotaraul
"""
import pandas
import numpy
# any additional libraries would be imported here

# Load the whole CSV in one pass (low_memory=False) so pandas infers each
# column's dtype from the complete column.
data = pandas.read_csv('gapminder_dataset.csv', low_memory=False)

# Print some information about the dataset.  Single-argument print(...)
# calls behave the same under Python 2 and Python 3.
print("[Working with the 'gapminder' dataset!]")
print("Number of observations within the dataset: " + str(len(data)))  # number of observations (rows)
print("Number of variables                      : " + str(len(data.columns)))  # number of variables (columns)

# Upper-case all DataFrame column names.  A concrete list is built so the
# assignment does not depend on what map() returns on the running
# interpreter, and name.upper() also accepts unicode names on Python 2.
data.columns = [name.upper() for name in data.columns]

# Set the variables we will be working with to numeric; errors="coerce"
# turns any value that cannot be parsed into NaN instead of raising.
for column in ('CO2EMISSIONS', 'OILPERPERSON', 'URBANRATE', 'RELECTRICPERPERSON'):
    data[column] = pandas.to_numeric(data[column], errors="coerce")

#counts and percentages (i.e. frequency distributions) for each variable

# CO2 EMISSIONS VARIABLE

# This dataset has only continuous variables, so each one is discretized
# into bins before running a frequency analysis.
print("\n[Frequency analysis for the 'CO2EMISSIONS' variable]")

# Bin edges grow by a factor of ten: (0, 1e7], (1e7, 1e8], ..., (1e11, 1e12].
# Missing values and values outside the edges fall into no bin.
filter_values = [0, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12]
out = pandas.cut(data['CO2EMISSIONS'], bins=filter_values)

# Counts per bin, listed in bin order (sort=False) rather than by frequency.
# The Series method replaces the deprecated top-level pandas.value_counts().
c1 = out.value_counts(sort=False)
print("Number of observations with values: " + str(c1.sum()))
print("Counts for the 2006 cumulative CO2 emission (metric tons) since 1751:")
print(c1)

# Same table expressed as fractions of the binned observations.
p1 = out.value_counts(sort=False, normalize=True)
print("Percentages for cumulative CO2 emission:")
print(p1)

# OIL PER PERSON VARIABLE
print("\n[Frequency analysis for the 'OILPERPERSON' variable]")

# Unit-wide bins with edges 0, 1, ..., 13 (numpy.arange excludes the stop
# value 14).  Missing values and values outside the edges fall into no bin.
filter_values = numpy.arange(0, 14, 1)
out = pandas.cut(data['OILPERPERSON'], bins=filter_values)

# Counts per bin, listed in bin order (sort=False) rather than by frequency.
# The Series method replaces the deprecated top-level pandas.value_counts().
c2 = out.value_counts(sort=False)
print("Number of observations with values: " + str(c2.sum()))
print("Counts for the 2010 oil Consumption per capita (tonnes per year and person):")
print(c2)

# Same table expressed as fractions of the binned observations.
p2 = out.value_counts(sort=False, normalize=True)
print("Percentages for oil consumption")
print(p2)

# URBAN RATE VARIABLE
print("\n[Frequency analysis for the 'URBANRATE' variable]")

# Ten-point bins covering the full percentage range: edges 0, 10, ..., 100.
# numpy.arange excludes its stop value, so the stop must be above 100 for
# the (90, 100] bin to exist; with a stop of 100 the last edge was 90 and
# pandas.cut silently turned every rate above 90% into NaN, dropping it
# from both the counts and the percentages.
filter_values = numpy.arange(0, 101, 10)
out = pandas.cut(data['URBANRATE'], bins=filter_values)

# Counts per bin, listed in bin order (sort=False) rather than by frequency.
# The Series method replaces the deprecated top-level pandas.value_counts().
c3 = out.value_counts(sort=False)
print("Number of observations with values: " + str(c3.sum()))
print("Counts for the percentage of urban population in countries in 2008 (%):")
print(c3)

# Same table expressed as fractions of the binned observations.
p3 = out.value_counts(sort=False, normalize=True)
print("Percentages of urban population")
print(p3)

# RELECTRICPERPERSON VARIABLE

print("\n[Frequency analysis for the 'RELECTRICPERPERSON' variable]")

# 1000-wide bins with edges 0, 1000, ..., 12000 (numpy.arange excludes the
# stop value 12500).  Missing values and values outside the edges fall
# into no bin.
filter_values = numpy.arange(0, 12500, 1000)
out = pandas.cut(data['RELECTRICPERPERSON'], bins=filter_values)

# Counts per bin, listed in bin order (sort=False) rather than by frequency.
# The Series method replaces the deprecated top-level pandas.value_counts().
c4 = out.value_counts(sort=False)
print("Number of observations with values: " + str(c4.sum()))
print("Counts for the 2008 residential electricity consumption, per person (kWh):")
print(c4)

# Same table expressed as fractions of the binned observations.
p4 = out.value_counts(sort=False, normalize=True)
print("Percentages for residential electricity consumption")
print(p4)

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).