Weighted histogram with different classes of observations

Analysing the distribution of firms’ productivity within an economy helps to assess the efficiency of that market. For instance, an economy with great dispersion in its productivity distribution could be less competitive than a market with a lower level of dispersion. In other words, competitive markets tend to show firms with similar productivities.

In this analysis I assumed that all firms are similar. In reality, this is not the case: in the real world, firms are very different from each other. Retail firms are labor-intensive compared with the mining industry or utilities. Thus, the so-called efficient firms within a particular sector may show a productivity level quite different from that of the efficient firms in other industrial sectors.

A good practice when studying firms’ productivity dispersion within an economy is to generate a histogram that shows how the industries are distributed across the productivity bins.

library(markdown) 
library(ggplot2) 
library(matrixStats) #install.packages("matrixStats")
library(plotrix) #weighted histograms
library(questionr) #weighted table(frequency)
library(foreign) #Import in any format (STATA old version .dta)
library(reshape2)
library(DT)
library(dplyr)

# Load the ELE4 data set from a Stata .dta file. The file name is relative,
# so the working directory is pointed at the project folder first.
setwd(dir = "C:/Users/fgreve/Dropbox/CNP/ELE")
ELE4 <- read.dta(file = "ELE4.dta")

# UF index values for each year; their ratio rescales 2014 amounts to 2015
# terms (presumably the Chilean "Unidad de Fomento" -- TODO confirm source).
UF2014 <- 23960.60
UF2015 <- 25022.00

# Income from the firm's main activity, 2014 (thousands of $ per the original
# comment): C001 minus C006, divided by 100, then scaled by UF2015 / UF2014.
ELE4[["Y2014"]] <- (ELE4[["C001"]] - ELE4[["C006"]]) / 100 * UF2015 / UF2014

# Same quantity built from C037 and C042. The original comment labelled this
# line "2014", but the variable name and the UF2015 / UF2015 factor indicate
# 2015 -- NOTE(review): confirm against the survey codebook.
ELE4[["Y2015"]] <- (ELE4[["C037"]] - ELE4[["C042"]]) / 100 * UF2015 / UF2015

# Fixed assets (land, buildings, machinery) for 2014 and 2015, in thousands
# of $ according to the original comments; raw columns are divided by 100.
ELE4[["K2014"]] <- ELE4[["C023"]] / 100
ELE4[["K2015"]] <- ELE4[["C059"]] / 100

# Total 2015 workforce: I020 + I021 (original comment: total male workers
# plus total female workers).
ELE4[["L2015"]] <- ELE4[["I020"]] + ELE4[["I021"]]

# Short aliases for the two expansion-factor columns.
ELE4[["FE_V"]] <- ELE4[["FE_Ventas_ELE4"]]
ELE4[["FE_E"]] <- ELE4[["FE_Empresas_ELE4"]]

# Keep only observations with non-zero 2015 income and non-zero workforce.
# subset() also discards rows where the condition evaluates to NA.
ELE4 <- subset(ELE4, Y2015 != 0 & L2015 != 0)

# Labor productivity: 2015 income per worker.
ELE4[["LP"]] <- ELE4[["Y2015"]] / ELE4[["L2015"]]

# One data frame per sector code found in CIIUfinal_ELE4, stored as
# ELE4_<code> (ELE4_A, ELE4_B, ...).
for (sector_code in c("A", "B", "C", "X", "F", "G", "H",
                      "I", "J", "K", "M", "Y", "Z")) {
  assign(
    paste0("ELE4_", sector_code),
    subset(ELE4, subset = ELE4$CIIUfinal_ELE4 == sector_code)
  )
}

# One data frame per value 1..15 of Region_ELE4, stored with a zero-padded
# suffix (ELE4_r01, ..., ELE4_r15).
for (region_id in 1:15) {
  assign(
    sprintf("ELE4_r%02d", region_id),
    subset(ELE4, subset = ELE4$Region_ELE4 == region_id)
  )
}

# Drop the loop counters so only the ELE4_* objects remain.
rm(sector_code, region_id)

# Work on a copy of the survey data so ELE4 itself is left untouched.
f <- ELE4

# Map each sector code in CIIUfinal_ELE4 to a human-readable label.
# A named lookup vector replaces the attach()/detach() pair: with attach(),
# column names are resolved through the search path and can be masked by
# objects in the global environment.
sector_labels <- c(
  A = "Agriculture, Silviculture y Fishery",
  B = "Minning",
  C = "Manufacture",
  X = "Utilities",
  F = "Construction",
  G = "Retail",
  H = "Transport",
  I = "Hotels",
  J = "TIC",
  K = "Banking",
  M = "Professsional-cientific Activities",
  Y = "Real estate",
  Z = "Artistic and Recreative"
)

# as.character() makes the lookup work whether the code column is a factor or
# a character vector. Codes missing from the table (or NA) yield NA, exactly
# as they were left unassigned before.
f$sector <- unname(sector_labels[as.character(f$CIIUfinal_ELE4)])

# Replace labor productivity by its natural logarithm.
f$LP <- log(f$LP)

# Bin log productivity into 15 categories: (0, 1] -> 1, (1, 2] -> 2, ...,
# (13, 14] -> 14, and everything above 14 -> 15. cut() uses right-closed
# intervals by default, which matches the "LP > a & LP <= b" rules this
# replaces, and it removes the need for attach()/detach().
# NOTE(review): observations with log(LP) <= 0 (or NaN, from a negative LP)
# fall outside every bin and get NA, so they will not appear in any later
# summary grouped by LPcat -- confirm that this exclusion is intended.
f$LPcat <- as.numeric(cut(f$LP, breaks = c(0:14, Inf), labels = FALSE))

Finally, I create the histogram.

# Weight of each observation: 2015 income times the expansion factor FE_E.
# NOTE(review): FE_E is the alias of FE_Empresas_ELE4 while a separate
# FE_Ventas_ELE4 alias (FE_V) exists; the axis label and title speak of
# "Firms" although the weight is income-based -- confirm the intended factor.
f$ventas <- f$Y2015 * f$FE_E

# Total weight per (sector, productivity bin). Naming the grouping list gives
# the result its final column names directly. aggregate() omits rows whose
# sector or LPcat is NA.
f <- aggregate(
  f$ventas,
  by = list(sector = f$sector, LPcat = f$LPcat),
  FUN = sum,
  na.rm = TRUE
)
names(f)[names(f) == "x"] <- "FE"

# Share of each cell in the grand total, so the bars sum to 100%.
suma <- sum(f$FE)
f$FE_prop <- f$FE / suma

# Stacked bars: one bar per productivity bin, filled by sector. The former
# `order` aesthetic is dropped because ggplot2 deprecated it in 2.0.0 and
# ignores it.
ggplot(f, aes(x = LPcat, y = FE_prop, fill = sector)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "right") +
  xlab("Labor Productivity") +
  ylab("Firms distribution") +
  ggtitle("Histogram of Firms") +
  scale_y_continuous(labels = scales::percent_format())

UpDog logo  Proudly hosted with UpDog.