// File: la_stats_1971-2011_analysis_public.do // Creator: Diarmuid McDonnell // Created: 22/08/2018 /* Syntax file for producing analysis underpinning Research Note: McDonnell, D. P., Mohan, J., & Norman, P. "Charity Density and Social Need: A Longitudinal Perspective". Nonprofit and Voluntary Sector Quarterly. Questions can be directed to corresponding author Diarmuid McDonnell (https://diarmuidm.github.io/). The files needed for analysis: - la_stats_1971-2011_analysis_public.dta OR - la_stats_1971-2011_analysis_public.csv The codebook describing the variables: la_stats_1971-2011_analysis_codebook.txt Contents: - Define Paths [DP001] - Descriptive Statistics [DS001] - Statistical Modelling [SM001] */ ******* Preliminaries ******* // These are all handled by profile.do /* clear capture clear matrix set mem 400m // not necessary in recent versions of Stata set more off, perm set scrollbufsize 2048000 exit */ ** [DP001] include "C:\Users\mcdonndz-local\Desktop\github\a-tale-of-four-cities\do_files\leverhulme_paths.doi" /* Empty figures folder */ **shell rmdir $path1 /s /q /* pwd local workdir "C:\Users\mcdonndz-local\Desktop\github\a-tale-of-four-cities\figures\" cd `workdir' local datafiles: dir "`workdir'" files "*.gph" foreach datafile of local datafiles { rm `datafile' } local datafiles: dir "`workdir'" files "*.png" foreach datafile of local datafiles { rm `datafile' } */ /* 1. Open the clean dataset */ **global tdate = subinstr("$S_DATE", " ", "", .) // Make sure I update this at the beginning of every analysis session. **di `tdate' **local tdate "27Mar2018" capture log close **log using $path9\rqone_`tdate'.log, text replace use $path3\la_stats_1971-2011_analysis.dta, clear capture datasignature report count desc, f notes /* NOTES: - Correlation matrix of Townsend to charpop for each year (see John's email and Liverpool slide). [DONE] - Decide on the correct correlation statistic: either Spearman's rank correlation coefficient or Pearson's r of the rank variables. [DONE] - Table of means of TS quintiles and charpop, by year (try and graph this; see old Stirling notes). - Consider standardised version of charpop. [DONE] - Consider violin plot. [DONE] - Look at weights and what they tell us (see Slack comments). [DONE] - Look at distribution of new charities between census years i.e. are they basing themselves in more deprived areas? Unlikely but worth checking. - Say something about differences between local authorities in terms of types of charities i.e. deprivation quintiles and % of social services. - Add in other explanatory variables for sensitivity analysis: `pph_cat` and ONS stats (see raw data folder) - Deciles of town and link to charpop; describe the relationship better i.e. sharp drop in charpop initially, then a leveling out, then an upward tick. */ /* Export version for archiving */ /* Rationalise number of variables. [DONE] Label all important variables. [DONE] Produce codebook. Export .dta and .csv formats. */ preserve // Rationalise notes and variables notes drop _dta in 8/20 desc, f drop ln_punemp-cunum_19 lregpop-cprcv *merge label variable town2 "Squared value of Townsend Score" label variable lag_charpop "Number of charities per 5,000 residents in previous census period" label variable lag_lcharpop "Number of local charities per 5,000 residents in previous census period" label variable lag_ncharpop "Number of national charities per 5,000 residents in previous census period" label variable lag_ocharpop "Number of overseas charities per 5,000 residents in previous census period" forvalues i = 1/19 { label variable lag_charpop_icnpo`i' "Number of icnpo`i' charities per 5,000 residents in previous census period" } label variable ncharpop "Number of national charities per 5,000 residents" label variable ocharpop "Number of overseas charities per 5,000 residents" label variable charpop_change "Change in charity density between census periods" label variable town_change "Change in deprivation between census periods" label variable town2_change "Change in deprivation (squared term) between census periods" label variable pph_cat_change "Change in urban/rural density between census periods" label variable lag_charpop_change "Change in prior charity density between census periods" label variable rural_change "Local authority became more rural between census periods" label variable urban_change "Local authority became more urban between census periods" desc, f // Produce codebook capture log using C:\Users\mcdonndz-local\Desktop\github\a-tale-of-four-cities\logs\la_stats_1971-2011_analysis_codebook.txt, replace text desc, f capture log close // Export .dta and .csv versions sav $path3\la_stats_1971-2011_analysis_public.dta, replace export delimited using $path3\la_stats_1971-2011_analysis_public.csv, replace restore egen std_charpop = std(charpop) sum std* charpop, detail return list dotplot std_charpop , over(year) yline(-2 0 2, lpatt(longdash)) vioplot std_charpop , over(year) yline(-2 0 2, lpatt(longdash)) obs **************************************************************************************************************************** **************************************************************************************************************************** /* Map of charity density */ preserve use $path3\la_stats_1971-2011_maps.dta, clear desc, f l la_code la_name charpop_mean sum charpop_mean, detail *hist charpop_mean , norm freq scheme(s1mono) replace charpop_mean = round(charpop_mean, 1) capture grmap, activate grmap charpop_mean , clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) /// title("Mean Charity Density (1971-2011)") subtitle("By local authority") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) /// caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall)) graph export $path6\la_stats_charpop_map.png, replace width(4096) forvalues y = 1971(10)2011 { grmap charpop`y', clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) /// title("Mean Charity Density (`y')") subtitle("By local authority") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) /// caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall)) graph export $path6\la_stats_charpop_map_`y'.png, replace width(4096) } restore /* Map of material deprivation */ preserve use $path3\la_stats_1971-2011_maps.dta, clear desc, f l la_code la_name town_mean sum town_mean, detail *hist town_mean , norm freq scheme(s1mono) replace town_mean = round(town_mean, 1) capture grmap, activate grmap town_mean , clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) /// title("Mean Townsend Score (1971-2011)") subtitle("By local authority") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) /// caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall)) graph export $path6\la_stats_town_map.png, replace width(4096) forvalues y = 1971(10)2011 { grmap town`y', clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) /// title("Mean Townsend Score (`y')") subtitle("By local authority") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) /// caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall)) graph export $path6\la_stats_town_map_`y'.png, replace width(4096) } restore /* Sample description */ /* Overall statistics for the dependent and independent variables. */ xtdes count // Table A1. Percentages of people or households for each component measure of the Townsend Score, by census year tabstat punemp pnonown pnocar povercr, s(p50) by(year) // Lower than figures in Lloyd et al (2017) due to absence of Scottish LAs // Table 2. tabstat popest town *cunum *charpop, s(p5 p25 p50 p75 p95 mean sd n) tabstat popest town *cunum *charpop, s(p5 p25 p50 p75 p95 mean sd n) by(year) sum charpop, detail hist charpop , norm freq // Poisson distribution // Figure 1. bys year: sum charpop, detail /* Look at percentage changes in charpop over time, by different areas and percentiles/quintiles of charpop. */ spearman charpop year quietly sum charpop, detail local cmean = `r(mean)' local numobs = `r(N)' local fdate "20190129" dotplot charpop , over(year) median title("Distribution of Charity Density") subtitle("By census year") /// ytitle("No. of charities per 5,000 residents") yline(`cmean', lpatt(solid) lcolor(gs12)) /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Solid line represents the mean value for the entire period." /// "Spearman's rho = .41 (p < .001)", span size(small)) /// scheme(s1mono) graph export $path6\la_stats_charpop_`fdate'.png, replace width(4096) sum lcharpop, detail dotplot lcharpop, over(year) median // Exclude extreme outliers; I like this graph graph export $path6\la_stats_lcharpop_`fdate'.png, replace width(4096) /* Not much difference between the two plots, unsurprising given that Local charities are the most common type. */ // Figure #. spearman town year quietly sum town, detail local numobs = `r(N)' dotplot town, over(year) median title("Distribution of Deprivation") subtitle("By census year") /// ytitle("Townsend Score") yline(0.00, lpatt(solid) lcolor(gs12)) /// yline(-2 2, lpatt(longdash) lcolor(gs12)) /// text(-5.00 1971 "Less deprived", place(e) size(vsmall) color(gs1)) /// text(13.00 1971 "More deprived", place(e) size(vsmall) color(gs1)) /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'. Produced: $S_DATE" /// "Local authorities above the solid line are more deprived than average in a given year, while those below are less deprived." /// "Local authorities above and below the dashed lines are two standard deviations or more away from the average." /// "Spearman's rho = -.61 (p < .001)", span size(vsmall)) /// scheme(s1mono) graph export $path6\la_stats_town_`fdate'.png, replace width(4096) // Change over time in ratio of charities to population preserve collapse (p50) medchar=cunumpchange (p5) fchar=cunumpchange (p25) tchar=cunumpchange (p75) schar=cunumpchange (p95) nchar=cunumpchange (mean) mchar=cunumpchange, by(year) desc, f list foreach var of varlist medchar-mchar { replace `var'=0 if year==1971 } lgraph fchar tchar medchar mchar schar nchar year, /// wide title("Percentage Change in Registered Charities") subtitle("By census year") ytitle("% change") /// ylabel(, labsize(small)) xlabel(, labsize(small)) /// legend(label(1 "p5") label(2 "p25") label(3 "median") label(4 "mean") label(5 "p75") label(6 "p95") size(vsmall) rows(2)) /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" /// "1971 = base year.", span size(vsmall)) /// scheme(s1mono) graph export $path6\la_stats_cunumchange_`fdate'.png, replace width(4096) restore preserve collapse (p50) medchar=charpoppchange (p5) fchar=charpoppchange (p25) tchar=charpoppchange (p75) schar=charpoppchange (p95) nchar=charpoppchange (mean) mchar=charpoppchange, by(year) desc, f list foreach var of varlist medchar-mchar { replace `var'=0 if year==1971 } lgraph fchar tchar medchar mchar schar nchar year, /// wide title("Percentage Change in Charity/Population Ratio") subtitle("By census year") ytitle("% change") /// ylabel(, labsize(small)) xlabel(, labsize(small)) /// legend(label(1 "p5") label(2 "p25") label(3 "median") label(4 "mean") label(5 "p75") label(6 "p95") size(vsmall) rows(2)) /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" /// "1971 = base year.", span size(vsmall)) /// scheme(s1mono) graph export $path6\la_stats_charpopchange_`fdate'.png, replace width(4096) restore sum charpoppchange, detail dotplot charpoppchange if charpoppchange <= r(p99), over(year) median // Exclude extreme outliers; I like this graph // Figure A1. preserve collapse (p50) medchar=cunumpchange medpop=popestpchange medratio=charpoppchange, by(year) desc, f list replace medchar=0 if year==1971 replace medpop=0 if year==1971 replace medratio=0 if year==1971 twoway (scatter medchar year, c(l) m(Th) xlab(1971(10)2011)) (scatter medpop year, c(l) m(Oh)) (scatter medratio year, c(l) m(Dh)), /// title("Percentage Change in LA Characteristics") subtitle("By census year") ytitle("Median change (%)") /// legend(label(1 "Number of charities") label(2 "Size of population") label(3 "Charity/Population ratio") size(vsmall) rows(1)) /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" /// "1971 = base year.", span size(vsmall)) /// scheme(s1mono) graph export $path6\la_stats_typchange_`fdate'.png, replace width(4096) restore // Changes in the level of registered charities and population over time tabstat popest cunum, s(p50) by(year) // Lower than figures in Lloyd et al (2017) due to absence of Scottish LAs // Variation in charpop over time - UPDATE!! tabstat charpop if la_name!="City of London", s(min max range p50) by(year) di 37.9327/1.364685 di 66.64366/2.138951 di 90.14657/3.498927 di 68.04243/4.581362 di 66.46719/4.750844 tabstat lcharpop if la_name!="City of London", s(min max range p50) by(year) di 29.13164/.972625 di 33.27556/1.426185 di 37.03299/1.875568 di 40.59825/4.252478 di 35.61769/4.187781 // Produce correlation matrix of char ratios for a given year (Table 2) pwcorr charpop lcharpop ncharpop ocharpop, star(.05) obs sig bys year: pwcorr charpop lcharpop ncharpop ocharpop, star(.05) obs sig /* Correlation between these variables for each time period. Now look at correlation within a variable across time periods. */ preserve keep la_id charpop period reshape wide charpop , i(la_id) j(period) desc, f list pwcorr charpop* , star(.05) obs sig twoway (scatter charpop5 charpop1) (lfit charpop5 charpop1) /// , xtitle("Density 1971") ytitle("Density 2011") /// legend(off) scheme(s1mono) **pwcorr charpop* lcharpop* , star(.05) obs sig **pwcorr charpop* ncharpop* , star(.05) obs sig **pwcorr charpop* ocharpop* , star(.05) obs sig restore preserve keep la_id charpop_rank period reshape wide charpop_rank , i(la_id) j(period) desc, f list pwcorr charpop_rank* , star(.05) obs sig **pwcorr charpop* lcharpop* , star(.05) obs sig **pwcorr charpop* ncharpop* , star(.05) obs sig **pwcorr charpop* ocharpop* , star(.05) obs sig restore // Change in ranking for charpop /* hist charpop_rank_change /// , norm by(year) /// title("Distribution of Change in Charity Density Ranking") subtitle("By census year") /// scheme(s1mono) hist lcharpop_rank_change, norm scheme(s1mono) by(year) hist ncharpop_rank_change, norm scheme(s1mono) by(year) hist ocharpop_rank_change, norm scheme(s1mono) by(year) */ bys year: sum charpop_rank_change, detail dotplot charpop_rank_change , over(year) title("Distribution of Change in Density Ranking") subtitle("By census year") /// ytitle("Change in ranking") yline(0, lpatt(solid) lcolor(gs12)) /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" /// "Solid line represents the mean value for the entire period." , span size(vsmall)) /// scheme(s1mono) graph export $path6\la_stats_charpoprank_`fdate'.png, replace width(4096) /* Double-check the normal distribution of these variables is correct and not a relic of data management gremlins. */ // Coefficient of variation for charpop_ranking hist cprcv /// , norm percent scheme(s1mono) title("Variability in Charity Density Ranking") subtitle("1971-2011") /// xtitle("Coefficient of Variation (%)") sum cprcv, detail list la_name charpop_rank cunum popest year cprcv if cprcv >= `r(p95)' list la_name charpop_rank cunum popest year cprcv if cprcv <= `r(p5)' // Coefficient of variation for charity density sum overallcv, detail list la_name charpop cunum popest year overallcv if overallcv >= `r(p95)' list la_name charpop cunum popest year overallcv if overallcv <= `r(p5)' sum lcv, detail list la_name lcharpop cunum popest year lcv if lcv >= `r(p95)' list la_name lcharpop cunum popest year lcv if lcv <= `r(p5)' sum ncv, detail list la_name ncharpop cunum popest year ncv if ncv >= `r(p95)' list la_name ncharpop cunum popest year ncv if ncv <= `r(p5)' sum ocv, detail list la_name ocharpop cunum popest year ocv if ocv >= `r(p95)' list la_name ocharpop cunum popest year ocv if ocv <= `r(p5)' /* This is quite good actually. Plenty of variability in the charpop ratio. Do the same for cunum and popest. And move this code to the data management do file. */ // Correlation of charpop over time spearman charpop year // Strong, positive assocation i.e. more density over time. /* Bivariate associations between key indicators */ // Association between number of charities and population pwcorr cunum popest, star(.05) obs sig twoway (scatter cunum popest) (lfit cunum popest) bysort year: pwcorr cunum popest, star(.05) obs sig pwcorr lcunum popest, star(.05) obs sig bysort year: pwcorr lcunum popest, star(.05) obs sig /* More evidence of the strong predictive power of population size on the number of charitable organisations */ // Association between ratio and deprivation /* See what happens if I exclude the 95th percentile of town score as well: does the association become linear? */ preserve forvalues i = 1/5 { quietly sum town if period==`i', detail tab la_name if period==`i' & town >=`r(p95)' drop if town >=`r(p95)' & period==`i' } restore /* There is an argument to exclude London from the analysis; see what John says. */ // Produce correlation matrix of deprivation to char ratio (Table 3) spearman charpop town bysort year: spearman charpop town spearman lcharpop town spearman ncharpop town spearman ocharpop town bys year: spearman lcharpop town bys year: spearman ncharpop town bys year: spearman ocharpop town corr charpop town bysort year: corr charpop town // Check robustness using Kendall's tao ktau charpop town bysort year: ktau charpop town /* Unpack these findings: still appears to be a moderate/strong association in the same direction. What happens if I exclude London LAs? https://en.wikipedia.org/wiki/Local_government_in_London */ /* Figure 2. Distribution of charity density, by Townsend Score and census year */ local fdate "20190129" preserve levelsof year, local(levels) foreach l of local levels { quietly sum charpop if year==`l', detail *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (scatter charpop town if year==`l' & london==., msymbol(Oh) mcolor(%100)) /// (scatter charpop town if year==`l' & london==1, msymbol(X)) (qfit charpop town if year==`l', lwidth(medium) lpatt(shortdash)) /// , title("`l'") /// ylabel(, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(g`l') *graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096) } graph combine g1971 g1981 g1991 g2001 g2011 /// , title("Distribution of Charity Density") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Local authorities based in London are marked with an X.", span size(small)) graph export $path6\la_stats_scatter_overall_`fdate'.png, replace width(4096) restore // Now graph the ranking variables preserve *drop if charpop>60 *separate charpop, by(period) levelsof year, local(levels) foreach l of local levels { quietly sum charpop if year==`l', detail return list twoway (scatter charpop_rank town_rank if year==`l', msymbol(Oh) mcolor(%70)) (fpfit charpop_rank town_rank if year==`l') /// , title("`l'") /// ytitle("No. of charities per 5,000 residents (rank)") xtitle("Townsend Score (rank)") /// ylabel(, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(r`l') graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096) } graph combine r1971 r1981 r1991 r2001 r2011 /// , title("Distribution of Charities/Population Ratio") subtitle("By Townsend Score") graph export $path6\la_stats_rank_overall_`fdate'.png, replace width(4096) restore sort year charpop_rank list la_name year charpop_rank town_rank if period==3 bys year: spearman charpop_rank town_rank bys year: spearman charpop town /* Gives the same result. */ pwcorr charpop_rank town_rank, star(.05) obs sig pwcorr charpop_rank town_rank [w=popest], star(.05) obs sig bys year: pwcorr charpop_rank town_rank, star(.05) obs sig bys year: pwcorr charpop_rank town_rank [w=popest], star(.05) obs sig /* Weights make very little difference to correlations. */ twoway (scatter charpop_rank town_rank, msymbol(Oh) mcolor(%70) yscale(reverse)) (lfit charpop_rank town_rank), /// by(year) /* The little cluster in the bottom left are the London LAs which rank very highly for deprivation and the number of charities. I think Pearson correlation for rank values is the best approach. */ // Weighted by popest and cunum pwcorr charpop town, star(.05) obs sig pwcorr charpop town [aweight=popest], star(.05) obs sig pwcorr charpop town [aweight=cunum], star(.05) obs sig /* This is playing out exactly like Dave said it would! Unpack this: */ pwcorr charpop popest, star(.05) obs sig // Small, negative association between charpop and popest pwcorr charpop cunum, star(.05) obs sig // Moderate, positive association between charpop and cunum pwcorr cunum popest, star(.05) obs sig // Strong, positive association between cunum and popest xtile popest_quin = popest, n(5) tab popest_quin table popest_quin year, c(p50 charpop p50 cunum) // This is an interesting table // Ratio of charities to population for each year and quintile tab quin year forvalues i = 1/5 { tabstat charpop if period==`i', s(mean p50 n) by(quin) sum charpop if period==`i', detail table quin year if charpop<=`r(p99)' & period==`i', c(p50 charpop) } // Need to exclude the 99th percentile of charpop /* Focus on the quintiles: pop, charpop, lcharpop. If I'm interpreting this right, with the exception of 2001 the lowest quintile (i.e. most deprived) had the highest ratio of charities per population (considerably more) if using the mean. The median tells a completely different story! Convert to a table: year in columns, quintiles in rows. */ /* Association between 1971 and 2011 */ /* Subsector analysis */ // Produce Figure 3 by ICNPO -> produce graphs for every icnpo category but perhaps focus on 4 subsectors in particular (most common, substantively interesting?) **************************************************************************************************************************** **************************************************************************************************************************** /* Change in ranking over time */ /* Variables: - popest_rank_change - cunum_rank_change - charpop_rank_change */ // Table of key stats for our four areas cls preserve keep if la_name=="Birmingham" | la_name=="Tower Hamlets" | la_name=="Bolton" | la_name=="Burnley" table la_name year, c(mean popest mean town mean charpop mean charpoppchange) format(%9.0fc) table la_name year, c(mean popest_rank mean town_rank mean charpop_rank) format(%9.0fc) *bysort year: corr charpop town *twoway (scatter charpop town) (lfit charpop town) restore /* Create variables capturing % change from 1971 - 2011, and ranking of percentage change. */ tab charpop_rank if year==1971, sort list la_id la_name year charpop_rank if charpop_rank <=10 & year==1971 // Top 10 in 1971 list la_id la_name year charpop_rank if charpop_rank >=321 & year==1971 // Bottom 10 in 1971 // Top 10 in 1971 preserve keep if charpop_rank <=10 | charpop_rank >=321 levelsof la_id if charpop_rank <=10 & year==1971, local(top10) foreach l of local top10 { list la_name year charpop charpop_rank charpop_rank_change if la_id==`l', clean noobs table la_name year if la_id==`l', c(mean charpop mean charpop_rank mean charpop_rank_change) } /* Very little change in rankings, even as charpop varies to some degree. */ restore // Bottom 10 in 1971 preserve keep if charpop_rank <=10 | charpop_rank >=321 levelsof la_id if charpop_rank >=321 & year==1971, local(bottom10) foreach l of local bottom10 { list la_name year charpop charpop_rank charpop_rank_change if la_id==`l', clean noobs table la_name year if la_id==`l', c(mean charpop mean charpop_rank mean charpop_rank_change) } /* Little change in rankings, even as charpop increases over time. */ restore // Which local authorities experienced the largest change in charpop and charpop_rank, by year? **************************************************************************************************************************** **************************************************************************************************************************** /* Statistical modelling [SM001] */ /* The dependent variable is charpop; the independents are townsend score (polynomial), census year, lagged density, urban/rural, and interactions. Approach: - between story: sectoral differences in density by deprivation for each census year [DONE] - within story: geographic differences in density by deprivation for each census year (change score model) [DONE] Idea: create separate regressions by census year. [DONE] Sample reduction: drop 1st and 99th percentiles of charpop in each year (include in loop). [DONE] There is dependence in the data that needs to be accounted for, either through robust standard errors or longitudinal models (re, fe). Try a range of estimators, including regress (and longitudinal extensions of it). Graph predicted counts in the same manner as Wo (2018) and Clifford (2012). Comment on the symmetry (or otherwise) of the predicted counts i.e. is it better to be one standard deviation above or below in terms of nonprofit density? The raw data suggests that the relationship is symmetrical, at least for recent years. KEY ISSUE: DECIDE ON FUNCTIONAL FORM OF DEPRIVATION. [DONE] */ sum charpop hist charpop, norm freq tabstat charpop, s(mean sd) by(year) // Looks to be over-dispersion and no zeroes; use tnbreg // Check shape of relationship between town and town2 and outcome scatter town2 town scatter charpop town2, by(period) /* Not sure what to do here... ask the Socstats group. */ // Estimate multivariate Zero-Truncated Negative Binomial Regression models /* A single model with interactions between deprivation and census year, multiple models, one for each census year. */ gen charpop_discrete = round(charpop) l charpop_discrete in 1/10 // Pooled cross section model - check for effect of year and interaction with deprivation tnbreg charpop town town2 ib1.period , nolog vce(robust) test 2.period 3.period 4.period 5.period // test if period variable is significant overall - yes it is tnbreg charpop town town2 ib1.period c.town#i.period, nolog vce(robust) test 2.period 3.period 4.period 5.period // test if period variable is significant overall - yes it is test 2.period#c.town 3.period#c.town 4.period#c.town 5.period#c.town // interactions overall are significant /* This provides justification for separate regressions with a lagged variable a la Clifford (2012). */ // 5.2.1 Overall charity density // 1971 tab pph_cat if period==1 quietly sum town if period==1, detail local min = `r(min)' local max = `r(max)' // try charpop_discrete tnbreg charpop_discrete town town2 ib4.pph_cat if period==1, nolog vce(robust) tnbreg charpop town town2 ib4.pph_cat if period==1, nolog vce(robust) tnbreg charpop town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall capture vif collin town town2 pph_cat if period==1 **margins , at(town = (`min'(.75)`max')) grand // view predicted counts **marginsplot, noci allsim nolabels name(m1) predict pr_charpop_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables tabstat charpop if period==1, s(mean p50) by(pph_cat) // sense-check the results of the margins command // Remove town2 and pph_cat to see the effects of multicollinearity tnbreg charpop town ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. capture vif collin town pph_cat if period==1 listcoef , percent help // view predicted change in outcome by independent variables tnbreg charpop town town2 if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. collin town town2 if period==1 listcoef , percent help // view predicted change in outcome by independent variables /* Why doesn't tnbreg report the results of LR test for alpha = 0? It's because of 'vce(robust)'; removing this option reveals a significant LR test, meaning a negative binomial model is more appropriate than Poisson. Would qvgraph work with results of count model? [YES] */ // Quasi-variance comparison intervals /* The results of this command tell us whether there are statistically significant differences between quintiles, not just in reference to the base category. */ /* qv ib4.pph_cat qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) /// title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") /// bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg1) **graph save $path4\qvgraph_journal_20160726.gph, replace */ // 1981-2011 forvalues i = 2/5 { tab pph_cat if period==`i' quietly sum town if period==`i', detail local min = `r(min)' local max = `r(max)' tpoisson charpop town town2 ib4.pph_cat lag_charpop if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall capture vif collin town town2 pph_cat lag_charpop if period==`i' *margins , at(town = (`min'(.75)`max')) grand // view predicted counts *marginsplot, noci allsim nolabels name(m`i') predict pr_charpop_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables **qv ib4.pph_cat **qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) /// **title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") /// **bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg`i') } /* Check if LR test for alpha is significant for these years - the test says no overdispersion but the variance is clearly greater than the mean. */ // Graph the predicted and observed number of charities by year gen pr_charpop = . forvalues i = 1/5 { replace pr_charpop = pr_charpop_`i' if period==`i' } list la_name year charpop pr_charpop /* Figure 3. Distribution of predicted number of charities per 5000 residents, by Townsend Score and census year */ // Use marginsplot results /* graph combine m1 m2 m3 m4 m5 /// , title("Distribution of Predicted Charity Density") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Local authorities based in London are marked with an X.", span size(small)) graph export $path6\la_stats_scatter_pr-overall_`fdate'.png, replace width(4096) */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_charpop!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { quietly sum charpop if year==`l', detail *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_charpop town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(6)30, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(g`l') *graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096) } graph combine g1971 g1981 g1991 g2001 g2011 /// , title("Distribution of Predicted Charity Density") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_scatter_pr-overall_`fdate'.png, replace width(4096) restore // Sensitivity analysis - London effect /* Two analyses: - Add a London binary predictor to the existing models - Estimate the models for local authorities, excluding London-based */ recode london 1=1 *=0 tab london levelsof year, local(levels) foreach l of local levels { quietly sum charpop if year==`l', detail *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (scatter charpop town if year==`l' & london==0, msymbol(Oh) mcolor(%100)) /// (qfit charpop town if year==`l', lwidth(medium) lpatt(shortdash)) /// , title("`l'") /// ylabel(, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(lo`l') *graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096) } graph combine lo1971 lo1981 lo1991 lo2001 lo2011 /// , title("Distribution of Charity Density") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Local authorities based in London are excluded.", span size(small)) graph export $path6\la_stats_scatter_overall_nolondon_`fdate'.png, replace width(4096) // 1971 tnbreg charpop town town2 ib4.pph_cat london if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict s_pr_charpop_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1981-2011 forvalues i = 2/5 { tpoisson charpop town town2 ib4.pph_cat lag_charpop london if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict s_pr_charpop_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } list charpop london year if period==3 // Graph the predicted and observed number of charities by year gen s_pr_charpop = . forvalues i = 1/5 { replace s_pr_charpop = s_pr_charpop_`i' if period==`i' } list la_name year charpop s_pr_charpop /* Figure 4. Sensitivity analysis: Distribution of predicted number of charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * preserve levelsof year, local(levels) foreach l of local levels { quietly sum charpop if year==`l', detail *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit s_pr_charpop town if year==`l', lwidth(thick)) /// , title("`l'") /// ylabel(, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(s`l') *graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096) } graph combine s1971 s1981 s1991 s2001 s2011 /// , title("Distribution of Predicted Charity Density") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_scatter_s_pr-overall_`fdate'.png, replace width(4096) restore /* The results are robust to the inclusion of a London explanatory factor, though the variable itself is significant for most years. */ // 5.2.2 Geographic scale of operation [SM002] /* Selected scales: - Local - National - Overseas TASK: consider the correct model specification i.e. zero-inflated, zero-truncated, poisson etc */ sum lcharpop ncharpop ocharpop, detail tab1 lcharpop ncharpop ocharpop, sort bys year: tabstat lcharpop ncharpop ocharpop, s(mean v) by(pph_cat) /* lcharpop can be modelled using zero-truncated negative binomial approach; ocharpop using zero-inflated poisson; ncharpop using poisson or negative binomial. */ // National charities nbreg ncharpop town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_nat_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables /* Statistically significant result for alpha LR test - go with nbreg. */ // 1981-2011 forvalues i = 2/5 { poisson ncharpop town town2 ib4.pph_cat lag_ncharpop if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict pr_nat_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } /* LR test for alpha is significant for every year except 2011 - switch to standard poisson for 2011. */ // Graph the predicted and observed number of charities by year gen pr_nat = . forvalues i = 1/5 { replace pr_nat = pr_nat_`i' if period==`i' } list la_name year ncharpop pr_nat /* Figure 4. Distribution of predicted number of National charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_nat!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable pr_nat " " label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_nat town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(2)10, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') /* Could potentially add a scatter of London or urban/rural to see where these points lie along the line. */ } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (National Charities)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_nat_`fdate'.png, replace width(4096) restore // Overseas charities tab year ocharpop if ocharpop==0 /* Lots of zeroes for 1971 and 1981, very few or none for 1991, 2001 and 2011 - zip for early years, poisson for later years. */ // 1971 zip ocharpop town town2 ib4.pph_cat if period==1, nolog inflate(town town2 ib4.pph_cat) vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test [ocharpop]town [ocharpop]town2 // test if deprivation variables are significant overall test [ocharpop]1.pph_cat [ocharpop]2.pph_cat [ocharpop]3.pph_cat [ocharpop]5.pph_cat // test if urban/rural is significant overall predict pr_ove_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1981 zip ocharpop town town2 ib4.pph_cat lag_ocharpop if period==2, nolog inflate(town town2 ib4.pph_cat lag_ocharpop) vce(robust) fitstat test [ocharpop]town [ocharpop]town2 // test if deprivation variables are significant overall test [ocharpop]1.pph_cat [ocharpop]2.pph_cat [ocharpop]3.pph_cat [ocharpop]5.pph_cat // test if urban/rural is significant overall predict pr_ove_2 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1991 - 2011 forvalues i = 3/5 { poisson ocharpop town town2 ib4.pph_cat lag_ocharpop if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_ove_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } // Graph the predicted and observed number of charities by year gen pr_ove = . forvalues i = 1/5 { replace pr_ove = pr_ove_`i' if period==`i' } list la_name year ocharpop pr_ove /* Figure 5. Distribution of predicted number of National charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_ove!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable pr_ove " " label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_ove town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(1)6, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (Overseas Charities)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_ove_`fdate'.png, replace width(4096) restore // Local charities // 1971 tnbreg lcharpop town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_loc_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1981-2011 forvalues i = 2/5 { tpoisson lcharpop town town2 ib4.pph_cat lag_lcharpop if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict pr_loc_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } /* Check if LR test for alpha is significant for these years - the test says no overdispersion but the variance is clearly greater than the mean. */ // Graph the predicted and observed number of charities by year gen pr_loc = . forvalues i = 1/5 { replace pr_loc = pr_loc_`i' if period==`i' } list la_name year lcharpop pr_loc /* Figure 6. Distribution of predicted number of Local charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_loc!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable pr_loc " " label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_loc town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(8)24, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (Local Charities)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_loc_`fdate'.png, replace width(4096) restore // 5.2.3 Field of activity [SM003] /* Selected areas, based on most common categories of ICNPO: - Social services (17) - Religion (14) - Culture and recreation (1) - Development (2) - Education (3) TASK: ask JM about these choices. [DONE] TASK: consider the correct model specification i.e. zero-inflated, zero-truncated, poisson etc */ sum charpop_icnpo* tabstat charpop_icnpo*, s(mean v) by(year) // Looks to be no need for nbreg // Social services hist charpop_icnpo17, freq norm scheme(s1mono) // tpoisson is the appropriate model (no overdispersion and zero-truncated). tpoisson charpop_icnpo17 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_icnpo_17_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1981-2011 forvalues i = 2/5 { tpoisson charpop_icnpo17 town town2 ib4.pph_cat lag_charpop_icnpo17 if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict pr_icnpo_17_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } /* Check if LR test for alpha is significant for these years - the test says no overdispersion but the variance is clearly greater than the mean. */ // Graph the predicted and observed number of charities by year gen pr_icnpo_17 = . forvalues i = 1/5 { replace pr_icnpo_17 = pr_icnpo_17_`i' if period==`i' } list la_name year charpop_icnpo17 pr_icnpo_17 /* Figure 7. Distribution of predicted number of Social Services charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_icnpo_17!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_icnpo_17 town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(1)7, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (Social Services)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_icnpo17_`fdate'.png, replace width(4096) restore // Religion hist charpop_icnpo14, freq norm scheme(s1mono) // tpoisson is the appropriate model (no overdispersion and zero-truncated). poisson charpop_icnpo14 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_icnpo_14_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables /* tpoisson won't converge; need to go with poisson. */ // 1981-2011 forvalues i = 2/5 { poisson charpop_icnpo14 town town2 ib4.pph_cat lag_charpop_icnpo14 if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict pr_icnpo_14_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } /* tpoisson won't converge; need to go with poisson. */ // Graph the predicted and observed number of charities by year gen pr_icnpo_14 = . forvalues i = 1/5 { replace pr_icnpo_14 = pr_icnpo_14_`i' if period==`i' } list la_name year charpop_icnpo14 pr_icnpo_14 /* Figure 4. Distribution of predicted number of Social services charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_icnpo_14!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_icnpo_14 town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(1)6, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (Religion)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_icnpo14_`fdate'.png, replace width(4096) restore // Culture and recreation // 1971 poisson charpop_icnpo1 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_icnpo_1_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1981-2011 forvalues i = 2/5 { poisson charpop_icnpo1 town town2 ib4.pph_cat lag_charpop_icnpo1 if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict pr_icnpo_1_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } // Graph the predicted and observed number of charities by year gen pr_icnpo_1 = . forvalues i = 1/5 { replace pr_icnpo_1 = pr_icnpo_1_`i' if period==`i' } list la_name year charpop_icnpo1 pr_icnpo_1 /* Figure 9. Distribution of predicted number of Social services charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_icnpo_1!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_icnpo_1 town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(1)4, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (Culture and Recreation)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_icnpo1_`fdate'.png, replace width(4096) restore // Development tab year charpop_icnpo2 if charpop_icnpo2 < .1 // Not zero-inflated // 1971 poisson charpop_icnpo2 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_icnpo_2_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1981-2011 forvalues i = 2/5 { poisson charpop_icnpo2 town town2 ib4.pph_cat lag_charpop_icnpo2 if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict pr_icnpo_2_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } // Graph the predicted and observed number of charities by year gen pr_icnpo_2 = . forvalues i = 1/5 { replace pr_icnpo_2 = pr_icnpo_2_`i' if period==`i' } list la_name year charpop_icnpo2 pr_icnpo_2 /* Figure 10. Distribution of predicted number of Development charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_icnpo_2!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_icnpo_2 town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(.5)2, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (Development)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_icnpo2_`fdate'.png, replace width(4096) restore // Education tab year charpop_icnpo3 if charpop_icnpo3 < .1 // Not zero-inflated // 1971 poisson charpop_icnpo3 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall predict pr_icnpo_3_1 // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables // 1981-2011 forvalues i = 2/5 { poisson charpop_icnpo3 town town2 ib4.pph_cat lag_charpop_icnpo3 if period==`i', nolog vce(robust) fitstat test town town2 // test if deprivation variables are significant overall test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall predict pr_icnpo_3_`i' // generate predicted counts for each observation listcoef , percent help // view predicted change in outcome by independent variables } // Graph the predicted and observed number of charities by year gen pr_icnpo_3 = . forvalues i = 1/5 { replace pr_icnpo_3 = pr_icnpo_3_`i' if period==`i' } list la_name year charpop_icnpo3 pr_icnpo_3 /* Figure 11. Distribution of predicted number of Education charities per 5000 residents, by Townsend Score and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_icnpo_3!=. local numobs = `r(N)' preserve levelsof year, local(levels) foreach l of local levels { *return list label variable town " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_icnpo_3 town if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(0(.5)2, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(i`l') } graph combine i1971 i1981 i1991 i2001 i2011 /// , title("Distribution of Predicted Density (Education)") subtitle("By Level of Deprivation") /// l1title("No. of charities per 5,000 residents") b1title("Townsend Score") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_pr_icnpo3_`fdate'.png, replace width(4096) restore /* Spatial models */ /* The strategy is to merge observations for each census year with the spatial data set and estimate the same models as above, only this time with spatially autocorrelated residuals. */ use $path3\la_stats_1971-2011_maps.dta, clear sort la_code_str desc, f drop charpop* numobs // drop density and other variables // Create spatial matrices spmatrix clear spmatrix create contiguity m1, rook spmatrix create idistance m2 forvalues i = 1/5 { preserve use $path3\la_stats_1971-2011_analysis.dta, clear // load in density data set keep if period==`i' keep la_id la_code charpop lag_charpop town town2 pph_cat year period rename la_code la_code_str foreach var in charpop lag_charpop town town2 pph_cat { rename `var' `var'_`i' } sort la_code_str sav $path1\la_stats_period-`i'.dta, replace restore *merge 1:1 la_code_str using $path1\la_stats_period-`i'.dta, keep(match master) } forvalues i = 1/5 { merge 1:1 la_code_str using $path1\la_stats_period-`i'.dta, keep(match master) drop _merge } desc, f // Estimate regressions // 1971 regress charpop_1 town_1 town2_1 ib4.pph_cat_1 , vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. estat moran, errorlag(m1) errorlag(m2) // evidence of spatial dependence /* fitstat test town_1 town2_1 // test if deprivation variables are significant overall test 1.pph_cat_1 2.pph_cat_1 3.pph_cat_1 5.pph_cat_1 // test if urban/rural is significant overall capture vif collin town_1 town2_1 pph_cat_1 listcoef, help */ ** Spatial regression ** spregress charpop_1 town_1 town2_1 ib4.pph_cat_1 , gs2sls err(m1) dvarlag(m1) ivarl(m1: town_1) // contiguity matrix test town_1 town2_1 // test if deprivation variables are significant overall test 1.pph_cat_1 2.pph_cat_1 3.pph_cat_1 5.pph_cat_1 // test if urban/rural is significant overall estat impact /* There is a spillover effect for 1971, as well as a statistically significant Y lag effect. */ *spregress charpop_1 town_1 town2_1 ib4.pph_cat_1 , gs2sls err(m1) dvarlag(m1) // inverse distance matrix * estat impact /* Regression coefficients are stable across weighting matrices. */ // 1981-2011 forvalues i = 2/5 { *regress charpop_`i' town_`i' town2_`i' ib4.pph_cat_`i' lag_charpop_`i', vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. *estat moran, errorlag(m1) errorlag(m2) // evidence of spatial dependence *fitstat *test town_`i' town2_`i' // test if deprivation variables are significant overall *test 1.pph_cat_`i' 2.pph_cat_`i' 3.pph_cat_`i' 5.pph_cat_`i' // test if urban/rural is significant overall *capture vif **collin town_`i' town2_`i' pph_cat_`i' *listcoef, help ** Spatial regression ** spregress charpop_`i' town_`i' town2_`i' ib4.pph_cat_`i' lag_charpop_`i' , gs2sls err(m1) dvarlag(m1) ivarl(m1: town_1) // contiguity matrix test town_`i' town2_`i' test 1.pph_cat_`i' 2.pph_cat_`i' 3.pph_cat_`i' 5.pph_cat_`i' // test if urban/rural is significant overall estat impact /* No spillover effects for these years and no statistically significant Y lag effect. */ /* spregress charpop_`i' town_`i' town2_`i' ib4.pph_cat_`i' lag_charpop_`i', gs2sls err(m1) dvarlag(m1) // inverse distance matrix test town_`i' town2_`i' estat impact */ /* Regression coefficients are stable across weighting matrices. */ } predict dres_1, residuals capture grmap, activate grmap dres_1 , t(1) clm(q) cln(9) fcolor(BuYlRd) legt("Residual Cuts") legs(2) legend(pos(10) size(vsmall)) /// title("Model Residuals (1971)") subtitle("By local authority") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) /// caption("Residuals derived from OLS regression.", size(vsmall)) graph export $path6\la_stats_charpop_dres1.png, replace width(4096) capture grmap, activate grmap dres_m1 , t(1) clm(q) cln(9) fcolor(BuYlRd) legt("Residual Cuts") legs(2) legend(pos(10) size(vsmall)) /// title("Model Residuals (1971)") subtitle("By local authority") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) /// caption("Residuals derived from spatial regression with contiguity weighting matrix.", size(vsmall)) graph export $path6\la_stats_charpop_dres_m1.png, replace width(4096) spregress charpop town town2 ib4.pph_cat if period==1, gs2sls err(m2) // lagged residual, inverse distance matrix predict dres_m2, residuals capture grmap, activate grmap dres_m2 , t(1) clm(q) cln(9) fcolor(BuYlRd) legt("Residual Cuts") legs(2) legend(pos(10) size(vsmall)) /// title("Model Residuals (1971)") subtitle("By local authority") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) /// caption("Residuals derived from spatial regression with inverse distance weighting matrix.", size(vsmall)) graph export $path6\la_stats_charpop_dres_m2.png, replace width(4096) *spregress charpop town town2 ib4.pph_cat , gs2sls dvarl(m1) // lagged dependent variable *spregress charpop town town2 ib4.pph_cat , gs2sls ivarl(m1: town) // lagged independent variable /* Change Score models */ /* These models capture changes in the independent variables for a local authority and their impact on the change in the dependent variable. As the change in density can be negative we estimate a series of OLS regressions with robust standard errors. Use binary alternatives to `town` and `town2` e.g. change in ranking up or down. */ l la_name year town town_rank town_rank_change gen trc_cat = . replace trc_cat = . if town_rank_change==0 // set no change to missing as there are two few observations to identify a stable effect replace trc_cat = 0 if town_rank_change<0 replace trc_cat = 1 if town_rank_change>0 // 1 = becoming more deprived tab trc_cat l la_name year town town_rank town_rank_change trc_cat // Whole sample regress charpop_change trc_cat urban_change rural_change , vce(robust) /* Forget about this approach. */ // 1981 regress charpop_change town_change town2_change urban_change rural_change if period==2, vce(robust) fitstat test town_change town2_change // test if deprivation variables are significant overall test urban_change rural_change // test if urban/rural dummy variables are significant overall capture vif collin town_change town2_change urban_change rural_change if period==2 *margins , at(town = (`min'(.75)`max')) grand // view predicted counts *marginsplot, noci allsim nolabels name(m`i') predict pr_charpop_cs_2 // generate predicted counts for each observation listcoef , help // view predicted change in outcome by independent variables // Alternative measure of rank regress charpop_change trc_cat urban_change rural_change if period==2, vce(robust) fitstat test urban_change rural_change // test if urban/rural dummy variables are significant overall capture vif collin trc_cat urban_change rural_change if period==2 *margins , at(town = (`min'(.75)`max')) grand // view predicted counts *marginsplot, noci allsim nolabels name(m`i') *predict pr_charpop_cs_2 // generate predicted counts for each observation listcoef , help // view predicted change in outcome by independent variables // 1991 - 2011 forvalues i = 3/5 { quietly sum town if period==`i', detail local min = `r(min)' local max = `r(max)' regress charpop_change town_change town2_change urban_change rural_change lag_charpop_change if period==`i', vce(robust) // should it be prior density or prior density change? fitstat test town_change town2_change test urban_change rural_change // test if urban/rural dummy variables are significant overall capture vif collin town_change town2_change urban_change rural_change lag_charpop_change if period==`i' *margins , at(town = (`min'(.75)`max')) grand // view predicted counts *marginsplot, noci allsim nolabels name(m`i') predict pr_charpop_cs_`i' // generate predicted counts for each observation listcoef , help // view predicted change in outcome by independent variables **qv ib4.pph_cat **qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) /// **title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") /// **bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg`i') } // Alternative measure of rank forvalues i = 3/5 { quietly sum town if period==`i', detail local min = `r(min)' local max = `r(max)' regress charpop_change trc_cat urban_change rural_change lag_charpop_change if period==`i', vce(robust) // should it be prior density or prior density change? fitstat test urban_change rural_change // test if urban/rural dummy variables are significant overall capture vif collin trc_cat urban_change rural_change lag_charpop_change if period==`i' *margins , at(town = (`min'(.75)`max')) grand // view predicted counts *marginsplot, noci allsim nolabels name(m`i') *predict pr_charpop_cs_`i' // generate predicted counts for each observation listcoef , help // view predicted change in outcome by independent variables **qv ib4.pph_cat **qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) /// **title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") /// **bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg`i') } // 2011 - check F test regress charpop_change trc_cat urban_change rural_change lag_charpop_change if period==5 // Graph the predicted and observed number of charities by year gen pr_charpop_cs = . forvalues i = 2/5 { replace pr_charpop_cs = pr_charpop_cs_`i' if period==`i' } list la_name year charpop pr_charpop_cs /* Figure #. Distribution of predicted change in number of charities per 5000 residents, by Townsend Score change and census year */ // Use predicted counts from model postestimation 'predict' command local fdate "20190129" capture graph drop * quietly count if pr_charpop_cs!=. local numobs = `r(N)' preserve *levelsof year, local(levels) foreach l in 1981 1991 2001 2011 { quietly sum charpop if year==`l', detail *return list label variable town_change " " // Remove variable label so it does not display on the graphs twoway (fpfit pr_charpop_cs town_change if year==`l', lwidth(thick)) /// , title("`l'") ytitle(" ") /// ylabel(, labsize(small)) xlabel(, labsize(small)) /// legend(off) /// scheme(s1mono) /// name(g`l') *graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096) } graph combine g1981 g1991 g2001 g2011 /// , title("Distribution of Predicted Change in Charity Density") subtitle("By Change In Level of Deprivation") /// l1title("Change in no. of charities per 5,000 residents") b1title("Townsend Score change") /// note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." /// "Local authorities with a level of charity density in the 99th percentile are excluded." /// "Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small)) graph export $path6\la_stats_scatter_pr-cs_`fdate'.png, replace width(4096) restore /* Fixed Effects models */ // Whole sample twoway (qfit charpop town) (scatter charpop town) xtpoisson charpop town town2 ib4.pph_cat, fe // 1981-2011 xtpoisson charpop town town2 ib4.pph_cat lag_charpop, fe xtreg charpop town town2 ib4.pph_cat lag_charpop, re xtreg charpop town town2 ib4.pph_cat lag_charpop, be xtreg charpop town town2 ib4.pph_cat lag_charpop, fe /* // Alternative functional forms of town and pph forvalues i = 1/5 { xtile town_cat_`i' = town if period==`i', n(5) tab town_cat_`i' tabstat town, s(mean p50) by(town_cat_`i') } tab town_cat quin // Why are these different? Go back to documentation of original variable. list la_name year quin town tab quin year // Struggling to interpret this variable /* tnbreg charpop town_cat pph if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year. test town town2 listcoef , percent help // view predicted change in outcome by independent variables forvalues i = 2/5 { di "Year: " year tnbreg charpop town_cat pph lag_charpop if period==`i', nolog vce(robust) test town town2 listcoef , percent help // view predicted change in outcome by independent variables } */ // Alternative model specifications i.e. pooled cross-sectional, basic Poisson and OLS, random and fixed effects. /* I need to ensure these are consistent with the other models in terms of variables. */ // Pooled cross-sectional [TNBREG & OLS] tnbreg charpop town town2 ib4.pph_cat lag_charpop, nolog vce(robust) reg charpop town town2 ib4.pph_cat ib2.period lag_charpop, vce(robust) // Fixed effects [OLS] xtset la_id period xtreg charpop town town2 ib4.pph_cat ib2.period lag_charpop, fe // Random effects [OLS] xtreg charpop town town2 ib4.pph_cat ib2.period lag_charpop, re /* Test which model is appropriate using Hausman and other tests. */ */