-
Notifications
You must be signed in to change notification settings - Fork 0
/
Project.py
2072 lines (1552 loc) · 123 KB
/
Project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# coding: utf-8
# In[1]:
try:
    # Enable greedy tab-completion. `get_ipython` only exists inside an
    # IPython/Jupyter session, where this line behaves exactly as before.
    get_ipython().run_line_magic('config', 'IPCompleter.greedy=True')
except NameError:
    # Running as a plain Python script: there is no interactive completer to
    # configure, so skip the magic instead of aborting on the first statement.
    pass
# # Project: National Longitudinal Survey of Youth 1979
# ## An analysis of the effects of individual characteristics on unemployment.
#
# With this analysis I will identify which variables matter most for desirable and undesirable outcomes.
#
# My hypothesis is that these variables will provide a broader and more robust picture of which Americans struggle with unemployment and which Americans' circumstances make them resilient to it.
#
# I will attempt to find the underlying importance of certain exogenous variables to explain key endogenous variables.
#
# ### Exogenous variables
#
# - Age
# - Race
# - Sex
# - Math ASVAB score
# - Word ASVAB score
# - College Major
# - Highest Degree Received
# - Occupation
# - Class of Worker
# - Firm Size (that one works at, or owns)
# - Income
# - Region
# - Marital Status
# - Urban or Rural Household
#
# ### Endogenous Variables
#
# - Unemployed for a period in 2016
# ### ASVAB
#
# > The **Armed Services Vocational Aptitude Battery** (**ASVAB**) is a multiple
# > choice test, administered by the United States Military Entrance Processing
# > Command, used to determine qualification for enlistment in the United States
# > Armed Forces. It is often offered to U.S. high school students when they are
# > in the 10th, 11th and 12th grade, though anyone eligible for enlistment may
# > take it.
#
# Taken from [Armed Services Vocational Aptitude Battery - Wikipedia](https://en.wikipedia.org/wiki/Armed_Services_Vocational_Aptitude_Battery)
# ### Empirical Approach
# In[18]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from pandas_profiling import ProfileReport
# Load the NLSY79 extract; the path is relative to the working directory.
url = "NLS_79/NLS_79.csv"
df = pd.read_csv(url)

# Replace the header with short, readable names, listed in file column order.
# The two groups follow the survey years given in the variable table below.
df.columns = [
    # Background and early-wave variables (1979-1988, plus the XRND ASVAB scores)
    "case_id",
    "age",
    "hgc_mother",
    "hgc_father",
    "sample_id",
    "sample_race",
    "sample_sex",
    "pov_1980",
    "asvab_math",
    "asvab_word",
    "major",
    "max_degree",
] + [
    # 2016 interview variables
    "occup_2016",
    "css_worker",
    "firm_size",
    "unemp_2016",
    "income_2016",
    "fam_net_worth",
    "ch_health_limit",
    "fam_size",
    "fam_net_income",
    "fam_pov",
    "region",
    "marital",
    "urban_rural",
]

# Notebook display: (rows, columns) of the loaded frame.
df.shape
# plt.style.use("dracula")
# ### Description of Variables
# | # | RNUM | QUESTION NAME | VARIABLE TITLE | YEAR | #VALID |
# | --- | -------- | --------------- | ---------------------------------------------------------------- | ------ | ------ |
# | 1 | R0000100 | case_id | IDENTIFICATION CODE | 1979 | 12686 |
# | 2 | R0000600 | age | AGE OF R | 1979 | 12686 |
# | 3 | R0006500 | hgc_mother | HIGHEST GRADE COMPLETED BY R'S MOTHER | 1979 | 12659 |
# | 4 | R0007900 | hgc_father | HIGHEST GRADE COMPLETED BY R'S FATHER | 1979 | 12456 |
# | 5 | R0173600 | sample_id | SAMPLE IDENTIFICATION CODE | 1979 | 12686 |
# | 6 | R0214700 | sample_race | R'S RACIAL/ETHNIC COHORT FROM SCREENER | 78SCRN | 12686 |
# | 7 | R0214800 | sample_sex | SEX OF R | 1979 | 12686 |
# | 8 | R0618500 | pov_1980 | FAMILY POVERTY STATUS IN 1980 | 1981 | 12195 |
# | 9 | R0648301 | asvab_math | ASVAB - ARITHMETIC REASONING/MATHEMATICS KNOWLEDGE Z SCORE | XRND | 11152 |
# | 10 | R0648343 | asvab_word | ASVAB - WORD KNOWLEDGE Z SCORE | XRND | 11160 |
# | 11 | R1207800 | major | MAJOR FIELD OF STUDY AT MOST RECENT COLLEGE ATTENDED | 1984 | 5293 |
# | 12 | R2509800 | max_degree | HIGHEST DEGREE EVER RECEIVED | 1988 | 8571 |
# | 13  | T5256900 | occup_2016      | OCCUPATION (CENSUS 4 DIGIT,00 CODES) (ALL) JOB #01               | 2016   | 5323   |
# | 14 | T5257400 | css_worker | CLASS OF WORKER (ALL) JOB #01 | 2016 | 5374 |
# | 15 | T5344300 | firm_size | # OF EMPLOYEES AT LOCATION OF R'S JOB # 01 | 2016 | 4575 |
# | 16 | T5407500 | unemp_2016 | INT CHECK - ANY PERIODS NOT EMPLOYED IN WORK HISTORY CALENDAR? | 2016 | 6912 |
# | 17 | T5619500 | income_2016 | TOTAL INCOME FROM WAGES AND SALARY IN PAST CALENDAR YEAR (TRUNC) | 2016 | 6909 |
# | 18 | T5684500 | fam_net_worth | FAMILY NET WEALTH (TRUNC) _KEY_ | 2016 | 6912 |
# | 19 | T5734600 | ch_health_limit | CHANCES RS HEALTH LIMITS WORK IN NEXT 10 YEARS | 2016 | 6909 |
# | 20 | T5770700 | fam_size | FAMILY SIZE | 2016 | 6912 |
# | 21 | T5770800 | fam_net_income | TOTAL NET FAMILY INCOME IN PAST CALENDAR YEAR _KEY_ (TRUNCATED) | 2016 | 6912 |
# | 22 | T5770900 | fam_pov | FAMILY POVERTY STATUS IN PREVIOUS CALENDAR YEAR | 2016 | 6912 |
# | 23 | T5771000 | region | REGION OF CURRENT RESIDENCE | 2016 | 6850 |
# | 24 | T5771200 | marital | MARITAL STATUS | 2016 | 6912 |
# | 25 | T5774000 | urban_rural | IS R'S CURRENT RESIDENCE URBAN/RURAL? | 2016 | 6850 |
#
# ### Extended Description of Variables
# ```
# ╔═══════════╦═════════════════╦════════╦════════════════════════════════════════════════════════════════════════════════════╗
# ║ RNUM ║ QNAME ║ YEAR ║ DESCRIPTION ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R00001.00 ║ case_id ║ 1979 ║ ORIGINAL QUESTION NAME: CASEID ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ NOTE: THIS NUMBER WAS ASSIGNED ACCORDING TO THE RECORD NUMBER OF EACH RESPONDENT ║
# ║ ║ ║ ║ ON THE TAPE. THE FIRST RESPONDENT WAS ASSIGNED ID#1, THE SECOND WAS ASSIGNED ║
# ║ ║ ║ ║ ID#2, ETC. ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R0000600 ║ age ║ 1979 ║ AGE OF R ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: S01Q01B ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ AND THAT MAKES YOU (R'S AGE ON HOUSEHOLD ENUMERATION). IS THAT CORRECT ? (IF ║
# ║ ║ ║ ║ NECESSARY CORRECT HOUSEHOLD ENUMERATION.) ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ACTUAL AGE ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: All ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 950 14 ║
# ║ ║ ║ ║ 1563 15 ║
# ║ ║ ║ ║ 1561 16 ║
# ║ ║ ║ ║ 1508 17 ║
# ║ ║ ║ ║ 1634 18 ║
# ║ ║ ║ ║ 1677 19 ║
# ║ ║ ║ ║ 1666 20 ║
# ║ ║ ║ ║ 1690 21 ║
# ║ ║ ║ ║ 437 22 ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 12686 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 12686 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 0 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 14 Max: 22 Mean: 17.9 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Lead In: R00005.00[Default] ║
# ║ ║ ║ ║ Default Next Question: R00007.00 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R00065.00 ║ hgc_mother ║ 1979 ║ HIGHEST GRADE COMPLETED BY R'S MOTHER ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: S01Q16 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ WHAT IS THE HIGHEST GRADE OR YEAR OF REGULAR SCHOOL THAT YOUR MOTHER EVER ║
# ║ ║ ║ ║ COMPLETED? ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: R with mother/mother figure ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 132 0 NONE ║
# ║ ║ ║ ║ 24 1 1ST GRADE ║
# ║ ║ ║ ║ 87 2 2ND GRADE ║
# ║ ║ ║ ║ 183 3 3RD GRADE ║
# ║ ║ ║ ║ 173 4 4TH GRADE ║
# ║ ║ ║ ║ 198 5 5TH GRADE ║
# ║ ║ ║ ║ 421 6 6TH GRADE ║
# ║ ║ ║ ║ 260 7 7TH GRADE ║
# ║ ║ ║ ║ 801 8 8TH GRADE ║
# ║ ║ ║ ║ 698 9 9TH GRADE ║
# ║ ║ ║ ║ 999 10 10TH GRADE ║
# ║ ║ ║ ║ 1107 11 11TH GRADE ║
# ║ ║ ║ ║ 4817 12 12TH GRADE ║
# ║ ║ ║ ║ 364 13 1ST YR COL ║
# ║ ║ ║ ║ 561 14 2ND YR COL ║
# ║ ║ ║ ║ 178 15 3RD YR COL ║
# ║ ║ ║ ║ 647 16 4TH YR COL ║
# ║ ║ ║ ║ 101 17 5TH YR COL ║
# ║ ║ ║ ║ 92 18 6TH YR COL ║
# ║ ║ ║ ║ 14 19 7TH YR COL ║
# ║ ║ ║ ║ 21 20 8TH YR COL OR MORE ║
# ║ ║ ║ ║ 0 95 UNGRADED ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R00079.00 ║ hgc_father ║ 1979 ║ HIGHEST GRADE COMPLETED BY R'S FATHER ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: S01Q23 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ LET'S GO BACK TO YOUR FATHER NOW. WHAT IS THE HIGHEST GRADE OR YEAR OF REGULAR ║
# ║ ║ ║ ║ SCHOOL THAT YOUR FATHER EVER COMPLETED? ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: R with father/father figure ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 199 0 NONE ║
# ║ ║ ║ ║ 48 1 1ST GRADE ║
# ║ ║ ║ ║ 102 2 2ND GRADE ║
# ║ ║ ║ ║ 215 3 3RD GRADE ║
# ║ ║ ║ ║ 208 4 4TH GRADE ║
# ║ ║ ║ ║ 190 5 5TH GRADE ║
# ║ ║ ║ ║ 499 6 6TH GRADE ║
# ║ ║ ║ ║ 306 7 7TH GRADE ║
# ║ ║ ║ ║ 950 8 8TH GRADE ║
# ║ ║ ║ ║ 568 9 9TH GRADE ║
# ║ ║ ║ ║ 754 10 10TH GRADE ║
# ║ ║ ║ ║ 616 11 11TH GRADE ║
# ║ ║ ║ ║ 3694 12 12TH GRADE ║
# ║ ║ ║ ║ 286 13 1ST YR COL ║
# ║ ║ ║ ║ 558 14 2ND YR COL ║
# ║ ║ ║ ║ 162 15 3RD YR COL ║
# ║ ║ ║ ║ 899 16 4TH YR COL ║
# ║ ║ ║ ║ 151 17 5TH YR COL ║
# ║ ║ ║ ║ 209 18 6TH YR COL ║
# ║ ║ ║ ║ 56 19 7TH YR COL ║
# ║ ║ ║ ║ 210 20 8TH YR COL OR MORE ║
# ║ ║ ║ ║ 0 95 UNGRADED ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R01736.00 ║ sample_id ║ 1979 ║ SAMPLE IDENTIFICATION CODE ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: S24Q01 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: Entire Sample ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 2236 1 CROSS MALE WHITE ║
# ║ ║ ║ ║ 203 2 CROSS MALE WH. POOR ║
# ║ ║ ║ ║ 346 3 CROSS MALE BLACK ║
# ║ ║ ║ ║ 218 4 CROSS MALE HISPANIC ║
# ║ ║ ║ ║ 2279 5 CROSS FEMALE WHITE ║
# ║ ║ ║ ║ 198 6 CROSS FEMALE WH POOR ║
# ║ ║ ║ ║ 405 7 CROSS FEMALE BLACK ║
# ║ ║ ║ ║ 226 8 CROSS FEMALE HISPANIC ║
# ║ ║ ║ ║ 742 9 SUP MALE WH POOR ║
# ║ ║ ║ ║ 1105 10 SUP MALE BLACK ║
# ║ ║ ║ ║ 729 11 SUP MALE HISPANIC ║
# ║ ║ ║ ║ 901 12 SUP FEM WH POOR ║
# ║ ║ ║ ║ 1067 13 SUP FEMALE BLACK ║
# ║ ║ ║ ║ 751 14 SUP FEMALE HISPANIC ║
# ║ ║ ║ ║ 609 15 MIL MALE WHITE ║
# ║ ║ ║ ║ 162 16 MIL MALE BLACK ║
# ║ ║ ║ ║ 53 17 MIL MALE HISPANIC ║
# ║ ║ ║ ║ 342 18 MIL FEMALE WHITE ║
# ║ ║ ║ ║ 89 19 MIL FEMALE BLACK ║
# ║ ║ ║ ║ 25 20 MIL FEMALE HISPANIC ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R02147.00 ║ sample_race ║ 78SCRN ║ R'S RACIAL/ETHNIC COHORT FROM SCREENER ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: Entire Sample ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 2002 1 HISPANIC ║
# ║ ║ ║ ║ 3174 2 BLACK ║
# ║ ║ ║ ║ 7510 3 NON-BLACK, NON-HISPANIC ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R02148.00 ║ sample_sex ║ 1979 ║ SEX OF R ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: Entire sample ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 6403 1 MALE ║
# ║ ║ ║ ║ 6283 2 FEMALE ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 12686 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 12686 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 0 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R0618500 ║ pov_1980 ║ 1980 ║ FAMILY POVERTY STATUS IN 1980 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: *CREATED ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ NOTE: FOR VARIABLE CREATION, SEE APPENDIX 2: TOTAL NET FAMILY INCOME ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1978 1 IN POVERTY ║
# ║ ║ ║ ║ 7806 0 NOT IN POVERTY ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 9784 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ Invalid Skip(-3) 2411 ║
# ║ ║ ║ ║ TOTAL =========> 12195 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 491 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 0 Max: 1 Mean: .2 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R0648301 ║ asvab_math ║ XRND ║ ASVAB - ARITHMETIC REASONING/MATHEMATICS KNOWLEDGE Z SCORE ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: *CREATED ║
# ║ ║ ║ ║ NOTE: TWO IMPLIED DECIMAL PLACES. DIVIDE BY 100 & SUBTRACT 5 TO GET ORIGINAL Z ║
# ║ ║ ║ ║ SCORE. ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ NOTE: NORMING PROCEDURE: RESPONDENTS WERE GROUPED INTO FOUR-MONTH AGE ║
# ║ ║ ║ ║ INTERVALS FOR EACH BIRTH YEAR. WITHIN EACH AGE GROUP, IRT THETA SCORES WERE ║
# ║ ║ ║ ║ RANKED FROM LOWEST TO HIGHEST AND A NONPARAMETRIC DISTRIBUTION FUNCTION WAS ║
# ║ ║ ║ ║ CALCULATED USING THE ASVAB SAMPLING WEIGHTS. THESE ESTIMATED PROBABILITIES WERE ║
# ║ ║ ║ ║ TRANSFORMED TO STANDARD NORMAL Z SCORES. ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 17 150 TO 199 ║
# ║ ║ ║ ║ 99 200 TO 249 ║
# ║ ║ ║ ║ 295 250 TO 299 ║
# ║ ║ ║ ║ 739 300 TO 349 ║
# ║ ║ ║ ║ 1437 350 TO 399 ║
# ║ ║ ║ ║ 1987 400 TO 449 ║
# ║ ║ ║ ║ 2151 450 TO 499 ║
# ║ ║ ║ ║ 1871 500 TO 549 ║
# ║ ║ ║ ║ 1329 550 TO 599 ║
# ║ ║ ║ ║ 705 600 TO 649 ║
# ║ ║ ║ ║ 334 650 TO 699 ║
# ║ ║ ║ ║ 120 700 TO 749 ║
# ║ ║ ║ ║ 67 750 TO 799 ║
# ║ ║ ║ ║ 1 800 TO 9999999: 800+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 11152 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 11152 VALID SKIP(-4) 1534 NON-INTERVIEW(-5) 0 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 166 Max: 825 Mean: 475.04 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R0648343 ║ asvab_word ║ XRND ║ ASVAB - WORD KNOWLEDGE Z SCORE ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: *CREATED ║
# ║ ║ ║ ║ NOTE: TWO IMPLIED DECIMAL PLACES. DIVIDE BY 100 & SUBTRACT 5 TO GET ORIGINAL Z ║
# ║ ║ ║ ║ SCORE. ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ NOTE: NORMING PROCEDURE: RESPONDENTS WERE GROUPED INTO FOUR-MONTH AGE ║
# ║ ║ ║ ║ INTERVALS FOR EACH BIRTH YEAR. WITHIN EACH AGE GROUP, IRT THETA SCORES WERE ║
# ║ ║ ║ ║ RANKED FROM LOWEST TO HIGHEST AND A NONPARAMETRIC DISTRIBUTION FUNCTION WAS ║
# ║ ║ ║ ║ CALCULATED USING THE ASVAB SAMPLING WEIGHTS. THESE ESTIMATED PROBABILITIES WERE ║
# ║ ║ ║ ║ TRANSFORMED TO STANDARD NORMAL Z SCORES. ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 22 150 TO 199 ║
# ║ ║ ║ ║ 91 200 TO 249 ║
# ║ ║ ║ ║ 281 250 TO 299 ║
# ║ ║ ║ ║ 774 300 TO 349 ║
# ║ ║ ║ ║ 1489 350 TO 399 ║
# ║ ║ ║ ║ 2033 400 TO 449 ║
# ║ ║ ║ ║ 2057 450 TO 499 ║
# ║ ║ ║ ║ 1772 500 TO 549 ║
# ║ ║ ║ ║ 1307 550 TO 599 ║
# ║ ║ ║ ║ 642 600 TO 649 ║
# ║ ║ ║ ║ 572 650 TO 699 ║
# ║ ║ ║ ║ 116 700 TO 749 ║
# ║ ║ ║ ║ 3 750 TO 799 ║
# ║ ║ ║ ║ 1 800 TO 9999999: 800+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 11160 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 11160 VALID SKIP(-4) 1526 NON-INTERVIEW(-5) 0 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 168 Max: 825 Mean: 475.07 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R1207800 ║ major ║ 1984 ║ MAJOR FIELD OF STUDY AT MOST RECENT COLLEGE ATTENDED ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: Q0373 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ WHAT (IS/WAS) YOUR FIELD OF STUDY AT (NAME OF SCHOOL)? PROBE IF ║
# ║ ║ ║ ║ NECESSARY: WHAT (ARE/WERE) YOUR MAJORING IN? ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ SEE ATTACHMENT 4, FIELDS OF STUDY IN COLLEGE ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: R is/was attending college/university ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 327 0 ║
# ║ ║ ║ ║ 0 1 TO 99 ║
# ║ ║ ║ ║ 73 100 TO 199 ║
# ║ ║ ║ ║ 55 200 TO 299 ║
# ║ ║ ║ ║ 0 300 TO 399 ║
# ║ ║ ║ ║ 156 400 TO 499 ║
# ║ ║ ║ ║ 1401 500 TO 599 ║
# ║ ║ ║ ║ 141 600 TO 699 ║
# ║ ║ ║ ║ 362 700 TO 799 ║
# ║ ║ ║ ║ 440 800 TO 899 ║
# ║ ║ ║ ║ 406 900 TO 999 ║
# ║ ║ ║ ║ 1930 1000 TO 9999999: 1000+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 5291 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ Invalid Skip(-3) 2 ║
# ║ ║ ║ ║ TOTAL =========> 5293 VALID SKIP(-4) 6776 NON-INTERVIEW(-5) 617 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 0 Max: 9996 Mean: 1233.85 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ R25098.00 ║ max_degree ║ 1988 ║ HIGHEST DEGREE EVER RECEIVED ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: Q3.9A ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ (HAND CARD B) WHAT IS THE NAME OF THE HIGHEST DEGREE YOU HAVE EVER ║
# ║ ║ ║ ║ RECEIVED? ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: All except those without any formal schooling; currently enrolled >= ║
# ║ ║ ║ ║ 13; has HS diploma or equivalent ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 6031 1 HIGH SCHOOL DIPLOMA (OR EQUIVALENT) ║
# ║ ║ ║ ║ 626 2 ASSOCIATE/JUNIOR COLLEGE (AA) ║
# ║ ║ ║ ║ 587 3 BACHELOR OF ARTS DEGREE (BA) ║
# ║ ║ ║ ║ 922 4 BACHELOR OF SCIENCE (BS) ║
# ║ ║ ║ ║ 178 5 MASTER'S DEGREE (MA,MBA,MS,MSW) ║
# ║ ║ ║ ║ 11 6 DOCTORAL DEGREE (PHD) ║
# ║ ║ ║ ║ 49 7 PROFESSIONAL DEGREE (MD,LLD,DDS) ║
# ║ ║ ║ ║ 160 8 OTHER ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 8564 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 1 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ Invalid Skip(-3) 6 ║
# ║ ║ ║ ║ TOTAL =========> 8571 VALID SKIP(-4) 1894 NON-INTERVIEW(-5) 2221 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T5256900 ║ occup_2016 ║ 2016 ║ OCCUPATION (CENSUS 4 DIGIT, 00 CODES) (ALL) JOB #01 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ COMMENT: 1970 CENSUS CODE FOR OCCUPATION - EMPLOYER ║
# ║ ║ ║ ║ SEE ATTACHMENT 3, INDUSTRY AND OCCUPATION CODES ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ INCLUDES DATA FOR ALL R'S REPORTING EMPLOYER ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 665 10 TO 430: Executive, Administrative and Managerial ║
# ║ ║ ║ ║ Occupations ║
# ║ ║ ║ ║ 266 500 TO 950: Management Related Occupations ║
# ║ ║ ║ ║ 110 1000 TO 1240: Mathematical and Computer Scientists ║
# ║           ║                 ║        ║      85        1300 TO 1560: Engineers, Architects, Surveyors, Engineering and     ║
# ║ ║ ║ ║ Related Technicians ║
# ║ ║ ║ ║ 18 1600 TO 1760: Physical Scientists ║
# ║ ║ ║ ║ 10 1800 TO 1860: Social Scientists and Related Workers ║
# ║ ║ ║ ║ 11 1900 TO 1960: Life, Physical and Social Science Technicians ║
# ║           ║                 ║        ║     113        2000 TO 2060: Counselors, Social and Religious Workers             ║
# ║ ║ ║ ║ 48 2100 TO 2150: Lawyers, Judges and Legal Support Workers ║
# ║ ║ ║ ║ 233 2200 TO 2340: Teachers ║
# ║ ║ ║ ║ 88 2400 TO 2550: Education, Training and Library Workers ║
# ║ ║ ║ ║ 67 2600 TO 2760: Entertainers and Performers, Sports and Related ║
# ║ ║ ║ ║ Workers ║
# ║ ║ ║ ║ 34 2800 TO 2960: Media and Communications Workers ║
# ║ ║ ║ ║ 134 3000 TO 3260: Health Diagnosing and Treating Practitioners ║
# ║ ║ ║ ║ 249 3300 TO 3650: Health Care Technical and Support Occupations ║
# ║ ║ ║ ║ 144 3700 TO 3950: Protective Service Occupations ║
# ║ ║ ║ ║ 209 4000 TO 4160: Food Preparation and Serving Related Occupations ║
# ║ ║ ║ ║ 272 4200 TO 4250: Cleaning and Building Service Occupations ║
# ║ ║ ║ ║ 18 4300 TO 4430: Entertainment Attendants and Related Workers ║
# ║ ║ ║ ║ 202 4500 TO 4650: Personal Care and Service Workers ║
# ║ ║ ║ ║ 374 4700 TO 4960: Sales and Related Workers ║
# ║ ║ ║ ║ 718 5000 TO 5930: Office and Administrative Support Workers ║
# ║ ║ ║ ║ 21 6000 TO 6130: Farming, Fishing and Forestry Occupations ║
# ║ ║ ║ ║ 278 6200 TO 6940: Construction Trade and Extraction Workers ║
# ║ ║ ║ ║ 206 7000 TO 7620: Installation, Maintenance and Repairs Workers ║
# ║ ║ ║ ║ 84 7700 TO 7750: Production and Operating Workers ║
# ║ ║ ║ ║ 12 7800 TO 7850: Food Preparation Occupations ║
# ║ ║ ║ ║ 246 7900 TO 8960: Setters, Operators and Tenders ║
# ║ ║ ║ ║ 397 9000 TO 9750: Transportation and Material Moving Workers ║
# ║ ║ ║ ║ 6 9840: Armed Forces ║
# ║ ║ ║ ║ 4 9990: Uncodeable ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 5323 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 5323 VALID SKIP(-4) 1589 NON-INTERVIEW(-5) 5774 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 5 Max: 9990 Mean: 4245.22 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T52574.00 ║ css_worker ║ 2016 ║ CLASS OF WORKER (ALL) JOB #01 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ COMMENT: CLASS OF WORKER - EMPLOYER ║
# ║ ║ ║ ║ INCLUDES DATA FOR ALL R'S REPORTING EMPLOYER ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1102 1: 1 Government ║
# ║ ║ ║ ║ 2903 2: 2 Private for profit company ║
# ║ ║ ║ ║ 465 3: 3 Non-profit organization (including tax exempt and ║
# ║ ║ ║ ║ charitable) ║
# ║ ║ ║ ║ 700 4: 4 Self employed ║
# ║ ║ ║ ║ 56 5: 5 Working in family business ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 5226 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 7 ║
# ║ ║ ║ ║ Don't Know(-2) 127 ║
# ║ ║ ║ ║ Invalid Skip(-3) 14 ║
# ║ ║ ║ ║ TOTAL =========> 5374 VALID SKIP(-4) 1538 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T53443.00 ║ firm_size ║ 2016 ║ # OF EMPLOYEES AT LOCATION OF R'S JOB # 01 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ At the place where you [(work/worked)], how many employees ([(does/did)] ║
# ║ ║ ║ ║ [(employer name)]([QES-LOOP1 counter])/[do/did] you) have? (INTERVIEWER: IF ║
# ║ ║ ║ ║ VOLUNTEERED "NUMBER VARIES", ENTER "99995".) ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1931 1 TO 49 ║
# ║ ║ ║ ║ 541 50 TO 99 ║
# ║ ║ ║ ║ 331 100 TO 149 ║
# ║ ║ ║ ║ 151 150 TO 199 ║
# ║ ║ ║ ║ 193 200 TO 249 ║
# ║ ║ ║ ║ 73 250 TO 299 ║
# ║ ║ ║ ║ 159 300 TO 349 ║
# ║ ║ ║ ║ 29 350 TO 399 ║
# ║ ║ ║ ║ 86 400 TO 449 ║
# ║ ║ ║ ║ 19 450 TO 499 ║
# ║ ║ ║ ║ 862 500 TO 99999999: 500+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 4375 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 11 ║
# ║ ║ ║ ║ Don't Know(-2) 187 ║
# ║ ║ ║ ║ Invalid Skip(-3) 2 ║
# ║ ║ ║ ║ TOTAL =========> 4575 VALID SKIP(-4) 2337 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T54076.00 ║ unemp_2016 ║ 2016 ║ INT CHECK - ANY PERIODS NOT EMPLOYED ║
# ║ ║ ║ ║ IN WORK HISTORY CALENDAR? ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ [total number of between job gaps] == 0 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ COMMENT: SKIP ACCORDING TO THE NUMBER OF GAPS ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 2831 0: 0 CONDITION DOES NOT APPLY ║
# ║ ║ ║ ║ 4081 1: 1 CONDITION APPLIES (Go To T54106.00) ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6912 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 6912 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T56195.00 ║ income_2016 ║ 2016 ║ TOTAL INCOME FROM WAGES AND SALARY IN PAST ║
# ║ ║ ║ ║ CALENDAR YEAR (TRUNC) ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ [During/(Not counting any money you received from your military service...) ║
# ║ ║ ║ ║ During] [calendar year prior to survey year], how much did you receive from ║
# ║ ║ ║ ║ wages, salary, commissions, or tips from all (other) jobs, before deductions ║
# ║ ║ ║ ║ for taxes or anything else? ║
# ║ ║ ║ ║ topcoded item ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1832 0 ║
# ║ ║ ║ ║ 55 1 TO 999 ║
# ║ ║ ║ ║ 46 1000 TO 1999 ║
# ║ ║ ║ ║ 35 2000 TO 2999 ║
# ║ ║ ║ ║ 43 3000 TO 3999 ║
# ║ ║ ║ ║ 31 4000 TO 4999 ║
# ║ ║ ║ ║ 47 5000 TO 5999 ║
# ║ ║ ║ ║ 31 6000 TO 6999 ║
# ║ ║ ║ ║ 29 7000 TO 7999 ║
# ║ ║ ║ ║ 29 8000 TO 8999 ║
# ║ ║ ║ ║ 16 9000 TO 9999 ║
# ║ ║ ║ ║ 229 10000 TO 14999 ║
# ║ ║ ║ ║ 235 15000 TO 19999 ║
# ║ ║ ║ ║ 304 20000 TO 24999 ║
# ║ ║ ║ ║ 1515 25000 TO 49999 ║
# ║ ║ ║ ║ 2153 50000 TO 99999999: 50000+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6630 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 130 (Go To T56200.00) ║
# ║ ║ ║ ║ Don't Know(-2) 149 (Go To T56204.00) ║
# ║ ║ ║ ║ TOTAL =========> 6909 VALID SKIP(-4) 3 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T56845.00 ║ fam_net_worth ║ 2016 ║ FAMILY NET WEALTH (TRUNC) *KEY* ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ORIGINAL QUESTION NAME: *CREATED Created Variable Total Net Family Wealth ║
# ║ ║ ║ ║ Total Net Wealth for Family. Created by summing all asset values and ║
# ║ ║ ║ ║ subtracting all debts. Missing assets and debt values are imputed. ║
# ║ ║ ║ ║ Top 2% of all values are topcoded. See NLS79 User's Guide Asset Section for ║
# ║ ║ ║ ║ more detail. ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ UNIVERSE: All ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ NOTE: This variable includes estimates provided by the respondent, but does ║
# ║ ║ ║ ║ not include imputed values. ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 629 -999999999 TO -6: < 0 ║
# ║ ║ ║ ║ 603 0 ║
# ║ ║ ║ ║ 115 1 TO 999 ║
# ║ ║ ║ ║ 94 1000 TO 1999 ║
# ║ ║ ║ ║ 93 2000 TO 2999 ║
# ║ ║ ║ ║ 73 3000 TO 3999 ║
# ║ ║ ║ ║ 57 4000 TO 4999 ║
# ║ ║ ║ ║ 64 5000 TO 5999 ║
# ║ ║ ║ ║ 50 6000 TO 6999 ║
# ║ ║ ║ ║ 44 7000 TO 7999 ║
# ║ ║ ║ ║ 33 8000 TO 8999 ║
# ║ ║ ║ ║ 23 9000 TO 9999 ║
# ║ ║ ║ ║ 146 10000 TO 14999 ║
# ║ ║ ║ ║ 102 15000 TO 19999 ║
# ║ ║ ║ ║ 83 20000 TO 24999 ║
# ║ ║ ║ ║ 387 25000 TO 49999 ║
# ║ ║ ║ ║ 3412 50000 TO 99999999: 50000+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6008 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 1 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ Invalid Skip(-3) 903 ║
# ║ ║ ║ ║ TOTAL =========> 6912 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T57346.00 ║ ch_health_limit ║ 2016 ║ CHANCES RS HEALTH LIMITS WORK IN NEXT 10 ║
# ║ ║ ║ ║ YEARS ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ What do you think the chances are that your health will limit your work ║
# ║ ║ ║ ║ activity during the next 10 years? ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ENTER 0 TO 100: ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1103 0 ║
# ║ ║ ║ ║ 208 1 TO 9 ║
# ║ ║ ║ ║ 926 10 TO 19 ║
# ║ ║ ║ ║ 795 20 TO 29 ║
# ║ ║ ║ ║ 362 30 TO 39 ║
# ║ ║ ║ ║ 235 40 TO 49 ║
# ║ ║ ║ ║ 1303 50 TO 59 ║
# ║ ║ ║ ║ 205 60 TO 69 ║
# ║ ║ ║ ║ 256 70 TO 79 ║
# ║ ║ ║ ║ 394 80 TO 89 ║
# ║ ║ ║ ║ 132 90 TO 99 ║
# ║ ║ ║ ║ 892 100 TO 99999999: 100+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6811 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 7 ║
# ║ ║ ║ ║ Don't Know(-2) 91 ║
# ║ ║ ║ ║ TOTAL =========> 6909 VALID SKIP(-4) 3 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T5770700 ║ fam_size ║ 2016 ║ FAMILY SIZE ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1887 1 ║
# ║ ║ ║ ║ 2519 2 ║
# ║ ║ ║ ║ 1275 3 ║
# ║ ║ ║ ║ 751 4 ║
# ║ ║ ║ ║ 305 5 ║
# ║ ║ ║ ║ 104 6 ║
# ║ ║ ║ ║ 41 7 ║
# ║ ║ ║ ║ 20 8 ║
# ║ ║ ║ ║ 4 9 ║
# ║ ║ ║ ║ 6 10 TO 999: 10+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6912 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 6912 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T57708.00 ║ fam_net_income ║ 2016 ║ TOTAL NET FAMILY INCOME IN PAST CALENDAR ║
# ║ ║ ║ ║ YEAR *KEY* (TRUNCATED) ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ COMMENT: Total Net Family Income in previous calendar year ║
# ║ ║ ║ ║ NOTE: FOR VARIABLE CREATION, SEE APPENDIX 2: TOTAL NET FAMILY INCOME TRUNCATED ║
# ║ ║ ║ ║ VALUES ARE EQUAL TO THE AVERAGE VALUE OF THE TOP 2% OF RESPONDENTS WHO ARE ║
# ║ ║ ║ ║ U.S. RESIDENTS. IF THE NUMBER OF TRUNCATED VALUES EXCEEDS THAT WHICH WOULD ║
# ║ ║ ║ ║ HAVE BEEN TRUNCATED AT THE TRADITIONAL TRUNCATION LEVEL OF 100,001 FOR THIS ║
# ║ ║ ║ ║ ITEM, VALUES OVER THAT TRADITIONAL TRUNCATION LEVEL ARE AVERAGED. OVERSEAS ║
# ║ ║ ║ ║ RESPONDENTS WHOSE DOLLAR INCOME COULD NOT BE DETERMINED ARE CODED -3. ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ ACTUAL DOLLARS ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 210 0 ║
# ║ ║ ║ ║ 42 1 TO 999 ║
# ║ ║ ║ ║ 40 1000 TO 1999 ║
# ║ ║ ║ ║ 60 2000 TO 2999 ║
# ║ ║ ║ ║ 28 3000 TO 3999 ║
# ║ ║ ║ ║ 25 4000 TO 4999 ║
# ║ ║ ║ ║ 26 5000 TO 5999 ║
# ║ ║ ║ ║ 24 6000 TO 6999 ║
# ║ ║ ║ ║ 32 7000 TO 7999 ║
# ║ ║ ║ ║ 66 8000 TO 8999 ║
# ║ ║ ║ ║ 72 9000 TO 9999 ║
# ║ ║ ║ ║ 342 10000 TO 14999 ║
# ║ ║ ║ ║ 256 15000 TO 19999 ║
# ║ ║ ║ ║ 250 20000 TO 24999 ║
# ║ ║ ║ ║ 1173 25000 TO 49999 ║
# ║ ║ ║ ║ 3330 50000 TO 99999999: 50000+ ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 5976 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 236 ║
# ║ ║ ║ ║ Don't Know(-2) 690 ║
# ║ ║ ║ ║ Invalid Skip(-3) 10 ║
# ║ ║ ║ ║ TOTAL =========> 6912 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T57709.00 ║ fam_pov         ║ 2016   ║ FAMILY POVERTY STATUS IN PREVIOUS CALENDAR YEAR                                    ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Family poverty status in past calendar year based on the income reported by ║
# ║ ║ ║ ║ the respondent. See YEAR_INCOME for which year is represented. ║
# ║ ║ ║ ║ NOTE: THIS VARIABLE USES THE TOTAL NET FAMILY INCOME VARIABLE TO DETERMINE THE ║
# ║ ║ ║ ║ THE POVERTY STATUS OF THE RESPONDENT'S HOUSEHOLD. WHEREVER POSSIBLE, THIS ║
# ║ ║ ║ ║ VARIABLE INCORPORATES THE FOLLOW-UP SELF-REPORTED RANGE AND UNFOLDING BRACKET ║
# ║ ║ ║ ║ QUESTIONS PRESENT FOR MANY INCOME AMOUNTS TO ESTIMATE A VALUE FOR ║
# ║ ║ ║ ║ NON-RESPONDERS (THOSE RESPONDING DON'T KNOW OR REFUSE TO ACTUAL INCOME ║
# ║ ║ ║ ║ AMOUNTS). ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 4958 0: NOT IN POVERTY ║
# ║ ║ ║ ║ 1018 1: IN POVERTY ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 5976 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ Invalid Skip(-3) 936 ║
# ║ ║ ║ ║ TOTAL =========> 6912 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 5774 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T57710.00 ║ region ║ 2016 ║ REGION OF CURRENT RESIDENCE ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ COMMENT: REGION OF RESIDENCE ║
# ║ ║ ║ ║ NOTE: FOR VARIABLE CREATION SEE ATTACHMENT 104: GEOGRAPHIC REGIONS ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1006 1: NORTHEAST ║
# ║ ║ ║ ║ 1537 2: NORTH CENTRAL ║
# ║ ║ ║ ║ 2963 3: SOUTH ║
# ║ ║ ║ ║ 1344 4: WEST ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6850 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 6850 VALID SKIP(-4) 62 NON-INTERVIEW(-5) 5774 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 1 Max: 4 Mean: 2.68 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T57712.00 ║ marital ║ 2016 ║ MARITAL STATUS ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ COMMENT: Marital Status ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1050 0: 0 NEVER MARRIED ║
# ║ ║ ║ ║ 3692 1: 1 MARRIED ║
# ║ ║ ║ ║ 333 2: 2 SEPARATED ║
# ║ ║ ║ ║ 1588 3: 3 DIVORCED ║
# ║ ║ ║ ║ 249 6: 6 WIDOWED ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6912 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 6912 VALID SKIP(-4) 0 NON-INTERVIEW(-5) 5774 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 0 Max: 6 Mean: 1.54 ║
# ╠═══════════╬═════════════════╬════════╬════════════════════════════════════════════════════════════════════════════════════╣
# ║ T57740.00 ║ urban_rural ║ 2016 ║ IS R'S CURRENT RESIDENCE URBAN/RURAL? ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ COMMENT: R's in urban or rural residence? ║
# ║ ║ ║ ║ NOTE: SEE APPENDIX 6: URBAN-RURAL AND SMSA-CENTRAL CITY VARIABLES ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ 1430 0: RURAL ║
# ║ ║ ║ ║ 5332 1: URBAN ║
# ║ ║ ║ ║ 88 2: UNKNOWN ║
# ║ ║ ║ ║ ------- ║
# ║ ║ ║ ║ 6850 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Refusal(-1) 0 ║
# ║ ║ ║ ║ Don't Know(-2) 0 ║
# ║ ║ ║ ║ TOTAL =========> 6850 VALID SKIP(-4) 62 NON-INTERVIEW(-5) 5774 ║
# ║ ║ ║ ║ ║
# ║ ║ ║ ║ Min: 0 Max: 2 Mean: .8 ║
# ╚═══════════╩═════════════════╩════════╩════════════════════════════════════════════════════════════════════════════════════╩
# ```
# In[4]:
def age_2016(x):
    """Return R's age in 2016.

    Adds the number of years between 1979 and 2016 (37) to ``x``.
    ``x`` is presumably R's age as of the 1979 round - confirm against the
    codebook entry for the ``age`` column.
    """
    base_year, target_year = 1979, 2016
    return x + (target_year - base_year)
# Derive the 2016 age column-wise. Series.apply visits each value of "age"
# directly, whereas DataFrame.apply(axis=1) materialises a Series for every
# row (slow, and it can upcast the values when the frame has mixed dtypes).
df["age_2016"] = df["age"].apply(age_2016)
# Notebook display of the first rows; has no effect when run as a script.
df.head()
# In[5]:
def negative_clean_up(value):
    """Return NaN when ``value`` is negative; otherwise return it unchanged."""
    return float("NaN") if value < 0 else value
# In[6]:
# Applying NaN to all negative values
# negative_clean_up is passed directly; wrapping it in a lambda adds nothing.
df = df.applymap(negative_clean_up)
# In[7]:
# Checking for count of NaN values over columns
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
{i: j for i, j in df.isnull().sum().items() if j > 0}
# In[8]:
# Identifying NaN criteria: names of the columns that contain at least one NaN
variables_with_NaN = [i for i, j in df.isnull().sum().items() if j > 0]
# In[9]:
# Filling in NaN values with mean values
# NOTE(review): every column is imputed with its mean, including coded
# categorical variables (e.g. region, marital) - confirm this is intended.
df.fillna(df.mean(), inplace=True)
# In[10]:
# Querying for NaN values (expected to be empty after the imputation above)
{i: j for i, j in df.isnull().sum().items() if j > 0}
# 1. Probability of being unemployed in 2016 given endogenous variables.
#
# This data set already has a dichotomous variable for $unemp_{2016}$. Because negative (missing) codes were replaced with `NaN` and then imputed with the column mean, some observations now hold a value other than 0 or 1, so we must refine the variable.
# In[11]:
# Share of each distinct value of unemp_2016 (notebook display).
df["unemp_2016"].value_counts(normalize=True)
# In[11]:
# df.loc[df["unemp_2016"] > 0, "unemp_2016_2"] = 1
# df.loc[df["unemp_2016"] == 0, "unemp_2016_2"] = 0
# In[12]:
# df["unemp_2016_2"].value_counts()
# Using the _50/50_ rule would skew the data into too many cases being 1 (unemployed at some point in 2016), so at the expense of dropping almost half of the sample subjects, I will attempt to create a more accurate model.
# In[12]:
# Keep only the rows whose unemp_2016 is exactly 0 or 1. The explicit .copy()
# makes the subset an independent frame, so adding columns to it later does
# not raise SettingWithCopyWarning or risk writing to a temporary object.
unemp_2016_2 = df[df["unemp_2016"].isin([0, 1])].copy()
unemp_2016_2
# In[13]:
# Class balance of the retained sample.
unemp_2016_2["unemp_2016"].value_counts()
# In[15]:
# Build a profiling report over the filtered sample.
# NOTE(review): ProfileReport is presumably the pandas-profiling class - confirm against the file's imports.
profile = ProfileReport(unemp_2016_2, title="Pandas Profiling Report")
# In[17]:
# Write the report to "your_report.html" (relative path, so it lands in the working directory).
profile.to_file("your_report.html")
# In[14]:
# Visualizing correlation with Seaborn
# Oversized figure plus enlarged fonts, configured in a single call.
sns.set(rc={"figure.figsize": (30, 21)}, font_scale=1.5)
# Pairwise correlations of the filtered sample, drawn on a fixed [-1, 1]
# colour scale with the coefficient annotated in every cell.
corr_matrix = unemp_2016_2.corr()
sns.heatmap(
    corr_matrix,
    cmap="seismic",
    annot=True,
    vmin=-1,
    vmax=1,
)
# $$
# P(unemp_{2016} = 1~|~X) = \frac{\exp(z)}{1 + \exp(z)},
# $$
#
# where $z$ is given by
#
# $$
# \begin{align} z = \theta + w_1 ~ age_{2016} + w_2 ~sample\_race + w_3 ~sample\_sex + w_4 ~asvab\_math + w_5 ~asvab\_word \\ + w_6 ~major + w_7 ~max\_degree + w_8 ~occup_{2016} + w_9 ~css\_worker + \\ w_{10} ~firm\_size + w_{11} ~income_{2016} + w_{12} ~region + w_{13} ~marital + w_{14} ~urban\_rural \end{align}
# $$
# In[16]:
# Regressors of the first logit model, in the order they are passed to it.
logit_1_features = [
    "age_2016",
    "sample_race",
    "sample_sex",
    "asvab_math",
    "asvab_word",
    "major",
    "max_degree",
    "occup_2016",
    "css_worker",
    "firm_size",
    "income_2016",
    "region",
    "marital",
    "urban_rural",
]
X1 = unemp_2016_2[logit_1_features]
# X1 = sm.add_constant(X1)
# NOTE(review): with add_constant disabled, no intercept column is passed to
# the model - confirm this is intended.
y1 = unemp_2016_2["unemp_2016"]
# Build the model first, then fit it.
logit_1_model = sm.Logit(y1, X1)
logit_1 = logit_1_model.fit()
# In[17]:
# Summary table of the fitted model.
logit_1_summary = logit_1.summary2()
print(logit_1_summary)
# In[18]:
# Marginal effects (dy/dx) requested at the mean of the regressors.
logit_1_margeff = logit_1.get_margeff(at="mean", method="dydx")
print(logit_1_margeff.summary())
# In[19]:
# Predicted probabilities from the fitted model, stored next to the data.
unemp_2016_2["logit_1_prob"] = logit_1.predict()
unemp_2016_2
# In[20]:
# Classify with a 0.5 cut-off: probabilities below 0.5 become 0, everything
# else becomes 1 (written as "not below 0.5" to mirror that rule exactly).
below_cutoff = unemp_2016_2["logit_1_prob"] < 0.5
unemp_2016_2["logit_1_pred"] = (~below_cutoff).astype("int64")
# In[21]:
# Confusion matrix with margins: rows are the observed unemp_2016 value,
# columns are the predicted class.
cm = pd.crosstab(
    index=unemp_2016_2["unemp_2016"],
    columns=unemp_2016_2["logit_1_pred"],
    margins=True,
)
cm
# In[22]:
# Cells addressed as .loc[observed, predicted].
TN = cm.loc[0, 0]
FP = cm.loc[0, 1]
FN = cm.loc[1, 0]
TP = cm.loc[1, 1]
n_obs = len(unemp_2016_2)
accuracy = (TP + TN) / n_obs
error = 1 - accuracy
sensitivity = TP / (FN + TP)
specificity = TN / (TN + FP)
# Print every statistic rounded to four decimals.
for _label, _value in (
    ("Accuracy:\t", accuracy),
    ("Error:\t\t", error),
    ("Sensitivity:\t", sensitivity),
    ("Specificity:\t", specificity),
):
    print(_label, round(_value, 4))
# In[23]:
final_results = {}
res_logit_1 = """
=========================
L O G I T 1
=========================
Classification Statistics
=========================
Accuracy:\t0.8032
Error:\t\t0.1968