forked from dengyunsun/Getting_and_Cleaning_Data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.Rhistory
112 lines (112 loc) · 3.6 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Load the household survey extract and count the records whose VAL column
# equals 24.
#
# Cleaned up from the recorded session, which also tried to source() the CSV
# (a data file, not R code) and to index the plain vector `vals` with
# matrix-style `[i, ]` / `[, j]` subscripts; both forms stop with an error.
# The purely exploratory print/View calls are omitted.
# NOTE(review): the session first filtered on VAL > 1000000 and then switched
# to VAL == 24, so VAL is presumably a coded bracket rather than a dollar
# amount -- confirm against the survey code book.
file <- read.csv("getdata-data-ss06hid.csv", sep = ",")

# Drop missing values first so the comparison below cannot yield NA entries.
vals <- file$VAL
vals <- vals[!is.na(vals)]

# Keep the matching values and report how many there are.
test <- vals[vals == 24]
length(test)
# Read the DATA.gov NGAP workbook and compute sum(Zip * Ext) over a fixed
# cell range of its first sheet.
#
# The recorded session first passed the .xlsx file to source() and
# read.table() (neither parses Excel workbooks) and called library(xlsx)
# before the package was installed; the steps below are the working order.
if (!requireNamespace("xlsx", quietly = TRUE)) {
  install.packages("xlsx")  # install only when the package is missing
}
library(xlsx)

# read.xlsx() needs a sheet selector, hence sheetIndex = 1 on both calls.
file <- read.xlsx(
  "getdata-data-DATA.gov_NGAP.xlsx",
  sheetIndex = 1,
  header = TRUE
)

# Rows 18-23 and columns 7-15 of the first sheet.
dat <- read.xlsx(
  "getdata-data-DATA.gov_NGAP.xlsx",
  sheetIndex = 1,
  rowIndex = 18:23,
  colIndex = 7:15,
  header = TRUE
)

# Missing products are ignored in the total.
sum(dat$Zip * dat$Ext, na.rm = TRUE)
# Parse the restaurants XML feed into an internal-node document.
#
# Fixes over the recorded session: the package name given to
# install.packages() and the URL are now quoted strings (unquoted they are an
# unknown symbol and a syntax error respectively), the document is parsed
# once instead of four times, and the partially matched `useInternal`
# argument is spelled out as `useInternalNodes`.
if (!requireNamespace("XML", quietly = TRUE)) {
  install.packages("XML")  # install only when the package is missing
}
library(XML)

fileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Frestaurants.xml"

# NOTE(review): confirm that xmlTreeParse() can fetch https:// URLs with the
# installed XML version; if it cannot, download the file first and parse the
# local copy instead.
doc <- xmlTreeParse(fileURL, useInternalNodes = TRUE)

# The session also stored a parse of the same URL under `file`; keep that
# binding available by pointing it at the same document.
file <- doc
# Discrete distribution on 1:4 with P(X = x) = x / sum(1:4): tabulate it and
# compute its variance.
x <- 1:4         # support
p <- x / sum(x)  # probabilities, proportional to the value

# Show the distribution as a two-row table.
temp <- rbind(x, p)
rownames(temp) <- c("X", "Prob")
temp

# Variance as E[X^2] - E[X]^2. This has to come after x and p are defined;
# the recorded session evaluated it first, when neither existed yet.
z <- sum(x^2 * p) - sum(x * p)^2

# Unweighted mean of the support values. R is case-sensitive, so the
# session's mean(X) referred to an undefined symbol and is dropped.
mean(x)
# Bayes-rule style ratio expressed in percent: 0.75 * 0.3 divided by itself
# plus (1 - 0.52) * (1 - 0.3), rounded to a whole number.
round(
  100 * ((0.75 * 0.3) / ((0.75 * 0.3) + ((1 - 0.52) * (1 - 0.3))))
)

# Paired, two-sided t-test on five matched measurements.
a <- c(140, 138, 150, 148, 135)
b <- c(132, 135, 151, 146, 130)
t.test(a, b, paired = TRUE, alternative = "two.sided")

# Upper tail P(X > 2) for X ~ Binomial(4, 0.5), to two decimals.
round(pbinom(2, size = 4, prob = 0.5, lower.tail = FALSE), 2)

# Lower tail P(X <= 3) for the same distribution.
pbinom(3, size = 4, prob = 0.5)
# Two-sample pooled-variance t test computed from summary statistics,
# nine observations per group.
n1 <- n2 <- 9
x1 <- -3   # mean, treated
x2 <- 1    # mean, placebo
s1 <- 1.5  # standard deviation, treated
s2 <- 1.8  # standard deviation, placebo

# Pooled variance (a variance, not a standard deviation).
spsq <- ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2)

# The standard error needs the pooled standard deviation, sqrt(spsq).
# Dividing by spsq itself understates |t| here because spsq > 1.
t <- (x1 - x2) / (sqrt(spsq) * sqrt(1 / n1 + 1 / n2))

# Two-sided p-value; abs() keeps it valid whichever group has the larger mean.
2 * pt(abs(t), df = n1 + n2 - 2, lower.tail = FALSE)
# Sample-size style formula (z_0.95 + z_0.90)^2 * 0.04^2 / 0.01^2:
# first the raw value, then rounded to the nearest whole number.
sum(qnorm(c(0.95, 0.90)))^2 * 0.04^2 / 0.01^2
round(sum(qnorm(c(0.95, 0.90)))^2 * 0.04^2 / 0.01^2)
# Two-sample z test computed from summary statistics: two groups of 288
# observations, each with standard deviation 12.
x <- 44     # mean of the first group
y <- 42.04  # mean of the second group
sx <- 12
sy <- 12
n1 <- 288
n2 <- 288

# Per-group variance of the mean, then the standard error of the difference
# built from the unpooled standard deviations.
sx^2 / n1
smm <- sqrt(sx^2 / n1 + sy^2 / n2)

# The same standard error via the pooled standard deviation.
SDpooled <- sqrt(((n1 - 1) * sx^2 + (n2 - 1) * sy^2) / (n1 + n2 - 2))
SE_est <- sqrt(SDpooled^2 / n1 + SDpooled^2 / n2)

# Standardise the difference before the normal lookup. The recorded session
# passed the raw difference, which is only correct because the standard
# error happens to equal 1 for these inputs.
p_value <- 2 * pnorm(abs(x - y) / SE_est, lower.tail = FALSE)
p_value
# Plot the centred parent/child values from `galton` as a frequency-weighted
# scatter, overlay the line through the origin with slope `beta`, and show
# the mean squared error of that line in the title.
#
# Args:
#   beta: numeric slope of the line child = beta * parent on centred data.
#
# Reads the data frame `galton` (columns `child` and `parent`) from the
# calling environment; it is not defined in this file.
# NOTE(review): presumably the `galton` data set from the UsingR package --
# confirm it is loaded before calling.
#
# Called for its plotting side effect.
myPlot <- function(beta) {
  y <- galton$child - mean(galton$child)
  x <- galton$parent - mean(galton$parent)

  # table(x, y) puts the x (parent) values in the first column and the
  # y (child) values in the second, so the names must follow that order.
  # With the names swapped the scatter is transposed relative to the axis
  # labels and to the fitted line drawn below.
  freqData <- as.data.frame(table(x, y))
  names(freqData) <- c("parent", "child", "freq")

  plot(
    as.numeric(as.vector(freqData$parent)),
    as.numeric(as.vector(freqData$child)),
    pch = 21, col = "black", bg = "lightblue",
    cex = .15 * freqData$freq,  # point size proportional to the cell count
    xlab = "parent",
    ylab = "child"
  )
  abline(0, beta, lwd = 3)
  points(0, 0, cex = 2, pch = 19)  # mark the origin (both means after centring)

  mse <- mean((y - beta * x)^2)
  title(paste("beta = ", beta, "mse = ", round(mse, 3)))
}
# Interactive slope explorer: call myPlot() with beta taken from a slider
# running from 0.6 to 1.2 in steps of 0.02.
# NOTE(review): manipulate() and slider() are not loaded anywhere in the
# visible commands -- presumably from the RStudio `manipulate` package.
manipulate(
  myPlot(beta),
  beta = slider(0.6, 1.2, step = 0.02)
)

# Report the current working directory, then switch into the course folder
# (relative path, so it depends on where the session was started).
getwd()
setwd("Getting and Cleaning Data")

# Install the RMySQL package.
install.packages("RMySQL")
# Set up a GitHub OAuth 2.0 token with httr.
library(httr)

# Print the GitHub endpoint definitions for inspection.
oauth_endpoints("github")

# Register the application credentials.
# NOTE(review): "ClientID" / "ClientSecret" look like placeholders; supply the
# real values at run time rather than committing them to the repository.
myapp <- oauth_app("github", "ClientID", "ClientSecret")

# Request the token. The recorded session first typed `my app` (with a
# space), which does not parse; only the corrected call is kept.
github_token <- oauth2.0_token(oauth_endpoints("github"), myapp)
# Download a web page as lines of text and report the character count of
# lines 10, 20, 30 and 100.
hurl <- "http://biostat.jhsph.edu/~jleek/contact.html"
con <- url(hurl)

# Close the connection even when the read fails, so it is never left open.
htmlCode <- tryCatch(readLines(con), finally = close(con))

# vapply() pins the result to one integer per line (sapply()'s return type
# depends on its input); the names are still the line contents.
vapply(htmlCode[c(10, 20, 30, 100)], nchar, integer(1))