library(languageVariationAndChangeData)
library(dplyr)

Buckeye Data

Making a sub-table

You can create a table of all grammatical classes like so:

# Tabulate how often each value of the Gram2 (grammatical class) column occurs.
table(buckeye[["Gram2"]])

       and      justT       mono   nochange         nt       past   semiweak stemchange 
     10254         53       9846         24       4162       2458        406         71 
      went 
       400 

To get a table with just past and mono, you can index the table with these names.

# Build the full table of grammatical classes once, then pick out the two
# cells of interest by their names.
gram_tab <- table(buckeye[["Gram2"]])
classes_of_interest <- c("past", "mono")
gram_tab[classes_of_interest]

past mono 
2458 9846 

What words in justT

There are a few ways of going about this. The simplest approach involves first taking a subset of the data:

# Keep only the rows whose grammatical class is "justT".
justT <- filter(buckeye, Gram2 == "justT")

Now, to see what words are in the Word column, you can use the unique() function.

# List each distinct word in the subset once. Word is a factor, so the printed
# result still reports the full set of levels.
unique(justT[["Word"]])
[1] spent sent  built
1083 Levels: abandoned abhorrent abrupt absent abstract abused accent accept ... zimbalist

You could also use the table() function, but first you’ll need to drop the empty levels of Word (the factor keeps one level for every unique word in the full data set, not just the words in justT).

# Method 1: convert the factor to plain character strings, which carry no
# levels, so table() only counts the words that actually occur.
word_character <- as.character(justT[["Word"]])
table(word_character)
word_character
built  sent spent 
   20    11    22 
# Method 2: re-create the factor so that only the levels still in use remain.
word_factor <- factor(justT[["Word"]])
table(word_factor)
word_factor
built  sent spent 
   20    11    22 

The SVLR Data

# Read the SVLR data from a CSV file located two directories above the
# working directory.
svlr <- read.csv(file = "../../svlr_2015.csv")

This section will be very similar to the previous exercises, except it replaces subset() with filter(), and uses the %>% operator. Again, what’s really crucial is to keep track of which subsets of the data you want. It’s possible to write R code that will run successfully, but looks at the data the wrong way.

Find out what proportion of Scottish respondents said tide/tied were different out of Scottish respondents who were asked about tide/tied.

# Scottish respondents' judgements of tide/tied: take the subset once, then
# count each kind of response within it.
scottish_ay <- svlr %>%
  filter(Scottish == "y", Wordpair == "tide/tied")
scottish_ay_same <- scottish_ay %>%
  filter(Response == "same") %>%
  nrow()
scottish_ay_diff <- scottish_ay %>%
  filter(Response == "different") %>%
  nrow()
# Share of "different" answers among the "same" and "different" answers.
scottish_ay_diff / (scottish_ay_same + scottish_ay_diff)
[1] 0.7479675

The same calculation for non-Scottish respondents:

# Non-Scottish respondents' judgements of tide/tied: take the subset once,
# then count each kind of response within it.
nonscot_ay <- svlr %>%
  filter(Scottish == "n", Wordpair == "tide/tied")
nonscot_ay_same <- nonscot_ay %>%
  filter(Response == "same") %>%
  nrow()
nonscot_ay_diff <- nonscot_ay %>%
  filter(Response == "different") %>%
  nrow()
# Share of "different" answers among the "same" and "different" answers.
nonscot_ay_diff / (nonscot_ay_same + nonscot_ay_diff)
[1] 0.2992701

The same as above, but now for /ow/ (toad/towed).

# Scottish respondents' judgements of toad/towed: take the subset once, then
# count each kind of response within it.
scottish_ow <- svlr %>%
  filter(Scottish == "y", Wordpair == "toad/towed")
scottish_ow_same <- scottish_ow %>%
  filter(Response == "same") %>%
  nrow()
scottish_ow_diff <- scottish_ow %>%
  filter(Response == "different") %>%
  nrow()
# Share of "different" answers among the "same" and "different" answers.
scottish_ow_diff / (scottish_ow_same + scottish_ow_diff)
[1] 0.3963964
# Non-Scottish respondents' judgements of toad/towed: take the subset once,
# then count each kind of response within it.
nonscot_ow <- svlr %>%
  filter(Scottish == "n", Wordpair == "toad/towed")
nonscot_ow_same <- nonscot_ow %>%
  filter(Response == "same") %>%
  nrow()
nonscot_ow_diff <- nonscot_ow %>%
  filter(Response == "different") %>%
  nrow()
# Share of "different" answers among the "same" and "different" answers.
nonscot_ow_diff / (nonscot_ow_same + nonscot_ow_diff)
[1] 0.4174757

Grouping

  # Collapse the whole data frame to a single row holding the mean of td.
  buckeye %>% summarise(td = mean(td))

summarise() has taken the buckeye data frame, calculated the mean of the td column, and created a new data frame with a single column called td that holds the mean value.

  # Group the rows by grammatical class and preceding segment first, so that
  # summarise() computes one mean of td per combination.
  by_class_and_segment <- buckeye %>% group_by(Gram2, PreSeg)
  by_class_and_segment %>% summarise(td = mean(td))

By putting the group_by() function before the summarise() function, we’ve taken the mean of the td column for every combination of preceding segment & grammatical class.

If we wanted to get each speaker’s rate of TD Retention for the regular past, we could do it like so:

# Each speaker's mean of td for the regular past ("past") rows.
# The summary is computed once and stored, then printed and plotted, instead
# of running the same filter/group_by/summarise pipeline twice.
speaker_td <- buckeye %>%
  filter(Gram2 == "past") %>%
  group_by(Speaker) %>%
  summarise(td = mean(td))
speaker_td
# Distribution of the per-speaker means; the y-axis is fixed to 0-1 so the
# full possible range is shown.
boxplot(speaker_td$td, ylim = c(0, 1))

LS0tCnRpdGxlOiAiV2VlayA0OiBNb2RlbCBBbnN3ZXJzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShsYW5ndWFnZVZhcmlhdGlvbkFuZENoYW5nZURhdGEpCmxpYnJhcnkoZHBseXIpCmBgYAoKIyBCdWNrZXllIERhdGEKCiMjIE1ha2luZyBhIHN1Yi10YWJsZQpZb3UgY2FuIGNyZWF0ZSBhIHRhYmxlIG9mIGFsbCBncmFtbWF0aWNhbCBjbGFzc2VzIGxpa2Ugc286CgpgYGB7cn0KdGFibGUoYnVja2V5ZSRHcmFtMikKYGBgCgoKVG8gZ2V0IGEgdGFibGUgd2l0aCBqdXN0IGBwYXN0YCBhbmQgYG1vbm9gLCB5b3UgY2FuIGluZGV4IHRoZSB0YWJsZSB3aXRoIHRoZXNlIG5hbWVzLgoKYGBge3J9CmdyYW1fdGFiIDwtIHRhYmxlKGJ1Y2tleWUkR3JhbTIpCmdyYW1fdGFiW2MoInBhc3QiLCAibW9ubyIpXQpgYGAKCgojIyBXaGF0IHdvcmRzIGluIGBqdXN0VGAKClRoZXJlIGFyZSBhIGZldyB3YXlzIG9mIGdvaW5nIGFib3V0IHRoaXMuIFRoZSBzaW1wbGVzdCBhcHByb2FjaCBpbnZvbHZlcyBmaXJzdCB0YWtpbmcgYSBzdWJzZXQgb2YgdGhlIGRhdGEKCmBgYHtyfQpqdXN0VCA8LSBidWNrZXllICU+JQogICAgICAgICAgICBmaWx0ZXIoR3JhbTIgPT0gImp1c3RUIikKYGBgCgoKTm93LCB0byBzZWUgd2hhdCB3b3JkcyBhcmUgaW4gdGhlIGBXb3JkYCBjb2x1bW4sIHlvdSBjYW4gdXNlIHRoZSBgdW5pcXVlKClgIGZ1bmN0aW9uLgoKYGBge3J9CnVuaXF1ZShqdXN0VCRXb3JkKQpgYGAKCllvdSBjb3VsZCBhbHNvIHVzZSB0aGUgYHRhYmxlKClgIGZ1bmN0aW9uLCBidXQgZmlyc3QgeW91J2xsIG5lZWQgdG8gZHJvcCB0aGUgZW1wdHkgbGV2ZWxzIG9mIGBXb3JkYCAodGhlcmUncyBvbmUgZm9yIGV2ZXJ5IHVuaXF1ZSB3b3JkKS4KCmBgYHtyfQojIG1ldGhvZCAxCndvcmRfY2hhcmFjdGVyIDwtIGFzLmNoYXJhY3RlcihqdXN0VCRXb3JkKQp0YWJsZSh3b3JkX2NoYXJhY3RlcikKYGBgCgpgYGB7cn0KI21ldGhvZCAyCndvcmRfZmFjdG9yIDwtIGZhY3RvcihqdXN0VCRXb3JkKQp0YWJsZSh3b3JkX2ZhY3RvcikKYGBgCgojIFRoZSBTVkxSIERhdGEKCmBgYHtyfQojbG9hZGluZyB0aGUgc3ZsciBkYXRhCnN2bHIgPC0gcmVhZC5jc3YoIi4uLy4uL3N2bHJfMjAxNS5jc3YiKQpgYGAKClRoaXMgc2VjdGlvbiB3aWxsIGJlIHZlcnkgc2ltaWxhciB0byB0aGUgcHJldmlvdXMgZXhlcmNpc2VzLCBleGNlcHQgaXQgcmVwbGFjZXMgYHN1YnNldCgpYCB3aXRoIGBmaWx0ZXIoKWAsIGFuZCB1c2VzIHRoZSBgJT4lYCBvcGVyYXRvci4gQWdhaW4sIHdoYXQncyAqcmVhbGx5KiBjcnVjaWFsIGlzIHRvIGtlZXAgdHJhY2sgb2Ygd2hpY2ggc3Vic2V0cyBvZiB0aGUgZGF0YSB5b3Ugd2FudC4gSXQncyBwb3NzaWJsZSB0byB3cml0ZSBSIGNvZGUgdGhhdCB3aWxsIHJ1biBzdWNjZXNzZnVsbHksIGJ1dCBsb29rcyBhdCB0aGUgZGF0YSB0aGUgd3Jvbmcgd2F5LgoKRmluZCBvdXQgd2hhdCBwcm9wb3J0aW9uIG9mIFNjb3R0aXNoIHJlc3Bv
bmRlbnRzIHNhaWQgYHRpZGUvdGllZGAgd2VyZSBkaWZmZXJlbnQgb3V0IG9mIFNjb3R0aXNoIHJlc3BvbmRlbnRzIHdobyB3ZXJlIGFza2VkIGFib3V0IGB0aWRlL3RpZWRgLiAKYGBge3J9CnNjb3R0aXNoX2F5X3NhbWUgPC0gc3ZsciAlPiUgCiAgICAgICAgICAgICAgICAgICAgICAgIGZpbHRlcihTY290dGlzaCA9PSAieSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBXb3JkcGFpciA9PSAidGlkZS90aWVkIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFJlc3BvbnNlID09ICJzYW1lIiklPiUKICAgICAgICAgICAgICAgICAgICAgICAgbnJvdygpCgpzY290dGlzaF9heV9kaWZmIDwtIHN2bHIgJT4lIAogICAgICAgICAgICAgICAgICAgICAgICBmaWx0ZXIoU2NvdHRpc2ggPT0gInkiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgV29yZHBhaXIgPT0gInRpZGUvdGllZCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBSZXNwb25zZSA9PSAiZGlmZmVyZW50IiklPiUKICAgICAgICAgICAgICAgICAgICAgICAgbnJvdygpCgpzY290dGlzaF9heV9kaWZmIC8gKHNjb3R0aXNoX2F5X2RpZmYgKyBzY290dGlzaF9heV9zYW1lKQpgYGAKCldpdGggbm9uLXNjb3R0aXNoCgpgYGB7cn0Kbm9uc2NvdF9heV9zYW1lIDwtIHN2bHIgJT4lIAogICAgICAgICAgICAgICAgICAgICAgICBmaWx0ZXIoU2NvdHRpc2ggPT0gIm4iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgV29yZHBhaXIgPT0gInRpZGUvdGllZCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBSZXNwb25zZSA9PSAic2FtZSIpJT4lCiAgICAgICAgICAgICAgICAgICAgICAgIG5yb3coKQoKbm9uc2NvdF9heV9kaWZmIDwtIHN2bHIgJT4lIAogICAgICAgICAgICAgICAgICAgICAgICBmaWx0ZXIoU2NvdHRpc2ggPT0gIm4iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgV29yZHBhaXIgPT0gInRpZGUvdGllZCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBSZXNwb25zZSA9PSAiZGlmZmVyZW50IiklPiUKICAgICAgICAgICAgICAgICAgICAgICAgbnJvdygpCgpub25zY290X2F5X2RpZmYgLyAobm9uc2NvdF9heV9kaWZmICsgbm9uc2NvdF9heV9zYW1lKQpgYGAKCgpTYW1lIGFzIGFib3ZlLCBidXQgd2l0aCAvb3cvIG5vdy4KYGBge3J9CnNjb3R0aXNoX293X3NhbWUgPC0gc3ZsciAlPiUgCiAgICAgICAgICAgICAgICAgICAgICAgIGZpbHRlcihTY290dGlzaCA9PSAieSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBXb3JkcGFpciA9PSAidG9hZC90b3dlZCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBSZXNwb25zZSA9PSAic2FtZSIpJT4lCiAgICAgICAgICAgICAgICAgICAgICAgIG5yb3coKQoKc2NvdHRpc2hfb3dfZGlmZiA8LSBzdmxyICU+JSAKICAgICAgICAgICAgICAgICAgICAgICAgZmlsdGVyKFNjb3R0aXNoID09ICJ5IiwKICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgIFdvcmRwYWlyID09ICJ0b2FkL3Rvd2VkIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFJlc3BvbnNlID09ICJkaWZmZXJlbnQiKSU+JQogICAgICAgICAgICAgICAgICAgICAgICBucm93KCkKCnNjb3R0aXNoX293X2RpZmYgLyAoc2NvdHRpc2hfb3dfZGlmZiArIHNjb3R0aXNoX293X3NhbWUpCmBgYApgYGB7cn0Kbm9uc2NvdF9vd19zYW1lIDwtIHN2bHIgJT4lIAogICAgICAgICAgICAgICAgICAgICAgICBmaWx0ZXIoU2NvdHRpc2ggPT0gIm4iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgV29yZHBhaXIgPT0gInRvYWQvdG93ZWQiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgUmVzcG9uc2UgPT0gInNhbWUiKSU+JQogICAgICAgICAgICAgICAgICAgICAgICBucm93KCkKCm5vbnNjb3Rfb3dfZGlmZiA8LSBzdmxyICU+JSAKICAgICAgICAgICAgICAgICAgICAgICAgZmlsdGVyKFNjb3R0aXNoID09ICJuIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFdvcmRwYWlyID09ICJ0b2FkL3Rvd2VkIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFJlc3BvbnNlID09ICJkaWZmZXJlbnQiKSU+JQogICAgICAgICAgICAgICAgICAgICAgICBucm93KCkKCm5vbnNjb3Rfb3dfZGlmZiAvIChub25zY290X293X2RpZmYgKyBub25zY290X293X3NhbWUpCmBgYAoKCiMgR3JvdXBpbmcKCmBgYHtyfQogIGJ1Y2tleWUgJT4lCiAgICBzdW1tYXJpc2UodGQgPSBtZWFuKHRkKSkKYGBgCgpJdCdzIHRha2VuIHRoZSBgYnVja2V5ZWAgZGF0YSBmcmFtZSwgY2FsY3VsYXRlZCB0aGUgbWVhbiBvZiB0aGUgYHRkYCBjb2x1bW4sIGFuZCBjcmVhdGVkIGEgbmV3IGRhdGEgZnJhbWUgd2l0aCBhIGNvbHVtbiBjYWxsZWQgYHRkYCB3aXRoIHRoZSBtZWFuIHZhbHVlIGluIGl0LiAKCgpgYGB7cn0KICBidWNrZXllICU+JQogICAgZ3JvdXBfYnkoR3JhbTIsIFByZVNlZykgJT4lCiAgICBzdW1tYXJpc2UodGQgPSBtZWFuKHRkKSkKYGBgCgoKQnkgcHV0dGluZyBgZ3JvdXBfYnkoKWAgZnVuY3Rpb24gYmVmb3JlIHRoZSBgc3VtbWFyaXNlKClgIGZ1bmN0aW9uLCB3ZSd2ZSB0YWtlbiB0aGUgbWVhbiBvZiB0aGUgYHRkYCBjb2x1bW4gZm9yIGV2ZXJ5IGZvciBldmVyeSBwcmVjZWRpbmcgc2VnbWVudCAmIGdyYW1tYXRpY2FsIGNsYXNzLgoKCklmIHdlIHdhbnRlZCB0byBnZXQgYSBlYWNoIHNwZWFrZXIncyByYXRlIG9mIFREIFJldGVudGlvbiBmb3IgdGhlIHJlZ3VsYXIgcGFzdCwgd2UgY291bGQgZG8gaXQgbGlrZSBzbzoKCgpgYGB7cn0KYnVja2V5ZSAlPiUKICBmaWx0ZXIoR3JhbTIgPT0gInBhc3QiKSU+JQogIGdyb3VwX2J5KFNwZWFrZXIpICU+JQogIHN1bW1hcmlzZSh0ZCA9IG1lYW4odGQpKQpgYGAKCmBgYHtyfQpzcGVha2VyX3RkIDwtIGJ1Y2tleWUgJT4lCiAgICAgICAgICAgICAgICBmaWx0ZXIoR3JhbTIgPT0gInBhc3QiKSU+JQogICAgICAgICAgICAgICAgZ3JvdXBf
YnkoU3BlYWtlcikgJT4lCiAgICAgICAgICAgICAgICBzdW1tYXJpc2UodGQgPSBtZWFuKHRkKSkKCmJveHBsb3Qoc3BlYWtlcl90ZCR0ZCwgeWxpbSA9IGMoMCwxKSkKYGBgCgoK