-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathddmore.Rmd
117 lines (85 loc) · 3.01 KB
/
ddmore.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
---
title: "ddmore"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option `echo = TRUE` for all chunks in this document.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(stringr)
library(tidyverse)
```
```{r}
## one big df
# Collect the full paths of every file in `data_dir` whose name ends in ".lst".
data_dir <- "./testdata/nmoutputs"
file_paths <- list.files(
  path = data_dir,
  pattern = "\\.lst$",
  full.names = TRUE
)
# Read one file into a tibble with one row per line of text.
#
# .filename  Path of the file to read.
#
# Returns a tibble with the columns `file_name` (base name of `.filename`),
# `line_number` (1-based position of the line within the file) and
# `line_text` (the content of the line).
bdf <- function(.filename) {
  line_text <- readr::read_lines(.filename)
  # Build the columns in their final order and return the tibble visibly,
  # rather than ending on an assignment (which returns its value invisibly).
  tibble::tibble(
    file_name = basename(.filename),
    line_number = seq_along(line_text),
    line_text = line_text
  )
}
# Stack the per-file tibbles into one, then flag the rows whose text contains
# each literal keyword. Columns are referenced by bare name inside mutate():
# `df` does not exist as a data frame until this pipeline has finished, so
# `df$line_text` cannot be used here.
df1 <- map_df(file_paths, bdf)
df <- df1 %>%
  mutate(
    has_dollar_est = grepl(
      "$EST", line_text, fixed = TRUE, useBytes = TRUE
    ),
    has_dollar_table = grepl(
      "$TABLE", line_text, fixed = TRUE, useBytes = TRUE
    ),
    has_dollar_input = grepl(
      "$INPUT", line_text, fixed = TRUE, useBytes = TRUE
    ),
    has_any_method = grepl(
      "METH", line_text, fixed = TRUE, useBytes = TRUE
    )
  )
#df %>% dplyr::filter(has_dollar_est == TRUE)
#df %>% dplyr::filter(has_dollar_table == TRUE)
#df %>% dplyr::filter(has_dollar_input == TRUE)
#df %>% dplyr::filter(has_any_method == TRUE)
```
```{r}
## one long list of lines
# Pool the lines of every file into one character vector, keeping only
# non-empty lines whose first character is not ";".
lines <- grep(
  "^[^;]",
  unlist(lapply(file_paths, readLines, skipNul = TRUE)),
  value = TRUE,
  useBytes = TRUE
)
```
```{r}
## distinct list of lines
# Same filter as for the pooled lines (non-empty, not starting with ";"),
# reduced to the distinct lines in order of first appearance. The result is a
# one-element list named `value`, which is what converting a one-column tibble
# with as.list() produced; the deprecated as_data_frame() round-trip is not
# needed to get there.
dlines <- list(
  value = unique(
    grep(
      "^[^;]",
      unlist(lapply(file_paths, readLines, skipNul = TRUE)),
      value = TRUE,
      useBytes = TRUE
    )
  )
)
```
```{r}
## show lines around grep hit
# For every line that contains the literal text `.pattern`, return that line
# together with the `.line_count` lines that follow it.
#
# .pattern     Literal (non-regex) text to search for.
# .line_count  Number of lines to show after each hit (default 4).
# .lines       Character vector to search; defaults to the global `lines`.
#
# Returns a list with one character vector per hit (an empty list when
# nothing matches).
# NOTE(review): if `.lines` pools several files, a window can run on into the
# lines of the next file - confirm whether that matters for the caller.
show_lines <- function(.pattern, .line_count = 4, .lines = lines) {
  hits <- grep(.pattern, .lines, fixed = TRUE, useBytes = TRUE)
  lapply(hits, function(.x) {
    # Clamp the window so a hit near the end of `.lines` is not padded with NA.
    last <- min(.x + .line_count, length(.lines))
    .lines[.x:last]
  })
}
# Each line containing "$TABLE", with the four lines after it.
show_lines(.pattern = "$TABLE", .line_count = 4)
```
```{r}
# Collect the lines of `lines` that contain the literal text `.pattern` into a
# one-column tibble (`value`). Builds the tibble directly instead of going
# through the deprecated as_data_frame().
fixed_hits <- function(.pattern) {
  tibble::tibble(
    value = grep(.pattern, lines, value = TRUE, fixed = TRUE, useBytes = TRUE)
  )
}

# Distinct lines for progressively narrower spellings of the keyword.
fixed_hits("METH") %>% distinct(value)
fixed_hits("METHOD") %>% distinct(value)
fixed_hits("METHOD=") %>% distinct(value)
fixed_hits("#METH") %>% distinct(value)
# Total number of lines containing "#METH" (duplicates included).
fixed_hits("#METH") %>% count()
```
```{r}
# Distinct lines that start with "$EST" (regex match, so the "$" is escaped).
# The tibble is built directly rather than via the deprecated as_data_frame().
tibble::tibble(
  value = grep("^\\$EST", lines, value = TRUE, useBytes = TRUE)
) %>%
  distinct(value)
```
```{r}
# Each line containing "$EST", with the default number of following lines.
show_lines(.pattern = "$EST")
```
```{r}
# Each line containing "$INPUT", with the eight lines after it.
show_lines(.pattern = "$INPUT", .line_count = 8)
```
```{r}
# Each line containing "$MODEL", with the default number of following lines.
show_lines(.pattern = "$MODEL")
```
```{r}
# Each line containing the literal "$DATA" on its own (no following lines),
# then the distinct set of those lines. The tibble is built directly rather
# than via the deprecated as_data_frame().
show_lines("$DATA", 0)
tibble::tibble(
  value = grep("$DATA", lines, value = TRUE, fixed = TRUE, useBytes = TRUE)
) %>%
  distinct(value)
```