Commit d830eb3: Next Release
- Final for CRAN
bedantaguru committed Aug 26, 2019
1 parent 4340c27 commit d830eb3
Showing 60 changed files with 311 additions and 218 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -2,7 +2,7 @@ Type: Package
Package: tidycells
Title: Read Tabular Data from Diverse Sources and Easily Make
Them Tidy
Version: 0.2.0.99
Version: 0.2.1
Authors@R:
person(given = "Indranil",
family = "Gayen",
7 changes: 5 additions & 2 deletions R/collate_columns.R
@@ -96,13 +96,16 @@ collate_columns <- function(composed_data,
if (length(dcl) == 1) {
out_d <- dcl[[1]]

colnames(out_d) <- stringr::str_replace_all(colnames(out_d), "uncollated_", "old_uc_")
colnames(out_d) <- stringr::str_replace_all(colnames(out_d), "collated_", "old_c_")

restcols <- setdiff(colnames(out_d), defcols_this)
if (length(restcols) > 0) {
cn_map_0 <- tibble(cn = restcols) %>%
mutate(is_major = stringr::str_detect(tolower(cn), "major")) %>%
arrange(cn) %>%
mutate(sn = seq_along(cn), sn_m = sn + is_major * (10^10)) %>%
arrange(desc(sn_m)) %>%
mutate(sn = seq_along(cn), sn_m = sn - is_major * (10^10)) %>%
arrange(sn_m) %>%
mutate(fsn = seq_along(cn), new_cn = paste0("collated_", fsn)) %>%
select(cn, new_cn)

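The reordering above changes how the collated column names are numbered. Below is a minimal stand-alone sketch of the new rule (the column names are invented, not from the package): names containing "major" are ranked first, and within each group the alphabetical order is preserved before the names are rewritten to `collated_1`, `collated_2`, ....

``` r
library(dplyr)
library(stringr)
library(tibble)

# invented stand-ins for the "rest" columns
restcols <- c("minor_col_left_1", "major_row_top_1", "major_col_left_2")

cn_map_0 <- tibble(cn = restcols) %>%
  mutate(is_major = str_detect(tolower(cn), "major")) %>%
  arrange(cn) %>%
  # subtracting a large offset pushes the "major" names to the front
  mutate(sn = seq_along(cn), sn_m = sn - is_major * (10^10)) %>%
  arrange(sn_m) %>%
  mutate(fsn = seq_along(cn), new_cn = paste0("collated_", fsn)) %>%
  select(cn, new_cn)

cn_map_0
#> major_col_left_2 -> collated_1, major_row_top_1 -> collated_2,
#> minor_col_left_1 -> collated_3
```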
137 changes: 67 additions & 70 deletions R/compose_cells.R
@@ -49,51 +49,44 @@ compose_cells_raw <- function(ca, post_process = TRUE, attr_sep = " :: ",
if (!inherits(ca, "cell_analysis")) {
abort("A 'Cell Analysis' expected.")
}

dam <- ca$details$data_attr_map_raw

dam <- dam %>%
group_by(data_gid, direction_basic, direction_group) %>%
mutate(dist_order = dist %>% as.factor() %>% as.integer()) %>%
ungroup()

dam <- dam %>%
group_by(data_gid, attr_gid) %>%
mutate(attr_gid_split_order = attr_gid_split %>% as.factor() %>% as.integer()) %>%
ungroup()

fj_this <- function(x, y) {
fj(x, y,
join_by = c("row", "col", "value", "data_block"),
sallow_join = TRUE, sep = attr_sep
)
}


dcomp00 <- dam %>%
group_by(data_gid) %>%
group_split() %>%
map(~ .x %>%
group_by(attr_gid, direction, attr_gid_split) %>%
group_split())
group_by(attr_gid, direction, attr_gid_split) %>%
group_split())

dcomp0 <- dcomp00 %>%
map(~ .x %>%
# this try should be removed if unpivotr::enhead is internalized
# or similarly behaving functions are developed.
map(~ {
e <- try(stitch_direction(.x, ca$cell_df, trace_it = trace_it_back), silent = TRUE)
.ok <- !inherits(e, "try-error")
.d <- NULL
if (!.ok) .d <- .x
list(ok = .ok, out = e, dat = .d)
}))
# this try should be removed if unpivotr::enhead is internalized
# or similarly behaving functions are developed.
map(~ {
e <- try(stitch_direction(.x, ca$cell_df, trace_it = trace_it_back), silent = TRUE)
.ok <- !inherits(e, "try-error")
.d <- NULL
if (!.ok) .d <- .x
list(ok = .ok, out = e, dat = .d)
}))

chk0 <- dcomp0 %>%
map_lgl(~ .x %>%
map_lgl(~ !.x$ok) %>%
any()) %>%
map_lgl(~ !.x$ok) %>%
any()) %>%
any()

if (chk0) {
if (!silent) {
# Need to show user what has been missed
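As a side note on the `try()` wrapper used in this function: each piece is attempted, and the result is stored together with an `ok` flag and (on failure) the original input, so failed pieces can later be filtered out and reported. A generic, self-contained sketch of that capture-and-partition pattern with toy data (not package code):

``` r
library(purrr)

pieces <- list(1, -1, 2)  # toy inputs; the negative value will fail

attempts <- map(pieces, ~ {
  e <- try(if (.x < 0) stop("negative input") else sqrt(.x), silent = TRUE)
  .ok <- !inherits(e, "try-error")
  list(ok = .ok, out = if (.ok) e else NULL, dat = if (.ok) NULL else .x)
})

any_failed    <- any(map_lgl(attempts, ~ !.x$ok))
failed_inputs <- map(attempts[map_lgl(attempts, ~ !.x$ok)], ~ .x$dat)
ok_outputs    <- map(attempts[map_lgl(attempts, ~ .x$ok)], ~ .x$out)

any_failed     # TRUE
failed_inputs  # list(-1)
ok_outputs     # list(1, 1.414...)
```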
@@ -110,65 +103,69 @@ compose_cells_raw <- function(ca, post_process = TRUE, attr_sep = " :: ",
ok = "Yes", cancel = "No",
is_question = TRUE
)

if (identical(user_res, TRUE)) {
user_res <- "yes"
}

if (user_res == "yes") {
# return failed analysis part for observing
patched_ca <- ca

dp0 <- dcomp0 %>% map_df(~ .x %>%
map_lgl(~ !.x$ok) %>%
.x[.] %>%
map_df(~ .x$dat))
map_lgl(~ !.x$ok) %>%
.x[.] %>%
map_df(~ .x$dat))
patched_ca$details$data_attr_map_raw <- unique(dp0[colnames(patched_ca$details$data_attr_map_raw)])

warn(paste0(
"Failed portion of Cell-Analysis is returned",
"\nIn the plots you should see texts, only in failed attributes."
))

return(patched_ca)
}
}
}
}

dcomp0 <- dcomp0 %>% map(~ .x %>%
map_lgl(~ .x$ok) %>%
.x[.] %>%
map(~ .x$out))
map_lgl(~ .x$ok) %>%
.x[.] %>%
map(~ .x$out))

chk1 <- dcomp0 %>%
map_int(length) %>%
sum()

if (chk1 > 0) {
dcomp <- dcomp0 %>% map(~ reduce(.x, fj_this))
dcomp <- dcomp0 %>%
map(~ reduce(.x, fj,
join_by = c("row", "col", "value", "data_block"),
sallow_join = TRUE, sep = attr_sep
))
} else {
abort("Failed to compose")
}


if (print_col_info) {
dlinf <- dcomp %>% map(get_all_col_representative, cut_th = 4, lower_it = FALSE)

dlinfc <- dlinf %>% map(~ .x %>% purrr::imap_chr(~ paste0(" ", cli_bb(.y), "\n ", paste0(cli_g(.x), collapse = ", "))))
names(dlinfc) <- paste0("data_block = ", seq_along(dlinfc))

xmsg <- dlinfc %>%
purrr::imap_chr(~ paste0(cli_br(.y), "\n", paste0(.x, collapse = "\n"))) %>%
paste0(collapse = "\n")

cat(xmsg)
}

if (!post_process) {
return(invisible(dcomp))
}

compose_cells_raw_post_process(dcomp, details = details, discard_raw_cols = discard_raw_cols, attr_sep = attr_sep)
}
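The refactor in this function also drops the one-off `fj_this()` wrapper: `purrr::reduce()` forwards extra arguments to the reducing function on every step, so `fj()` can be called directly with its `join_by`, `sallow_join`, and `sep` arguments. A minimal sketch of that `reduce()` pattern, using `dplyr::left_join()` as a stand-in for the internal `fj()` and invented data frames:

``` r
library(dplyr)
library(purrr)
library(tibble)

dfs <- list(
  tibble(row = 1:2, col = 1L, value = c("a", "b"), attr_1 = c("x", "y")),
  tibble(row = 1:2, col = 1L, value = c("a", "b"), attr_2 = c("p", "q"))
)

# arguments given after the function are passed through on every reduction step
merged <- reduce(dfs, left_join, by = c("row", "col", "value"))
merged
#> a single tibble with row, col, value, attr_1 and attr_2 aligned
```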

@@ -181,50 +178,50 @@ compose_cells_raw_post_process <- function(dcomp, details = FALSE, discard_raw_c
cns <- cns %>% setdiff(cns_trace)
cns_base <- c("row", "col", "data_block", "value")
cns <- cns %>% setdiff(cns_base)

cns_d <- tibble(cname = cns, cn = cns) %>%
tidyr::separate(cn, into = c("ag", "rc", "dir", "ad", "d"))


cns_d <- cns_d %>%
# anticlockwise
mutate(dir_n = recode(dir,
top = 1,
topLeft = 2,
left = 3,
bottomLeft = 4,
bottom = 5,
bottomRight = 6,
right = 7,
topRight = 8
top = 1,
topLeft = 2,
left = 3,
bottomLeft = 4,
bottom = 5,
bottomRight = 6,
right = 7,
topRight = 8
)) %>%
mutate(rc_n = recode(rc,
row = 1,
col = 2,
corner = 3
row = 1,
col = 2,
corner = 3
)) %>%
mutate(cname_ord = paste(rc_n, dir_n, ad, d, sep = "_"))



dcomp_r <- dcomp %>%
map(~ refine_cols(.x, cn_df = cns_d, sep = attr_sep)) %>%
bind_rows()

# add rc_df class
class(dcomp_r) <- c(class(dcomp_r), "rc_df") %>% unique()

this_cols <- colnames(dcomp_r)
f_cols <- c("row", "col", "data_block", "value")
this_cols <- this_cols %>% setdiff(f_cols)
nm_cols <- this_cols[stringr::str_detect(this_cols, "row|col|corner")]
m_cols <- this_cols %>% setdiff(nm_cols)

if (details) {
lo <- list(raw_data = dcomp_r, must_cols = f_cols, major_col = m_cols, minor_col = nm_cols)
return(lo)
}

if (discard_raw_cols) {
dcomp_r[c(f_cols, m_cols)]
} else {
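In `compose_cells_raw_post_process()` above, `recode()` turns the direction and row/col labels embedded in the composed column names into numeric ranks (anticlockwise for directions), which are then pasted into the `cname_ord` ordering key. A small stand-alone illustration of that mapping, with invented direction values:

``` r
library(dplyr)
library(tibble)

tibble(dir = c("left", "topRight", "top", "bottom")) %>%
  mutate(dir_n = recode(dir,
    top = 1, topLeft = 2, left = 3, bottomLeft = 4,
    bottom = 5, bottomRight = 6, right = 7, topRight = 8
  )) %>%
  arrange(dir_n)
#> rows come out as top (1), left (3), bottom (5), topRight (8)
```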
4 changes: 3 additions & 1 deletion R/read_cells_stages.R
@@ -142,7 +142,9 @@ do_collate <- function(at_level, this_level, out_l, simplify, simple) {
dcl <- list(out_l$final_composition)
}

out_l$final <- dcl %>% map_df(~ collate_columns(.x) %>% as_tibble())
out_l$final <- dcl %>%
map(~ collate_columns(.x, retain_cell_address = TRUE)) %>%
collate_columns()

out_l$stage <- read_cell_task_orders[6]
if (simplify) {
4 changes: 2 additions & 2 deletions R/reduce_2dfs.R
@@ -13,11 +13,11 @@ get_connected_cols <- function(col_map_with_dist) {
}

reduce_2dfs <- function(dc1, dc2, combine_th = 0.6, rest_cols = Inf, retain_other_cols = FALSE) {
colnames(dc1) <- stringr::str_replace_all(colnames(dc1), "collated_", "d1_old_c_")
colnames(dc1) <- stringr::str_replace_all(colnames(dc1), "uncollated_", "d1_old_uc_")
colnames(dc1) <- stringr::str_replace_all(colnames(dc1), "collated_", "d1_old_c_")

colnames(dc2) <- stringr::str_replace_all(colnames(dc2), "collated_", "d2_old_c_")
colnames(dc2) <- stringr::str_replace_all(colnames(dc2), "uncollated_", "d2_old_uc_")
colnames(dc2) <- stringr::str_replace_all(colnames(dc2), "collated_", "d2_old_c_")


cr1 <- get_all_col_representative(dc1)
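The reordering of the two `str_replace_all()` calls above matters because `"collated_"` is a substring of `"uncollated_"`: replacing the shorter prefix first also rewrites the uncollated names. A quick stand-alone illustration with invented column names:

``` r
library(stringr)

cols <- c("collated_1", "uncollated_1")

# shorter prefix first: "uncollated_1" gets mangled before its own rule runs
str_replace_all(
  str_replace_all(cols, "collated_", "d1_old_c_"),
  "uncollated_", "d1_old_uc_"
)
#> "d1_old_c_1"   "und1_old_c_1"

# longer prefix first (as in the fixed code): both prefixes stay distinct
str_replace_all(
  str_replace_all(cols, "uncollated_", "d1_old_uc_"),
  "collated_", "d1_old_c_"
)
#> "d1_old_c_1"   "d1_old_uc_1"
```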
11 changes: 11 additions & 0 deletions README.Rmd
@@ -120,6 +120,7 @@ To start with `tidycells`, I invite you to see `vignette("tidycells-intro")` or
## Quick Overview

Let's take a quick look at the example data given in

```{r, eval=FALSE}
system.file("extdata", "marks.xlsx", package = "tidycells", mustWork = TRUE)
```
@@ -133,6 +134,7 @@ knitr::include_graphics("vignettes/ext/marks.png")
Let's try `tidycells` functions on this data

Read at once

```{r, eval=FALSE}
# you should have tidyxl installed
system.file("extdata", "marks.xlsx", package = "tidycells", mustWork = TRUE) %>%
@@ -161,13 +163,17 @@ d <- system.file("extdata", "marks.xlsx", package = "tidycells", mustWork = TRUE
read_cells(at_level = "make_cells") %>%
.[[1]]
```

Or

```{r}
# or you may do
d <- system.file("extdata", "marks_cells.rds", package = "tidycells", mustWork = TRUE) %>%
readRDS()
```

Then

```{r}
d <- numeric_values_classifier(d)
da <- analyze_cells(d)
@@ -182,7 +188,9 @@ dc <- compose_cells(da, print_attribute_overview = TRUE)
knitr::include_graphics("vignettes/ext/compose_cells_cli1.png")
dc <- compose_cells(da)
```

If you want well-aligned columns then you may like to do

```{r}
# bit tricky and tedious unless you do print_attribute_overview = TRUE in above line
dcfine <- dc %>%
@@ -206,6 +214,7 @@ dcfine <- dc %>%
```

`head(dcfine)` looks like

```{r, echo=FALSE}
knitr::kable(head(dcfine), align = c(rep("l", 3), "c"))
```
@@ -277,6 +286,8 @@ The `readabs` package helps you easily download, import, and tidy time series da
Gives the ability to choose any rectangular data file using an interactive GUI dialog box, and to seamlessly manipulate tidy data between an 'Excel' window and an R session.
* The [tidyABS](https://github.com/ianmoran11/tidyABS) package:
The `tidyABS` package converts ABS excel tables to tidy data frames. It uses rules-of-thumb to determine the structure of excel tables, however it sometimes requires pointers from the user. This package is in early development.
* The [hypoparsr](https://github.com/tdoehmen/hypoparsr) package:
This package takes a different approach to CSV parsing by creating different parsing hypotheses for a given file and ranking them based on data quality features.


## Acknowledgement
9 changes: 7 additions & 2 deletions README.md
@@ -182,8 +182,9 @@ After this you need to run `compose_cells` (with argument
dc <- compose_cells(da, print_attribute_overview = TRUE)
```

<img src="vignettes/ext/compose_cells_cli1.png" width="451px" /> If you
want a well-aligned columns then you may like to
<img src="vignettes/ext/compose_cells_cli1.png" width="451px" />

If you want well-aligned columns then you may like to do

``` r
@@ -339,6 +340,10 @@ level only.
uses rules-of-thumb to determine the structure of excel tables,
however it sometimes requires pointers from the user. This package
is in early development.
- The [hypoparsr](https://github.com/tdoehmen/hypoparsr) package: This
package takes a different approach to CSV parsing by creating
different parsing hypotheses for a given file and ranking them based
on data quality features.

## Acknowledgement
