ggplot2警告许多“因素中的重复级别已被弃用”

我已经创建了一个函数,可以同时绘制来自多个因子分析的负载,也就是当它们的变量没有完全重叠(或根本不重叠)时.它工作正常,除了它生成了一些“因素被弃用的重复级别”警告,我不明白为什么.

代码允许应该是可重现的.

library(devtools)
source_url("https://raw.githubusercontent.com/Deleetdk/psych2/master/psych2.R")

loadings.plot2 = function(fa.objects, fa.names="") {
  fa.num = length(fa.objects) #number of fas

  if (fa.names=="") {
    fa.names = str_c("fa.", 1:fa.num)
  }
  if (length(fa.names) != fa.num) {
    stop("Names vector does not match the number of factor analyses.")
  }

  #merge into df
  d = data.frame() #to merge into
  for (fa.idx in 1:fa.num) { #loop over fa objects
    loads = fa.objects[[fa.idx]]$loadings
    rnames = rownames(loads)
    loads = as.data.frame(as.vector(loads))
    rownames(loads) = rnames
    colnames(loads) = fa.names[fa.idx]

    d = merge.datasets(d, loads, 1)
  }


  #reshape to long form
  d2 = reshape(d,
               varying = 1:fa.num,
               direction="long",
               ids = rownames(d))
  d2$time = as.factor(d2$time)
  d2$id = as.factor(d2$id)
  print(d2)
  print(levels(d2$time))
  print(levels(d2$id))

  #plot
  g = ggplot(reorder_by(id, ~ fa, d2), aes(x=fa, y=id, color=time)) +
      geom_point() +
      xlab("Loading") + ylab("Indicator") +
      scale_color_discrete(name="Analysis",
                           labels=fa.names)

  return(g)
}

fa1 = fa(iris[-5])
fa2 = fa(iris[-c(1:50),-5])
fa3 = fa(ability)
fa4 = fa(ability[1:50,])

loadings.plot2(list(fa1))
loadings.plot2(list(fa1,fa2))
loadings.plot2(list(fa1,fa2,fa3))
loadings.plot2(list(fa1,fa2,fa3,fa4))

绘制不同数量的因子会产生不同数量的错误.

我已经尝试将变量设置为as.factor,然后再将它们提供给ggplot,但它没有改变任何东西.

有任何想法吗?也许与reorder_by()有关?需要此函数来对data.frame进行排序,否则ggplot会按字母顺序对它们进行排序,这是无用的.

最佳答案 正如评论中所提到的,此警告是由使用reorder_by()函数引起的,但仅与ggplot2一起使用.具体而言,这些级别由于某种原因重复:

#> levels(d2$id)
[1] "Sepal.Width"  "Sepal.Width"  "Sepal.Length" "Sepal.Length" "Petal.Width"  "Petal.Width"  "Petal.Length"
[8] "Petal.Length"

ggplot2不喜欢重复级别,因此给出了警告.

如果有人感兴趣,我写了新的代码来自我重新调整,以避免这个问题,并避免对plotflow包的依赖.

新功能是这样的:

#' Plot multiple factor loadings in one plot.
#'
#' Returns a ggplot2 plot with sorted loadings colored by the analysis they belong to. Supports reversing óf any factors that are reversed. Dodges to avoid overplotting. Only works for factor analyses with 1 factor solutions!
#' @param fa_objects (list of fa-class objects) Factor analyses objects from the fa() function from the \code{\link{psych}} package.
#' @param fa_labels (chr vector) Names of the analyses. Defaults to fa.1, fa.2, etc..
#' @param reverse_vector (num vector) Vector of numbers to use for reversing factors. Use e.g. c(1, -1) to reverse the second factor. Defaults not reversing.
#' @param reorder (chr scalar or NA) Which factor analysis to order the loadings by. Can be integers that reprensent each factor analysis. Can also be "mean", "median" to use the means and medians of the loadings. Use "all" for the old method. Default = "mean".
#' @export
#' @examples
#' library(psych)
#' plot_loadings_multi(fa(iris[-5])) #extract a factor and reverse
plot_loadings_multi = function (fa_objects, fa_labels, reverse_vector = NA, reorder = "mean") {
  library("stringr")
  library("ggplot2")
  library("plyr")

  fa_num = length(fa_objects)
  fa_names = str_c("fa.", 1:fa_num)
  if (!is.list(fa_objects)) {
    stop("fa_objects parameter is not a list.")
  }
  if (class(fa_objects) %in% c("psych", "fa")) {
    fa_objects = list(fa_objects)
    fa_num = length(fa_objects)
    fa_names = str_c("fa.", 1:fa_num)
  }
  if (missing("fa_labels")) {
    if (!is.null(names(fa_objects))) {
      fa_labels = names(fa_objects)
    }
    else {
      fa_labels = fa_names
    }
  }
  if (length(fa_labels) != fa_num) {
    stop("Factor analysis labels length is not identical to number of analyses.")
  }
  if (all(is.na(reverse_vector))) {
    reverse_vector = rep(1, fa_num)
  }
  else if (length(reverse_vector) != fa_num) {
    stop("Length of reversing vector does not match number of factor analyses.")
  }
  d = data.frame()
  for (fa.idx in 1:fa_num) {
    loads = fa_objects[[fa.idx]]$loadings * reverse_vector[fa.idx]
    rnames = rownames(loads)
    loads = as.data.frame(as.vector(loads))
    rownames(loads) = rnames
    colnames(loads) = fa_names[fa.idx]
    suppressor({
      d = merge_datasets(d, loads, 1)
    })
  }
  d2 = reshape(d, varying = 1:fa_num, direction = "long", ids = rownames(d))
  d2$time = as.factor(d2$time)
  d2$id = as.factor(d2$id)
  colnames(d2)[2] = "fa"

  #reorder factor?
  if (!is.na(reorder)) {
    if (reorder == "all") {
      library("plotflow")

      silence({
        d2 = reorder_by(id, ~fa, d2)
      })
    } else if (reorder == "mean") {
      v_aggregate_values = daply(d2, .(id), function(x) {
        mean(x$fa)
      })

      #re-level
      d2$id = factor(d2$id, levels = names(sort(v_aggregate_values, decreasing = F)))

    } else if (reorder == "median") {
      v_aggregate_values = daply(d2, .(id), function(x) {
        median(x$fa)
      })

      #re-level
      d2$id = factor(d2$id, levels = names(sort(v_aggregate_values, decreasing = F)))

    } else {
      d2_sub = d2[d2$time == reorder, ] #subset the analysis whose loading is to be used for the reorder
      silence({
        d2_sub = reorder_by(id, ~fa, d2_sub)
      })

      library(gdata)
      d2$id = reorder.factor(d2$id, new.order = levels(d2_sub$id))
    }
  }

  #plot
  g = ggplot(d2, aes(x = id, y = fa, color = time, group = time)) +
    geom_point(position = position_dodge(width = 0.5)) +
    ylab("Loading") + xlab("Indicator") + scale_color_discrete(name = "Analysis",
                                                               labels = fa_labels) + coord_flip()
  return(g)
}

library(psych)
fa_1 = fa(iris[-5])
fa_2 = fa(iris[1:125, -5])
plot_loadings_multi(list(fa_1, fa_2), reorder = "mean")

在没有警告的情况下生成以下图:

《ggplot2警告许多“因素中的重复级别已被弃用”》

代码来自my personal package.

点赞