bibliometrix v2.1.0

============== New functionality: * biblioshiny networks are now plotted using the visNetwork package. * The biblioshiny menu has been completely rewritten. Now, descriptive analyses are organized by the unit of analysis * Several descriptive plots and tables have been added * Added the new function authorProdOverTime to calculate and plot the productivity over the time of the top authors * Added "measure" parameter in plotThematicEvolution * Added the new function bib2df. It gives the possibility to import data from a "generic" bibtex file format (i.e. Zotero, JabRef, etc.) * Added the possibility to calculate H-index for sources
massimoaria · Jan 8, 2019 · 65417be · 65417be
1 parent b2cc57f
commit 65417be
Show file tree

Hide file tree

Showing 11 changed files with 1,707 additions and 746 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: bibliometrix
 Type: Package
 Title: An R-Tool for Comprehensive Science Mapping Analysis
-Version: 2.0.3
-Date: 2018-11-19
+Version: 2.1.0
+Date: 2019-01-06
 Authors@R: c(person("Massimo", "Aria", email = "[email protected]", role=c("cre","aut")),
              person("Corrado", "Cuccurullo", email = "[email protected]", role="aut"))
 Description: Tool for quantitative research in scientometrics and bibliometrics.

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ S3method(summary,bibliometrix)
 S3method(summary,bibliometrix_netstat)
 export(Hindex)
 export(KeywordGrowth)
+export(authorProdOverTime)
 export(bib2df)
 export(biblioAnalysis)
 export(biblioNetwork)

diff --git a/NEWS b/NEWS
@@ -1,7 +1,11 @@
-bibliometrix v2.0.3 (Release date: 2018-11-19)
+bibliometrix v2.1.0 (Release date: 2019-01-06)
 ==============
 
 New functionality:
+* biblioshiny networks are now plotted using VisNetwork package.
+* The biblioshiny menu has been completely rewritten. Now, descriptive analyses are organized by the unit of analysis
+* Several descriptive plots and tables have been added
+* Added the new function authorProdOverTime to calculate and plot the productivity over the time of the top authors
 * Added "measure" parameter in plotThematicEvolution
 * Added the new function bib2df. It give the possibility to import data from a "generic" bibtex file format  (i.e. Zotero, JabRef, etc.)
 * Added the possibility to calculate H-index for sources
@@ -10,6 +14,7 @@ New functionality:
 Changes:
 * Completely rewritten the importing function from bibtex files
 * Solved an issue in dominance(). Now Dominance factor in correctly calculated
+* Solved several issues in importing functions due to inconsistencies in WOS/SCOPUS web-exporting procedures
 
 
 

diff --git a/R/Hindex.R b/R/Hindex.R
@@ -70,7 +70,7 @@ Hindex <- function(M, field="author", elements, sep = ";",years=10){
   ## identify manuscripts of the author or of the sources
 
 
-  H=data.frame(Element=elements,h_index=0,g_index=0,m_index=0,TC=0,NP=0)
+  H=data.frame(Element=elements,h_index=0,g_index=0,m_index=0,TC=0,NP=0, stringsAsFactors = FALSE)
   TotalCitations=list()
   for (j in 1:length(elements)){
     author=elements[j]
@@ -92,7 +92,7 @@ Hindex <- function(M, field="author", elements, sep = ";",years=10){
     }
     #TotalCitations[[j]]=data.frame(Year=as.numeric(M$PY[ind]),TC,Year2=sort(as.numeric(M$PY[ind])),TC2)
     if (length(ind)>0){
-    df=data.frame(Authors=substr(M$AU[ind], 1, 30),Journal=substr(M$SO[ind], 1, 30),Year=as.numeric(M$PY[ind]),TotalCitation=M$TC[ind])
+    df=data.frame(Authors=substr(M$AU[ind], 1, 30),Journal=substr(M$SO[ind], 1, 30),Year=as.numeric(M$PY[ind]),TotalCitation=M$TC[ind], stringsAsFactors = FALSE)
     TotalCitations[[j]]=df[order(df$TotalCitation),]
     }
   }

diff --git a/R/authorProdOverTime.R b/R/authorProdOverTime.R
@@ -0,0 +1,76 @@
+#' Top-Authors' Productivity over the Time
+#'
+#' It calculates and plots the authors' production (in terms of number of publications, and total citations per year) over time.
+#' @param M is a bibliographic data frame obtained by \code{\link{convert2df}} function.
+#' @param k is an integer. It is the number of top authors to analyze and plot. Default is \code{k = 10}.
+#' @param graph is logical. If TRUE the function plots the author production over time graph. Default is \code{graph = TRUE}.
+#' @return The function \code{authorProdOverTime} returns a list containing two objects:
+#' \tabular{lll}{
+#' \code{dfAU}  \tab   \tab is a data frame with one row per author and year: number of documents (\code{freq}), total citations (\code{TC}) and total citations per year (\code{TCpY})\cr
+#' \code{graph}   \tab   \tab a ggplot object}
+#'
+#' @examples
+#' data(scientometrics)
+#' res <- authorProdOverTime(scientometrics, k=10)
+#' print(res$dfAU)
+#' plot(res$graph)
+#'
+#' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis
+#' @seealso \code{\link{summary}} method for class '\code{bibliometrix}'
+#'
+#' @export
+#'
+authorProdOverTime <- function(M, k = 10, graph = TRUE) {
+
+  M$TC <- as.numeric(M$TC)
+  M$PY <- as.numeric(M$PY)
+  AU <- names(tableTag(M, "AU"))
+  # seq_len() (unlike 1:k) selects nothing when no author is available
+  AU <- AU[seq_len(min(k, length(AU)))]
+  if (length(AU) == 0) stop("M$AU does not contain any author", call. = FALSE)
+  Y <- as.numeric(format(Sys.Date(), "%Y"))  # current year
+
+  # one data frame per author, bound together once (no dummy first row needed)
+  df <- do.call(rbind, lapply(AU, function(au) {
+    # fixed = TRUE: the author name is a literal string, not a regular expression.
+    # NOTE(review): still a substring match ("LI J" also hits "LI JX") -- confirm intended
+    ind <- which(grepl(au, M$AU, fixed = TRUE))
+    # TCpY: citations divided by the years elapsed since publication (inclusive)
+    data.frame(Author = rep(au, length(ind)), year = M$PY[ind], TC = M$TC[ind],
+               TCpY = M$TC[ind] / (Y - M$PY[ind] + 1), stringsAsFactors = FALSE)
+  }))
+
+  # number of documents and citation totals per author and year
+  df2 <- dplyr::summarise(dplyr::group_by(df, Author, year),
+                          freq = length(year), TC = sum(TC), TCpY = sum(TCpY))
+  df2 <- as.data.frame(df2)
+  df2$Author <- factor(df2$Author, levels = AU)
+  yrs <- range(df2$year, na.rm = TRUE)  # records without a publication year are ignored
+
+  g <- ggplot(df2, aes(Author, year)) +
+    geom_point(aes(alpha = TCpY, size = freq), color = "dodgerblue4") +
+    scale_size(range = c(2, 6)) +
+    scale_alpha(range = c(0.3, 1)) +
+    scale_y_continuous(breaks = seq(yrs[1], yrs[2], by = 2)) +
+    guides(size = guide_legend(order = 1, "N.Articles"), alpha = guide_legend(order = 2, "TC per Year")) +
+    theme(text = element_text(color = "#444444"),
+          panel.background = element_rect(fill = "gray97"),
+          panel.grid.minor = element_line(color = "#FFFFFF"),
+          panel.grid.major = element_line(color = "#FFFFFF"),
+          plot.title = element_text(size = 24),
+          axis.title = element_text(size = 14, color = "#555555"),
+          axis.title.y = element_text(vjust = 1, angle = 0, face = "bold"),
+          axis.title.x = element_text(hjust = .95, face = "bold"),
+          axis.text.x = element_text(face = "bold"),
+          axis.text.y = element_text(face = "bold")) +
+    labs(title = "Top-Authors' Productivity over the Time", x = "Author", y = "Year") +
+    # x and y are inherited from ggplot(): one line per author through its years
+    geom_line(size = 1.0, color = "firebrick", alpha = 0.3) +
+    scale_x_discrete(limits = rev(levels(df2$Author))) +
+    coord_flip()
+
+  res <- list(dfAU = df2, graph = g)
+  if (isTRUE(graph)) plot(g)
+  return(res)
+}
+
diff --git a/R/biblioshiny.R b/R/biblioshiny.R
@@ -10,5 +10,6 @@
 #' @export
 
 biblioshiny <- function(){
+
   runApp(system.file("biblioshiny",package="bibliometrix"),launch.browser = TRUE)
 }
diff --git a/R/networkPlot.R b/R/networkPlot.R
@@ -90,7 +90,7 @@ networkPlot<-function(NetMatrix, normalize=NULL, n=NULL, degree=NULL, Title="Plo
 
 
   # vertex labels 
-  V(bsk.network)$name <- colnames(NET)
+  V(bsk.network)$name <- tolower(colnames(NET))
 
 
   # Compute node degrees (#links) and use that to set node size:
@@ -164,6 +164,8 @@ networkPlot<-function(NetMatrix, normalize=NULL, n=NULL, degree=NULL, Title="Plo
         if (q<0){q=0}
         q=quantile(V(bsk.network)$deg,q)
         LABEL[V(bsk.network)$deg<q]=""
+        V(bsk.network)$labelsize=10
+        V(bsk.network)$labelsize[V(bsk.network)$deg<q]=0
       }
     }
 
@@ -195,13 +197,13 @@ networkPlot<-function(NetMatrix, normalize=NULL, n=NULL, degree=NULL, Title="Plo
       plot(net_groups,bsk.network1, rescale=T, asp=0, ylim=c(-1,1), xlim=c(-1,1), layout = l, edge.curved=curved, 
            vertex.label.dist = 0.7, vertex.frame.color = adjustcolor('black',alpha), vertex.label.color = adjustcolor('black',min(c(1,alpha+0.1))),
            vertex.color=adjustcolor(V(bsk.network1)$color,alpha),
-           vertex.label.font = 2, vertex.label = tolower(LABEL), main=Title)
+           vertex.label.font = 2, vertex.label = LABEL, main=Title)
 
     }else{
       plot(bsk.network1, rescale=T, asp=0, ylim=c(-1,1), xlim=c(-1,1), layout = l, edge.curved=curved, 
            vertex.label.dist = 0.7, vertex.frame.color = adjustcolor('black',alpha), 
            vertex.color=adjustcolor(V(bsk.network1)$color,alpha),vertex.label.color = adjustcolor(lab.color, min(c(1,alpha+0.2))), 
-           vertex.label.font = 2, vertex.label = tolower(LABEL), main=Title, edge.color=adjustcolor(E(bsk.network1)$color,alpha/2))
+           vertex.label.font = 2, vertex.label = LABEL, main=Title, edge.color=adjustcolor(E(bsk.network1)$color,alpha/2))
     }
 
   }else{net_groups$modularity=rep(1,vcount(bsk.network))} 
@@ -214,7 +216,7 @@ networkPlot<-function(NetMatrix, normalize=NULL, n=NULL, degree=NULL, Title="Plo
   } else {cluster_res=NA}
 
 
-  net=list(graph=bsk.network, graph_pajek=bsk.save, cluster_obj=net_groups, cluster_res=cluster_res,layout=l)
+  net=list(graph=bsk.network1, graph_pajek=bsk.save, cluster_obj=net_groups, cluster_res=cluster_res,layout=l)
 
   return(net)}
 

diff --git a/R/summary.bibliometrix.R b/R/summary.bibliometrix.R
@@ -46,7 +46,7 @@ summary.bibliometrix<-function(object, ...){
   if (sum(names(arguments)=="pause")==0){pause=FALSE} else {pause=arguments$pause}
   if (sum(names(arguments)=="width")==0){options(width=120)} else {options(width=arguments$width)}
   if (sum(names(arguments)=="verbose")==0){verbose=TRUE} else {verbose=FALSE}
-
+  K=k
   Co=NULL
   AC=NULL
 
@@ -118,6 +118,9 @@ summary.bibliometrix<-function(object, ...){
 
   # Most Productive Authors
   if (isTRUE(verbose)){cat("\nMost Productive Authors\n\n")}
+  if (K==Inf){
+    k=length(object$Authors)
+  }
   A=data.frame(cbind(object$Authors[1:k]))
   A$MPA=row.names(A);A=A[,c(2,1)]
   A[,3:4]=object$AuthorsFrac[1:k,]
@@ -132,6 +135,9 @@ summary.bibliometrix<-function(object, ...){
 
   # Most Cited Manuscipts
   if (isTRUE(verbose)){cat("\nTop manuscripts per citations\n\n")}
+  if (K==Inf){
+    k=dim(object$MostCitedPapers)[1]
+  }
   MostCitedPapers=object$MostCitedPapers[1:k,]
   MostCitedPapers=format(MostCitedPapers,justify="left",digits=3)
   row.names(MostCitedPapers)=1:k
@@ -140,7 +146,9 @@ summary.bibliometrix<-function(object, ...){
   if (pause==TRUE & isTRUE(verbose)){
     cat("Hit <Return> to see next table: ")
     line <- readline()}
-
+  if (K==Inf){
+    k=length(object$Countries)
+  }
   kk=k
   if (!is.null(object$Countries)){
   # Most Productive Countries
@@ -190,6 +198,9 @@ summary.bibliometrix<-function(object, ...){
   if (!is.null(object$Sources)){
   # Most relevant Sources
     if (isTRUE(verbose)){cat("\nMost Relevant Sources\n\n")}
+    if (K==Inf){
+      k=length(object$Sources)
+    }
   kk=k
   if (length(object$Sources)<k){kk=length(object$Sources)}
   AA=data.frame(cbind(object$Sources[1:kk]))
@@ -208,6 +219,9 @@ summary.bibliometrix<-function(object, ...){
   if (!is.null(object$ID) & !is.null(object$DE)){
   # Most relevant Keywords
     if (isTRUE(verbose)){cat("\nMost Relevant Keywords\n\n")}
+    if (K==Inf){
+      k=min(c(length(object$DE),length(object$ID)))
+    }
   AAA=data.frame(cbind(object$DE[1:k]))
   AAA$MPA=row.names(AAA);AAA=AAA[,c(2,1)]
   names(AAA)=c("DE Keywords     ", "Articles")