% Supplementary material (MATLAB script for generating the figures)
% Reset the session: clear the workspace, close open figures and clear the command window.
clear all;close all;clc
PP=0:1:100; % vector of percentiles (used as bin edges for the article-view percentiles)
VT=logspace(log10(1),log10(2000),50);VT=[0 unique(round(VT))]; % bin edges for tweet counts: 0 followed by the unique rounded values of 50 logarithmically spaced numbers between 1 and 2000
FILENAME='D:\jcfdewinter\My Documents\PONE_Citation_data\alm_report_2014-06-10.xlsx'; % last publication 9 June 2014
[~,~,raw]=xlsread(FILENAME,'A2:AA123255'); % import the Excel range into the cell array 'raw'
% Column 2 of 'raw' is converted to serial date numbers; columns 4..end form the numeric matrix X.
% NOTE(review): the variable name 'date' shadows MATLAB's built-in date function for the rest of the script.
date=datenum(raw(:,2));X=cell2mat(raw(:,4:end));
% PONE(i) is 1 when the first column of row i contains the substring '.pone.' and NaN otherwise
% (presumably column 1 holds the article identifier/DOI -- confirm against the spreadsheet).
PONE=NaN(size(X,1),1);
for i=1:size(X,1)
    id=raw{i,1};
    % Explicit type/emptiness check instead of try/catch: non-text cells and cells without
    % '.pone.' simply keep NaN, and no variable named 'error' is created that would shadow
    % the built-in error function.
    if ischar(id) && ~isempty(strfind(id,'.pone.'))
        PONE(i)=1; % code the PLOS ONE articles as 1
    end
end
% Pre-allocate the per-percentile-bin results (one entry per pair of consecutive percentiles in PP).
M1=NaN(length(PP)-1,1);M2=M1;M4=M1;M6=M1;nn1=M1;nn2=M1;nn3=M1;
% Pre-allocate the per-tweet-bin means (one row per pair of consecutive edges in VT, one column per column of X).
M=NaN(length(VT)-1,size(X,2));
N=NaN(length(VT)-1,1); % allocated but not used elsewhere in the visible script
% Sample: PLOS ONE articles between 1 July 2012 and 31 June 2013.
% NOTE(review): June has only 30 days; confirm how datenum resolves '31-jun-2013' (it may roll
% over to 1 July 2013) before changing the literal, because that would change the sample.
temp=find(date>=datenum('1-jul-2012') & date <= datenum('31-jun-2013') & PONE==1);
for i=1:length(VT)-1 % loop across the VT vector
    temp2=find(X(temp,17)>=VT(i) & X(temp,17)<VT(i+1)); % articles whose number of tweets (column 17) falls in [VT(i), VT(i+1))
    M(i,:)=mean(X(temp(temp2),:),1); % means of all columns of X as a function of number of tweets
end
% Percentile edges of the article views (column 20) depend only on the sample, so they are
% computed once here instead of being re-evaluated inside every loop iteration.
views=X(temp,20);
tweets=X(temp,17);
edges=prctile(views,PP);
for i=1:length(PP)-1 % loop across percentiles vector
    % Bins are half-open [edges(i), edges(i+1)), except the last one, which is closed so that
    % the article(s) with the maximum number of views are included.
    % The tweets>-1 condition presumably drops rows with missing (NaN) tweet counts -- confirm.
    if i==length(PP)-1
        inBin=views>=edges(i) & views<=edges(i+1) & tweets>-1;
    else
        inBin=views>=edges(i) & views<edges(i+1) & tweets>-1;
    end
    temp2=temp(inBin); % row indices into X of the articles in this article-view percentile bin
    temp3a=temp2(X(temp2,17)==0); % 0 tweets
    temp3b=temp2(X(temp2,17)>=1 & X(temp2,17)<=3); % between 1 and 3 tweets
    temp3c=temp2(X(temp2,17)>3); % more than 3 tweets
    M1(i)=nanmean(X(temp2,20),1); % mean number of article views in the bin
    M2(i)=nanmean(X(temp3a,3),1); % mean number of citations (column 3), articles with 0 tweets
    M4(i)=nanmean(X(temp3b,3),1); % mean number of citations (column 3), articles with 1 to 3 tweets
    M6(i)=nanmean(X(temp3c,3),1); % mean number of citations (column 3), articles with more than 3 tweets
    nn1(i)=length(temp3a); % number of articles with 0 tweets
    nn2(i)=length(temp3b); % number of articles with 1 to 3 tweets
    nn3(i)=length(temp3c); % number of articles with more than 3 tweets
end
% Descriptive statistics for the selected sample (row indices in temp); values are rounded to 2 decimals.
disp('Number of articles')
disp(length(temp))
disp('Mean and SD number of tweets')
disp(round(100*[mean(X(temp,17)) std(X(temp,17))])/100)
disp('Percentage of articles with zero tweets')
disp(round(1000*mean(X(temp,17)==0))/10)
% All regressions are run on rank-transformed (tiedrank), z-scored variables with an intercept column.
% The predictor matrix [intercept, publication date, tweets] is shared by five of the six models,
% so it is built once here.
predTweets=[ones(length(temp),1) zscore(tiedrank([date(temp) X(temp,17)]))];
R=regress(zscore(tiedrank(X(temp,3))),predTweets); % regression analysis to predict citations from tweets
disp('Beta coefficients of regression analysis')
disp(round(R*100)/100)
R=regress(zscore(tiedrank(X(temp,3))),[ones(length(temp),1) zscore(tiedrank([date(temp) X(temp,[17 20])]))]); % regression analysis to predict citations from tweets and PLOS ONE article views
disp('Beta coefficients of regression analysis')
disp(round(R*100)/100)
R=regress(zscore(tiedrank(X(temp,20))),predTweets); % regression analysis to predict PLOS ONE article views from tweets
disp('Beta coefficients of regression analysis')
disp(round(R*100)/100)
R=regress(zscore(tiedrank(X(temp,22))),predTweets); % regression analysis to predict PMC article views from tweets
disp('Beta coefficients of regression analysis')
disp(round(R*100)/100)
R=regress(zscore(tiedrank(X(temp,2))),predTweets); % regression analysis to predict mendeley additions from tweets
disp('Beta coefficients of regression analysis')
disp(round(R*100)/100)
R=regress(zscore(tiedrank(X(temp,11))),predTweets); % regression analysis to predict facebook activity from tweets
disp('Beta coefficients of regression analysis')
disp(round(R*100)/100)
% Most-tweeted papers: restrict the sample in temp to papers with more than 100 tweets
% (same rows as repeating the date/PONE filter), then keep the 15 with the most tweets.
temp4=temp(X(temp,17)>100); % find papers with more than 100 tweets
[~,b]=sort(X(temp4,17),'descend');
temp4=temp4(b(1:min(15,numel(b)))); % at most 15; avoids an index error when fewer papers qualify
disp(raw(temp4,3)) % third spreadsheet column of the selected papers (presumably the title -- confirm)
disp([X(temp4,17) X(temp4,20) X(temp4,3)]) % tweets, PLOS ONE article views, citations per selected paper
disp(round(100*mean(X(temp4,[17 20 3])))/100) % means of tweets, views, citations for the selected papers
disp(round(100*mean(X(temp,[17 20 3])))/100) % the same means for the whole sample
%% Figures
close all % close any figure windows that are still open before the figures below are created
% DD=NaN(108,1);
% c=0;for y=2006:2015;for i=1:12;c=c+1;str=[num2str(i) '-1-' num2str(y)];DD(c)=datenum(str);end;end
% DD=DD(1:end-7);
% CC=NaN(length(DD)-1,1);
% CC2=NaN(length(DD)-1,1);
% NN=NaN(length(DD)-1,1);
%
% for i=1:length(DD)-1;
% Y=find(date>=DD(i) & date<DD(i+1) & PONE==1); % look for year
% CC(i)=mean(X(Y,3)); % mean number of citations
% NN(i)=length(Y); % number of publications
% end
%
% figure;plot(16+DD(1:end-1), NN,'k-o','Linewidth',3)
% grid minor
% xlabel('Publication date','Fontsize',30)
% set(gca,'xtick',[datenum('1-jan-2006') datenum('1-jan-2007') datenum('1-jan-2008') datenum('1-jan-2009') datenum('1-jan-2010') datenum('1-jan-2011') datenum('1-jan-2012') datenum('1-jan-2013') datenum('1-jan-2014')])
% set(gca,'xticklabel',2006:2014)
% ylabel('Number of publications/month','Fontsize',30)
% h = findobj(gcf,'FontName','Helvetica');
% set(h,'FontSize',28)
% set(gca,'xlim',[datenum('1-jan-2007') datenum('1-jan-2015')])
%
% figure;plot(16+DD(1:end-1), CC,'k-o','Linewidth',3)
% grid minor
% xlabel('Publication date','Fontsize',30)
% ylabel('Mean number of citations','Fontsize',30)
% set(gca,'xtick',[datenum('1-jan-2006') datenum('1-jan-2007') datenum('1-jan-2008') datenum('1-jan-2009') datenum('1-jan-2010') datenum('1-jan-2011') datenum('1-jan-2012') datenum('1-jan-2013') datenum('1-jan-2014')])
% set(gca,'xticklabel',2006:2014)
% h = findobj(gcf,'FontName','Helvetica');
% set(h,'FontSize',28)
% h = findobj(gcf,'FontName','Helvetica');
% set(h,'FontSize',28)
% set(gca,'xlim',[datenum('1-jan-2007') datenum('1-jan-2015')])
% Figure: mean activity counts (log10 scale) against the mean number of tweets, one point per tweet bin (rows of M).
figure;hold on
tw=M(:,17); % x-axis: mean number of tweets in each bin
% One curve per metric; 'cols' are the columns of M and 'fmt' the series-specific line/marker settings.
cols=[20 22 11 3 2]; % PLOS ONE views, PMC views, Facebook, citations (crossref), Mendeley
fmt={{'k-o'}, ...
     {'-d','color',[.8 .8 .8]}, ...
     {'k-s','markerfacecolor',[.8 .8 .8]}, ...
     {'k-v','markerfacecolor','w'}, ...
     {'k-*'}};
for k=1:numel(cols)
    plot(tw,log10(M(:,cols(k))),fmt{k}{:},'Linewidth',3,'Markersize',12);
end
% Curve annotations: x position, y position (given in counts, converted to log10), label text.
lbl={ 60, 50000, 'PLOS ONE article views'; ...
      15,  1200, 'PubMed Central article views'; ...
      75,   210, 'Facebook likes/shares/posts/comments'; ...
     130,    30, 'Mendeley additions'; ...
     100,     4, 'Citations'};
for k=1:size(lbl,1)
    text(lbl{k,1},log10(lbl{k,2}),lbl{k,3})
end
grid on
% The y-axis carries log10 values; ticks are placed at log10(D) and labelled with the raw counts in D.
D=[1 2 3 5 10 20 30 50 100 200 300 500 1000 2000 3000 5000 10000 20000 30000 50000 100000];
set(gca,'xlim',[0 300],'ytick',log10(D),'yticklabel',D)
xlabel('Number of tweets','Fontsize',30)
ylabel('Activity count','Fontsize',30)
% Enlarge the font of every object in the figure whose font is Helvetica.
h = findobj(gcf,'FontName','Helvetica');
set(h,'FontSize',28)
% Figure: number of articles per article-view percentile bin, split by number of tweets.
V=PP(1:end-1)+mean(diff(PP))/2; % x positions: midpoints of the percentile bins
figure
plot(V,nn1,'k-o','Linewidth',3);hold on
plot(V,nn2,'-s','Linewidth',3,'color',[.4 .4 .4])
plot(V,nn3,'-^','Linewidth',3,'color',[.8 .8 .8])
grid on
% 'Location','NorthWest' replaces the obsolete numeric position code 2 (upper-left corner).
legend('Articles without tweets','Articles with 1 to 3 tweets','Articles with more than 3 tweets','Location','NorthWest')
% The x-axis is in percentile units; ticks are labelled with the mean number of article views (M1) of the corresponding bin.
set(gca,'xtick',[1 10:10:100]);
set(gca,'xticklabel',round(M1([1 10:10:100])));
xlabel('Number of article views','Fontsize',30)
ylabel('Number of articles','Fontsize',30)
% Enlarge the font of every object in the figure whose font is Helvetica.
h = findobj(gcf,'FontName','Helvetica');
set(h,'FontSize',28)
% Figure: mean number of citations per article-view percentile bin, split by number of tweets.
% V (bin midpoints) and M1, M2, M4, M6 are computed earlier in the script.
figure
plot(V,M2,'k-o','Linewidth',3);hold on
plot(V,M4,'-s','Linewidth',3,'color',[.4 .4 .4])
plot(V,M6,'-^','Linewidth',3,'color',[.8 .8 .8])
grid on
% 'Location','NorthWest' replaces the obsolete numeric position code 2 (upper-left corner).
legend('Articles without tweets','Articles with 1 to 3 tweets','Articles with more than 3 tweets','Location','NorthWest')
% The x-axis is in percentile units; ticks are labelled with the mean number of article views (M1) of the corresponding bin.
set(gca,'xtick',[1 10:10:100]);
set(gca,'xticklabel',round(M1([1 10:10:100])));
xlabel('Number of article views','Fontsize',30)
ylabel('Number of citations','Fontsize',30)
% Enlarge the font of every object in the figure whose font is Helvetica.
h = findobj(gcf,'FontName','Helvetica');
set(h,'FontSize',28)
set(gca,'ylim',[0 8])
% Column indices of the matrix X (spreadsheet columns 4 to 27) and the metric each one holds:
% 1 citeulike
% 2 mendeley
% 3 crossref
% 4 datacite
% 5 pmceurope
% 6 pmceuropedata
% 7 pubmed
% 8 scopus
% 9 articlecoverage
% 10 articlecoveragecurated
% 11 facebook
% 12 plos_comments
% 13 nature
% 14 reddit
% 15 researchblogging
% 16 scienceseeker
% 17 twitter
% 18 wikipedia
% 19 wordpress
% 20 counter
% 21 figshare
% 22 pmc
% 23 relativemetric
% 24 f1000