% Supplementary material (MATLAB script for generating the figures)

% Start from a clean session: empty workspace, no open figures, cleared command window.
clear all
close all
clc

% Percentile edges 0, 1, ..., 100 used to bin articles by number of article views.
PP=0:100;

% Tweet-count bin edges: 0 followed by the unique rounded values of
% 50 logarithmically spaced points between 1 and 2000.
VT=round(logspace(0,log10(2000),50));
VT=[0 unique(VT)];

% Location of the spreadsheet with the article-level metrics (last publication 9 June 2014).
FILENAME='D:\jcfdewinter\My Documents\PONE_Citation_data\alm_report_2014-06-10.xlsx';

% Import cells A2:AA123255 of the Excel file into the cell array 'raw'
% (the first two outputs of xlsread are discarded).
[~,~,raw]=xlsread(FILENAME,'A2:AA123255');

% Column 2 of raw is converted to serial date numbers.
% NOTE: the variable name 'date' hides MATLAB's built-in date function from here on.
date=datenum(raw(:,2));

% Columns 4 to the end of raw form the numeric matrix X (one row per spreadsheet row).
X=cell2mat(raw(:,4:end));

% Code the PLOS ONE articles as 1: PONE(i) is 1 when the text in column 1 of
% raw contains '.pone.', and stays NaN for every other row (including rows
% whose first cell is not text).
% The test is explicit instead of relying on an empty try/catch, which hid
% every error and named its exception variable 'error', shadowing the
% built-in error function.
PONE=NaN(size(X,1),1);
hasPoneTag=cellfun(@(entry) ischar(entry) && ~isempty(strfind(entry,'.pone.')),raw(:,1));
PONE(hasPoneTag)=1;

% Preallocate the per-percentile-bin results (one entry per pair of adjacent
% percentile edges in PP); they are filled by the percentile loop below.
M1=NaN(length(PP)-1,1);M2=M1;M4=M1;M6=M1;nn1=M1;nn2=M1;nn3=M1;

% One row per tweet-count bin (adjacent edges in VT), one column per column of X.
M=NaN(length(VT)-1,size(X,2));

% Row indices of the PLOS ONE articles published from 1 July 2012 up to and
% including 30 June 2013. The upper bound is written as "before 1 July 2013"
% because June has only 30 days, so '31-jun-2013' is not a calendar date.
temp=find(date>=datenum('1-jul-2012') & date<datenum('1-jul-2013') & PONE==1);

% Number of tweets (column 17 of X) for the selected articles.
tweets=X(temp,17);

for i=1:length(VT)-1 % loop across the VT vector
    % selected articles whose number of tweets lies in [VT(i), VT(i+1))
    inBin=tweets>=VT(i) & tweets<VT(i+1);
    % column means of X for those articles (NaN when the bin is empty)
    M(i,:)=mean(X(temp(inBin),:),1);
end

% Bin the selected articles (temp) by percentile of the number of article
% views (column 20 of X) and, within each bin, split them by number of tweets
% (column 17 of X): 0 tweets, 1 to 3 tweets, more than 3 tweets.
views=X(temp,20);
tweets=X(temp,17);

% All percentile edges are computed once; they do not change inside the loop.
edges=prctile(views,PP);
nBins=length(PP)-1;

for i=1:nBins % loop across percentiles vector
    % Bins are [edges(i), edges(i+1)); only the last bin also includes its
    % upper edge so that the articles with the maximum view count are kept.
    if i==nBins
        belowUpper=views<=edges(i+1);
    else
        belowUpper=views<edges(i+1);
    end

    % tweets>-1 is false for NaN, so rows without a tweet count are dropped
    temp2=temp(views>=edges(i) & belowUpper & tweets>-1);

    temp3a=temp2(X(temp2,17)==0); % 0 tweets
    temp3b=temp2(X(temp2,17)>=1 & X(temp2,17)<=3); % between 1 and 3 tweets
    temp3c=temp2(X(temp2,17)>3); % more than 3 tweets

    M1(i)=nanmean(X(temp2,20),1); % mean number of article views in this bin
    M2(i)=nanmean(X(temp3a,3),1); % mean number of citations, 0 tweets
    M4(i)=nanmean(X(temp3b,3),1); % mean number of citations, 1 to 3 tweets
    M6(i)=nanmean(X(temp3c,3),1); % mean number of citations, more than 3 tweets

    nn1(i)=length(temp3a); % number of articles with 0 tweets
    nn2(i)=length(temp3b); % number of articles with 1 to 3 tweets
    nn3(i)=length(temp3c); % number of articles with more than 3 tweets
end

% Descriptive statistics of the selected articles (row indices in temp).
tweetCounts=X(temp,17); % number of tweets per selected article

disp('Number of articles')
disp(length(temp))

% mean and standard deviation, rounded to two decimals
tweetMeanSd=[mean(tweetCounts) std(tweetCounts)];
disp('Mean and SD number of tweets')
disp(round(100*tweetMeanSd)/100)

% share of articles with exactly zero tweets, as a percentage with one decimal
zeroTweetShare=mean(tweetCounts==0);
disp('Percentage of articles with zero tweets')
disp(round(1000*zeroTweetShare)/10)

% Regression analyses on standardized ranks: every variable is converted to
% tied ranks and then z-scored before being passed to regress. Each model has
% an intercept column followed by the listed predictors; the coefficients are
% displayed rounded to two decimals.
rankz=@(v) zscore(tiedrank(v));
intercept=ones(length(temp),1);

% Predictor sets: (1) publication date + tweets, (2) publication date + tweets + PLOS ONE article views
designs={[intercept rankz([date(temp) X(temp,17)])], ...
         [intercept rankz([date(temp) X(temp,[17 20])])]};

% One row per analysis, in display order: [outcome column of X, index into designs]
%   3  citations            from tweets
%   3  citations            from tweets and PLOS ONE article views
%   20 PLOS ONE article views from tweets
%   22 PMC article views    from tweets
%   2  mendeley additions   from tweets
%   11 facebook activity    from tweets
analyses=[ 3 1
           3 2
          20 1
          22 1
           2 1
          11 1];

for k=1:size(analyses,1)
    outcome=rankz(X(temp,analyses(k,1)));
    R=regress(outcome,designs{analyses(k,2)});
    disp('Beta coefficients of regression analysis')
    disp(round(R*100)/100)
end

% PLOS ONE papers published from 1 July 2012 up to and including 30 June 2013
% with more than 100 tweets (column 17 of X). The upper bound is written as
% "before 1 July 2013" because June has only 30 days, so '31-jun-2013' is not
% a calendar date.
temp4=find(date>=datenum('1-jul-2012') & date<datenum('1-jul-2013') & PONE==1 & X(:,17)>100);

% Keep the 15 most tweeted papers, or all of them when fewer than 15 qualify
% (indexing 1:15 unconditionally would error in that case).
[~,order]=sort(X(temp4,17),'descend');
nTop=min(15,numel(order));
temp4=temp4(order(1:nTop));

% Column 3 of raw for the selected papers
disp(raw(temp4,3))

% Tweets (17), article views (20) and citations (3) per selected paper
disp([X(temp4,17) X(temp4,20) X(temp4,3)])

% Column means rounded to two decimals, for the selected papers and for the
% whole sample in temp; the explicit dimension keeps the result a 1-by-3 row
% even when only one paper is selected.
disp(round(100*mean(X(temp4,[17 20 3]),1))/100)
disp(round(100*mean(X(temp,[17 20 3]),1))/100)

%% Figures

% Close every open figure window before the figures below are drawn.
close all

% DD=NaN(108,1);

% c=0;for y=2006:2015;for i=1:12;c=c+1;str=[num2str(i) '-1-' num2str(y)];DD(c)=datenum(str);end;end

% DD=DD(1:end-7);

% CC=NaN(length(DD)-1,1);

% CC2=NaN(length(DD)-1,1);

% NN=NaN(length(DD)-1,1);

%

% for i=1:length(DD)-1;

% Y=find(date>=DD(i) & date<DD(i+1) & PONE==1); % PLOS ONE articles published in month i

% CC(i)=mean(X(Y,3)); % mean number of citations

% NN(i)=length(Y); % number of publications

% end

%

% figure;plot(16+DD(1:end-1), NN,'k-o','Linewidth',3)

% grid minor

% xlabel('Publication date','Fontsize',30)

% set(gca,'xtick',[datenum('1-jan-2006') datenum('1-jan-2007') datenum('1-jan-2008') datenum('1-jan-2009') datenum('1-jan-2010') datenum('1-jan-2011') datenum('1-jan-2012') datenum('1-jan-2013') datenum('1-jan-2014')])

% set(gca,'xticklabel',2006:2014)

% ylabel('Number of publications/month','Fontsize',30)

% h = findobj(gcf,'FontName','Helvetica');

% set(h,'FontSize',28)

% set(gca,'xlim',[datenum('1-jan-2007') datenum('1-jan-2015')])

%

% figure;plot(16+DD(1:end-1), CC,'k-o','Linewidth',3)

% grid minor

% xlabel('Publication date','Fontsize',30)

% ylabel('Mean number of citations','Fontsize',30)

% set(gca,'xtick',[datenum('1-jan-2006') datenum('1-jan-2007') datenum('1-jan-2008') datenum('1-jan-2009') datenum('1-jan-2010') datenum('1-jan-2011') datenum('1-jan-2012') datenum('1-jan-2013') datenum('1-jan-2014')])

% set(gca,'xticklabel',2006:2014)

% h = findobj(gcf,'FontName','Helvetica');

% set(h,'FontSize',28)

% h = findobj(gcf,'FontName','Helvetica');

% set(h,'FontSize',28)

% set(gca,'xlim',[datenum('1-jan-2007') datenum('1-jan-2015')])

% Figure: mean activity counts (log10 scale on the y-axis) as a function of
% the mean number of tweets per tweet-count bin (rows of M).
figure
hold on

tweetBinMeans=M(:,17); % x-values: mean number of tweets in each bin

% One row per curve: {column of M, plot arguments}
curves={20,{'k-o','Linewidth',3,'Markersize',12}
        22,{'-d','Linewidth',3,'color',[.8 .8 .8],'Markersize',12}
        11,{'k-s','Linewidth',3,'markerfacecolor',[.8 .8 .8],'Markersize',12}
         3,{'k-v','Linewidth',3,'markerfacecolor','w','Markersize',12}
         2,{'k-*','Linewidth',3,'Markersize',12}};
for k=1:size(curves,1)
    plot(tweetBinMeans,log10(M(:,curves{k,1})),curves{k,2}{:});
end

% One row per annotation: {x position, y position (before log10), label}
annotations={ 60,50000,'PLOS ONE article views'
              15, 1200,'PubMed Central article views'
              75,  210,'Facebook likes/shares/posts/comments'
             130,   30,'Mendeley additions'
             100,    4,'Citations'};
for k=1:size(annotations,1)
    text(annotations{k,1},log10(annotations{k,2}),annotations{k,3})
end

grid on

% Ticks are placed at log10 of the values below but labelled with the values themselves.
tickValues=[1 2 3 5 10 20 30 50 100 200 300 500 1000 2000 3000 5000 10000 20000 30000 50000 100000];
set(gca,'xlim',[0 300],'ytick',log10(tickValues),'yticklabel',tickValues)

xlabel('Number of tweets','Fontsize',30)
ylabel('Activity count','Fontsize',30)

% Set the font size of every object in the figure whose font is Helvetica.
helveticaObjects=findobj(gcf,'FontName','Helvetica');
set(helveticaObjects,'FontSize',28)

% x-positions of the percentile bins: lower edge of each bin plus half the
% mean spacing of PP.
V=PP(1:end-1)+mean(diff(PP))/2;

% Figure: number of articles per percentile bin of article views, split by
% number of tweets.
figure
plot(V,nn1,'k-o','Linewidth',3);hold on
plot(V,nn2,'-s','Linewidth',3,'color',[.4 .4 .4])
plot(V,nn3,'-^','Linewidth',3,'color',[.8 .8 .8])
grid on

% The legend goes in the upper-left corner. The named 'Location' value
% replaces the obsolete numeric position code 2.
legend('Articles without tweets','Articles with 1 to 3 tweets','Articles with more than 3 tweets','Location','NorthWest')

% Ticks at x = 1, 10, 20, ..., 100 are labelled with the rounded mean number
% of article views (M1) of percentile bins 1, 10, 20, ..., 100.
set(gca,'xtick',[1 10:10:100]);
set(gca,'xticklabel',round(M1([1 10:10:100])));

xlabel('Number of article views','Fontsize',30)
ylabel('Number of articles','Fontsize',30)

% Set the font size of every object in the figure whose font is Helvetica.
h = findobj(gcf,'FontName','Helvetica');
set(h,'FontSize',28)

% Figure: mean number of citations per percentile bin of article views, split
% by number of tweets (M2: 0 tweets, M4: 1 to 3 tweets, M6: more than 3 tweets).
figure
plot(V,M2,'k-o','Linewidth',3);hold on
plot(V,M4,'-s','Linewidth',3,'color',[.4 .4 .4])
plot(V,M6,'-^','Linewidth',3,'color',[.8 .8 .8])
grid on

% The legend goes in the upper-left corner. The named 'Location' value
% replaces the obsolete numeric position code 2.
legend('Articles without tweets','Articles with 1 to 3 tweets','Articles with more than 3 tweets','Location','NorthWest')

% Ticks at x = 1, 10, 20, ..., 100 are labelled with the rounded mean number
% of article views (M1) of percentile bins 1, 10, 20, ..., 100.
set(gca,'xtick',[1 10:10:100]);
set(gca,'xticklabel',round(M1([1 10:10:100])));

xlabel('Number of article views','Fontsize',30)
ylabel('Number of citations','Fontsize',30)

% Set the font size of every object in the figure whose font is Helvetica.
h = findobj(gcf,'FontName','Helvetica');
set(h,'FontSize',28)

% Fix the y-axis range to 0-8 citations.
set(gca,'ylim',[0 8])

% Column indices of X and the metric each column holds:

% 1 citeulike

% 2 mendeley

% 3 crossref

% 4 datacite

% 5 pmceurope

% 6 pmceuropedata

% 7 pubmed

% 8 scopus

% 9 articlecoverage

% 10 articlecoveragecurated

% 11 facebook

% 12 plos_comments

% 13 nature

% 14 reddit

% 15 researchblogging

% 16 scienceseeker

% 17 twitter

% 18 wikipedia

% 19 wordpress

% 20 counter

% 21 figshare

% 22 pmc

% 23 relativemetric

% 24 f1000