function results=Experiments(dataset,frac,probLabeled,algo,lambda,lambda_u,multiple_switches)
% EXPERIMENTS Reproduces the experimental results of the paper:
%   "Large Scale Semi-supervised Linear SVMs"
%   V. Sindhwani & S.S. Keerthi
%   Book chapter in Large Scale Kernel Machines,
%   Eds: Bottou, Decoste, Chapelle, Weston. 2006
%   A version of this paper appeared in ACM SIGIR 2006.
%
% Inputs:
%
%   dataset:     'aut-avn','real-sim','ccat','gcat','33-36','pcmac'
%                (name of a MAT-file that provides the variables X and Y)
%   frac:        fraction of the full data to be used as training
%                (labeled & unlabeled). 1-frac is the fraction used as
%                test examples.
%   probLabeled: fraction of the training data to be used as labeled.
%                Can be a vector.
%   algo:        algorithm to use (0: rls, 1: l2svm, 2: tsvm, 3: da)
%   lambda:      regularization parameter. Optional; when omitted or empty
%                it defaults to 0.001, the value used for all experiments.
%   lambda_u:    regularization parameter. Optional; when omitted or empty
%                it defaults to 10 for 'aut-avn' and 'ccat' and to 1 for
%                every other dataset (the values used in the experiments).
%   multiple_switches: for TSVM, how many switches to use (set 1 for
%                single switching, Inf for maximum switching). Optional;
%                when omitted or empty no -S option is passed to the
%                solver (presumably the solver then applies its own
%                default -- confirm against the SVMlin documentation).
%
% Output:
%
%   results: struct with the fields seed, algo, Dataset, probLabeled,
%            numLabeled, TrainSize, TestSize, time, Unlab.error and
%            Test.error. time, Unlab.error and Test.error are
%            (number of splits) x length(probLabeled) matrices.
%
% Use the following values --
%
%   aut-avn, real-sim:
%     frac=0.5,  probLabeled=[0.00125 0.0025 0.0050 0.0100 0.0200 0.0400];
%   ccat, gcat:
%     frac=0.75, probLabeled=[0.0025 0.0050 0.0100 0.0200 0.0400 0.0800];
%   33-36:
%     frac=0.5,  probLabeled=[0.00125 0.0025 0.0050 0.0100 0.0200 0.0400];
%   pcmac:
%     frac=0.75, probLabeled=[0.0250 0.0500 0.0750 0.1000 0.1250 0.1500];
%

% Defaults for the optional regularization parameters (values taken from
% the header documentation above).
if nargin<5 || isempty(lambda)
    lambda=0.001;
end
if nargin<6 || isempty(lambda_u)
    if any(strcmp(dataset,{'aut-avn','ccat'}))
        lambda_u=10;
    else
        lambda_u=1;
    end
end

% Load the data explicitly instead of letting load() create variables in
% the function workspace behind our back.
data=load(dataset);
if ~isfield(data,'X') || ~isfield(data,'Y')
    error('Experiments:badDataset', ...
          'Dataset "%s" must provide the variables X and Y.',dataset);
end
X=data.X;
Y=data.Y(:);   % column orientation, so Y(idx) is always a column

seed=1e5;
r=sum(Y>0)/length(Y);          % fraction of positive examples
n=ceil(frac*length(Y));        % number of training (labeled+unlabeled) examples
np=ceil(r*n);                  % positives in the training part
nn=n-np;                       % negatives in the training part
numLabeled=ceil(probLabeled*n);

% Solver command line. -S is only added when a switch count was requested;
% previously a missing multiple_switches left S undefined and crashed here.
cmdline=[' -A ' num2str(algo) ' -W ' num2str(lambda) ...
         ' -U ' num2str(lambda_u) ' -R ' num2str(r)];
if nargin>=7 && ~isempty(multiple_switches)
    if isinf(multiple_switches)
        S=length(Y);
    else
        S=multiple_switches;
    end
    cmdline=[cmdline ' -S ' num2str(S)];
end

fprintf(1,'dataset: %s\n',dataset);
fprintf(1,'cmdline: %s\n',cmdline);

[pos,neg]=make_splits(Y,seed);
numSplits=size(pos,1);
numFracs=length(probLabeled);

results.seed=seed;
results.algo=algo;
results.Dataset=dataset;
results.probLabeled=probLabeled;
results.numLabeled=numLabeled;
results.TrainSize=n;
results.TestSize=length(Y)-n;
results.time=zeros(numSplits,numFracs);

error_u=zeros(numSplits,numFracs);
error_t=zeros(numSplits,numFracs);

for i=1:numSplits
    for j=1:numFracs
        l=numLabeled(j);
        lp=ceil(l*r);          % labeled positives
        ln=l-lp;               % labeled negatives

        L=[pos(i,1:lp)     neg(i,1:ln)];      % labeled indices
        U=[pos(i,lp+1:np)  neg(i,ln+1:nn)];   % unlabeled indices
        T=[pos(i,np+1:end) neg(i,nn+1:end)];  % test indices

        % Unlabeled examples are passed to the solver with label 0.
        Ytrain=zeros(length([L U]),1);
        Ytrain(1:length(L))=Y(L);

        % The third output of the solver is recorded as the training time.
        [w,o,f]=svmlin(cmdline,X([L U]',:),Ytrain);
        results.time(i,j)=f;
        error_u(i,j)=performance(o(l+1:end),Y(U));

        % Same function name as for training: the original mixed "svmlin"
        % and "SVMlin", which cannot both resolve on a case-sensitive
        % file system.
        [w,o]=svmlin([],X(T',:),[],w);
        error_t(i,j)=performance(o,Y(T));

        fprintf(1,'transduction error (split %d , lab %d ) = %f (time %f)\n',i,l, error_u(i,j),f);
        fprintf(1,' test error (split %d , lab %d ) = %f\n',i,l, error_t(i,j));
    end
end

results.Unlab.error=error_u;
results.Test.error=error_t;


function [pos,neg]=make_splits(Y,seed,numSplits)
% MAKE_SPLITS Random permutations of the positive and negative indices.
%   Row i of pos (neg) is a random permutation of the indices of the
%   positive (negative) entries of Y. numSplits is optional and defaults
%   to 10.
if nargin<3 || isempty(numSplits)
    numSplits=10;
end

idx_pos=reshape(find(Y>0),1,[]);
idx_neg=reshape(find(Y<0),1,[]);

% Legacy generator seeding is kept on purpose: switching generators would
% change the splits and therefore the reported numbers.
rand('seed',seed);

pos=zeros(numSplits,length(idx_pos));
neg=zeros(numSplits,length(idx_neg));
for i=1:numSplits
    pos(i,:)=idx_pos(randperm(length(idx_pos)));
    neg(i,:)=idx_neg(randperm(length(idx_neg)));
end


function err=performance(f,y)
% PERFORMANCE Fraction of entries where sign(f) differs from y.
%   Both inputs are compared as column vectors so that a row/column
%   mismatch cannot expand into a matrix comparison.
guess=sign(f(:));
y=y(:);
err=1-sum(guess==y)/length(y);