-- 作者:admin
-- 发布时间:2011/2/3 17:39:02
-- PageRank计算方法的SQL实现
1、表准备 存储所有网页的基本信息 CREATE TABLE [WebPages] ( [DOCID] [int] IDENTITY (1, 1) NOT NULL , [URL] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL , [PAGETEXT] [text] COLLATE Chinese_PRC_CI_AS NULL , [REFCOUNT] [int] NULL , [WEIGHT] [float] NULL , CONSTRAINT [DOCID] PRIMARY KEY CLUSTERED ( [DOCID] ) ON [PRIMARY] ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY] GO
存储网页链接关系 CREATE TABLE [PageLinks] ( [docid] [int] NULL , [url] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL , [anchordocid] [int] NULL , [anchorurl] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL ) ON [PRIMARY] GO
2、执行脚本,填充WebPages中的REFCOUNT,使之成为链出网页的数量 declare cur_webpages cursor for select docid from webpages
declare @docid int open cur_webpages fetch next from cur_webpages into @docid while (@@fetch_status=0) begin declare @rcount int select @rcount=count(*) from pagelinks where docid=@docid update webpages set refcount=@rcount WHERE CURRENT OF cur_webpages fetch next from cur_webpages into @docid end close cur_webpages deallocate cur_webpages
3、填充初始值 update webpages set weight=1.0/(select count(*) from webpages) update webpages set REVWEIGHT=0
4、计算PageRank的SQL脚本 declare @count int set @count=0
while(@count<=7) begin declare cur_webpages cursor for select docid,refcount,weight,revweight from webpages declare @docid int declare @refcount int declare @weight float declare @revweight float if(@count%2=0) select docid,weight into pagebak from webpages else select docid,revweight as weight into pagebak from webpages CREATE INDEX [docid] ON [dbo].[pagebak]([docid]) open cur_webpages fetch next from cur_webpages into @docid,@refcount,@weight,@revweight while (@@fetch_status=0) begin if @refcount<>0 begin if(@count%2=0) update pagebak set weight=weight+(@weight/@refcount) where docid in (select anchordocid from pagelinks where docid=@docid) else update pagebak set weight=weight+(@revweight/@refcount) where docid in (select anchordocid from pagelinks where docid=@docid) end fetch next from cur_webpages into @docid,@refcount,@weight,@revweight end close cur_webpages deallocate cur_webpages if(@count%2=0) begin update webpages set revweight=0.2+(select weight from pagebak where pagebak.docid=webpages.docid) update webpages set revweight=revweight/(select sum(revweight) from webpages) end else begin update webpages set weight=0.2+(select weight from pagebak where pagebak.docid=webpages.docid) update webpages set weight=weight/(select sum(weight) from webpages) end drop table pagebak select * from webpages set @count=@count+1 end
|