Demo entry 6626495

word

   

Submitted by anonymous on Jun 25, 2017 at 16:31
Language: Java. Code size: 5.3 kB.

package pagerank;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Map.Entry;

public class PageRankBasic {
	static String DataPath = "files\\WikiData.txt";
	static List<Edge> edgeList = new ArrayList<Edge>();//存储所有的边
	static Set<Integer> nodeList = new TreeSet<Integer>();//存储所有的节点
	static int[] outDegree;//记录节点的出度
	static int maxNode;//最大的节点序号
	static double m[][];//转移矩阵
	static double[] oldScore;//计算前的PageRank值
	static double[] newScore;//计算后的PageRank值
	static int edgeNum;//链接总数
	static int nodeNum;//节点总数
	static int deadNum;//终止节点总数
	static int inZero;//入度为0的节点数
	//存储最终的排序结果
	static TreeMap<Double,TreeSet<Integer>> result = 
			new TreeMap<Double,TreeSet<Integer>>(new sortComparator());
	
	//读文件
	public static void readFile(String filePath) throws IOException{
		BufferedReader buf;
		buf = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), "utf8"));
		String line = "";
		while((line = buf.readLine()) != null){
			String[] elements = line.split("\t");
			Edge tmpEdege = new Edge(Integer.parseInt(elements[0]),Integer.parseInt(elements[1]));
			edgeList.add(tmpEdege);//添加链接
			nodeList.add(Integer.parseInt(elements[0]));//添加节点
			nodeList.add(Integer.parseInt(elements[1]));
		}
		buf.close();
	}
	//计算PageRank score
	public static void pageRank(double beta, double threshold) throws IOException{
		Iterator<Integer> iterator = nodeList.iterator(); 
		while(iterator.hasNext()){
			maxNode = (int) iterator.next();
		}
		edgeNum = edgeList.size();
		nodeNum = nodeList.size();
		newScore = new double[maxNode + 1];
		oldScore = new double[maxNode + 1];
		outDegree = new int[maxNode + 1];
		m  = new double[maxNode + 1][maxNode + 1];

		//初始化出度和PageRank score
		for(int i = 0; i < maxNode + 1; i++){
			if(nodeList.contains(i)){
				oldScore[i] = 1.0 / nodeNum;
				outDegree[i] = 0;
			}
			else {
				outDegree[i] = -1;
				oldScore[i] = 0;
			}
		}
		//出度+1
		for(int i = 0; i < edgeList.size(); i++){
			outDegree[edgeList.get(i).from]++;
		}
		//出度为0或者节点不存在,则在转移矩阵中对应的元素为0
		for(int i = 0; i < maxNode; i++){
			for(int j = 0; j < maxNode; j++){
				if(outDegree[j] == -1 || outDegree[j] == 0)
					m[i][j] = 0;
			}
		}
		//遍历链接,修改转移矩阵
		for(int i = 0; i < edgeList.size(); i++){
			int from = edgeList.get(i).from;
			int to = edgeList.get(i).to;
			m[to][from] = beta * 1.0 / outDegree[from];
			nodeList.remove(to);
		}
		inZero = nodeList.size();
		
		for(int i = 0; i < maxNode + 1; i++){
			if(outDegree[i] == 0){
				deadNum++;
			}
		}
		System.out.println("总节点数:" + nodeNum);
		System.out.println("总链接数:" + edgeNum);
		System.out.println("终止节点数:"+deadNum);
		System.out.println("最大节点序号:" + maxNode);
		System.out.println("入度为0的节点数 :" + inZero);

		int count = 0;
		double flag;
		double s;
		do{
			flag = 0.0;
			s = 0.0;
			count++;
			//计算newScore
			for(int i = 0; i < maxNode + 1; i++){
				for(int j = 0; j < maxNode + 1; j++){
					if(outDegree[j] != -1)
						newScore[i] += m[i][j] * oldScore[j];
				}
			}
			//计算s
			for(int i=0; i < maxNode + 1; i++){
				s += newScore[i];
			}
			//如果beta不是1,则要处理终止节点,将(1-s)平均分给所有节点
			for(int i=0; i < maxNode+1; i++){
				if(outDegree[i] != -1){
					if(beta != 1)
						newScore[i] += (1.0 - s)/nodeNum;
					flag += Math.abs(oldScore[i]-newScore[i]);
				}
				oldScore[i] = newScore[i];
				newScore[i] = 0;
			}
		}while(flag > threshold);
		
		newScore =  oldScore;
		System.out.println("迭代次数:" + count);
		for(int i = 0; i < oldScore.length; i++){
			if (result.containsKey(oldScore[i]))
				result.get(oldScore[i]).add(i);
			else{
				TreeSet<Integer> set = new TreeSet<Integer>();//防止多个节点有相同的分数
				set.add(i);
				result.put(oldScore[i], set);
			} 
		}
		//将结果写入文件
		Iterator<Entry<Double, TreeSet<Integer>>> it2 = result.entrySet().iterator();
		int i = 0;
		FileOutputStream outSTr = null;
		outSTr = new FileOutputStream(new File("files\\result_basic.txt"));   
        BufferedOutputStream buff=null; 
        buff = new BufferedOutputStream(outSTr);
		while(it2.hasNext() && i < 100){
			Entry<Double, TreeSet<Integer>> tmpEntry = it2.next();
			double tmpScore = tmpEntry.getKey();
			TreeSet<Integer> entry= tmpEntry.getValue();
			for(Integer index : entry){
				buff.write((index+" "+BigDecimal.valueOf(tmpScore) + "\r\n").getBytes());
				buff.flush();
				i++;
//				System.out.println(++i +"---"+index+"---"+BigDecimal.valueOf(tmpScore));
			}
		}
		buff.close();

	}
	public static void main(String[] args) throws IOException {
		final double threshold = 1e-8;
		final double beta = 0.85;
		String dataPath = "files\\WikiData.txt";
		readFile(dataPath);
		pageRank(beta, threshold);
		System.out.println("占用内存 :" + 
		(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024/1024.0 + "MB");
	}
}

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).