// This file demonstrates reading Moving Ratings Data // from GroupLens Research Project at the University of Minnesota // SOURCE: http://www.grouplens.org/ /* From their README file: u.data -- The full u data set, 100000 ratings by 943 users on 1682 items. Each user has rated at least 20 movies. Users and items are numbered consecutively from 1. The data is randomly ordered. This is a tab separated list of user id | item id | rating | timestamp. The time stamps are unix seconds since 1/1/1970 UTC */ // GOAL: read in data, find average rating for each film // and number of ratings for each user, store results // in two different files // // -- M. Branicky, 10/11/06 import java.util.Scanner; // needed for input processing using Scanner import java.io.PrintWriter; // needed for output processing using PrintWriter public class GLRatings { public static void main(String args[]) { ///////////////////// INPUT AND PROCESSING /////////////////////// // Open the file // NOTE: this is the full path on my computer, you must download the // file yourself (on Code Repository) and change this name appropriately Scanner input = TextFileIO.openRead("/Users/msb/Downloads/ml-data/u.data"); // Get input from file /* FORMAT IS user id | item id | rating | timestamp. There are 943 users and 1682 movies */ int userID, itemID, rating, timestamp; int userN [] = new int [944]; int itemSum [] = new int [1683]; int itemN [] = new int [1683]; for (int i=1; i<=943; i++) { userN[i]=0; } for (int i=2; i<=1682; i++) { itemN[i]=0; itemSum[i]=0; } while (input.hasNext()) { // read a line of the data userID = input.nextInt(); itemID = input.nextInt(); rating = input.nextInt(); timestamp = input.nextInt(); // update the variable of interest userN[userID]++; itemN[itemID]++; itemSum[itemID]+=rating; } // Close the input file input.close(); ///////////////////// OUTPUT /////////////////////// // Open the files PrintWriter output1 = TextFileIO.openWrite("userN.txt"); PrintWriter output2 = TextFileIO.openWrite("itemAvg.txt"); // Write the userN output for (int i=1; i<=943; i++) { output1.println(i+"\t"+userN[i]); } double avg; for (int i=1; i<=1682; i++) { avg = ((double) itemSum[i])/((double) itemN[i]); output2.println(i+"\t"+avg); } // Close the files output1.close(); output2.close(); } }