import java.util.ArrayList;
/**
* Posting list deals with the operations of a posting list and a
* term list.
*
* @author cs379/csci335 students of 2004/2006
* @revised Xiannong Meng
* @date 2013-04-26
*
*
* Revision notes: simplified the posting list to be just posting nodes,
* each of which contains a doc name and a term frequency.
*/
public class PostingList {

    /*
     * Data members
     */

    /** Debug switch. It is not read anywhere inside this class. */
    public static boolean DEBUG = false;

    /** One header per distinct term; each header owns that term's posting list. */
    private final ArrayList<PostingHeader> termList;

    /**
     * Default constructor. Creates an empty term list.
     */
    public PostingList() {
        this.termList = new ArrayList<>();
    }

    /**
     * Adds a new term to the end of the term list.
     *
     * Precondition: The term does not exist in the list. This is NOT checked
     * here; calling this method for an existing term appends a second header
     * for the same term. Callers should consult {@link #getTermIndex(String)}
     * first and use {@link #updateTerm(int, String)} when the term is found.
     * Postcondition: The term now is on the list.
     *
     * @param term    The new term to add to the list
     * @param docName The name of the document that contains this term.
     */
    public void addTerm(String term, String docName) {
        // The literal 1 is presumably the initial count for the first
        // document seen -- confirm against the PostingHeader constructor.
        PostingHeader node = new PostingHeader(term, docName, 1);
        this.termList.add(node);
    }

    /**
     * Updates the posting list of a term that is already on the term list.
     * Two scenarios:
     * <ul>
     * <li>The docName is not on the posting list: a new posting node for
     *     the document is appended and the term's document frequency is
     *     incremented.</li>
     * <li>The docName is already on the posting list: the term frequency of
     *     that posting node is incremented.</li>
     * </ul>
     *
     * Precondition: The term is on the term list, thus a valid "termIndex"
     * has been found.
     * Postcondition: The term and document information updated accordingly.
     *
     * @param termIndex The index in the term array list
     * @param docName   The name of the document
     * @throws IndexOutOfBoundsException if termIndex is not a valid index
     *         into the term list
     */
    public void updateTerm(int termIndex, String docName) {
        PostingHeader header = this.termList.get(termIndex);
        int docIndex = this.getDocIndex(termIndex, docName);
        if (docIndex == -1) {
            // The doc node is not on the posting list yet.
            header.postList.add(new PostingNode(docName, 1));
            header.incDocFreq();
        } else {
            // The doc node exists; only its frequency changes.
            header.postList.get(docIndex).incTF();
        }
    }

    /**
     * Finds the index of the document named 'dName' on the posting list of
     * the term at 'termIndex'. Document names are compared ignoring case.
     *
     * Precondition: the document name is given as 'dName'
     * Postcondition: the index of the document is given. If not found, a
     * -1 is returned.
     *
     * @param termIndex The index of the term on the term list.
     * @param dName     The name of the document
     * @return The index of the document, if found; -1 if not found.
     * @throws IndexOutOfBoundsException if termIndex is not a valid index
     *         into the term list
     */
    public int getDocIndex(int termIndex, String dName) {
        PostingHeader header = this.termList.get(termIndex);
        for (int i = 0; i < header.postList.size(); i++) {
            PostingNode pNode = header.postList.get(i);
            if (dName.compareToIgnoreCase(pNode.getDocName()) == 0) {
                return i; // found
            }
        }
        return -1; // not found
    }

    /**
     * Finds the index of the term in the term list. Terms are compared
     * ignoring case. Returns -1 if the term is not in the list yet.
     *
     * @param term The term to be searched
     * @return The location of the term in the term list, -1 if not existing.
     */
    public int getTermIndex(String term) {
        for (int i = 0; i < this.termList.size(); i++) {
            String head = this.termList.get(i).term;
            if (term.compareToIgnoreCase(head) == 0) {
                return i; // found
            }
        }
        return -1; // not found
    }

    /**
     * Renders the entire term list, along with each posting list.
     * For every term there are three lines: the term, its document
     * frequency, and its posting nodes as "docName, tf" pairs.
     *
     * @return the textual dump of all terms and their postings
     */
    @Override
    public String toString() {
        // A StringBuilder avoids re-copying the growing text on every append.
        StringBuilder out = new StringBuilder();
        // for each term
        for (PostingHeader ph : this.termList) {
            out.append("Term : ").append(ph.term).append("\n");
            out.append(" Doc Frequency : ").append(ph.docFrequency).append("\n");
            out.append(" Doc List : ");
            // for each node on the posting list of the term
            for (PostingNode node : ph.postList) {
                out.append(" | ").append(node.docName).append(", ")
                   .append(node.tf).append(" | ");
            }
            out.append("\n");
        }
        return out.toString();
    }

    /**
     * Simple manual test driver: adds a few terms, updates two of them with
     * repeated and new documents, and prints the resulting posting list.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // some simple test cases
        PostingList plist = new PostingList();
        plist.addTerm("hello", "d1.txt");
        plist.addTerm("world", "d2.txt");
        plist.addTerm("how", "d1.txt");
        plist.addTerm("are", "d1.txt");
        plist.addTerm("you", "d2.txt");

        int i = plist.getTermIndex("hello");
        if (i != -1) {
            System.out.println("found 'hello' at " + i + " updating ...");
            plist.updateTerm(i, "d1.txt");
            plist.updateTerm(i, "d2.txt");
            plist.updateTerm(i, "d3.txt");
        }

        i = plist.getTermIndex("you");
        if (i != -1) {
            System.out.println("found 'you' at " + i + " updating ...");
            plist.updateTerm(i, "d2.txt");
            plist.updateTerm(i, "d2.txt");
            plist.updateTerm(i, "d2.txt");
            plist.updateTerm(i, "d1.txt");
            plist.updateTerm(i, "d4.txt");
            plist.updateTerm(i, "d5.txt");
        }

        System.out.println(plist);
    }
}