import java.util.ArrayList; /** * Posting list deals with the operations of a posting list and a * term list.

* * @author cs379/csci335 students of 2004/2006 * @revised Xiannong Meng * @date 2013-04-26 * *

Revision notes: simplified the posting list to be just posting node * each of which contains a doc name and a term frequency.

*/ public class PostingList { /* * Data members */ public static boolean DEBUG = false; // public static boolean DEBUG = true; private ArrayList termList; /** * Default constructor.

*/ public PostingList() { this.termList = new ArrayList(); } /** *

Adding a new term to the term list.

* *

Precondition: The term does not exist in the list.

*

Postcondition: The term now is on the list.

* * @param term The new term to add to the list * @param docName The name of the document that contains this * term. */ public void addTerm(String term, String docName) { PostingHeader node = new PostingHeader(term, docName, 1); this.termList.add(node); } /** *

Update the posting list node when a term is already * on the term list.

*

Two scenarios:

* * *

Precondition: The term is on the term list, thus a valid "termIndex" * has been found.

*

Postcondition: The term and document information updated * accordingly.

* * @param termIndex The index in the term array list * @param docName The name of the document */ public void updateTerm(int termIndex, String docName) { int index = this.getDocIndex(termIndex, docName); if (index == -1) { // the doc node is not on the posting list PostingNode pNode = new PostingNode(docName, 1); this.termList.get(termIndex).postList.add(pNode); this.termList.get(termIndex).incDocFreq(); } else { // the doc node exists, update frequency this.termList.get(termIndex).postList.get(index).incTF(); } } /** *

Find the index of the document named 'dName' on the posting list.

* *

Precondition: the document name is given as 'dName'

*

Postcondition: the index of the doucment is given. If not found, a * -1 is returned.

* * @param termIndex The index of the term on the term list. * @param dName The name of the doucment * @return The index of the document, if found; -1 if not found. */ public int getDocIndex(int termIndex, String dName) { for (int i = 0; i < this.termList.get(termIndex).postList.size(); i ++) { PostingNode pNode = this.termList.get(termIndex).postList.get(i); if (dName.compareToIgnoreCase(pNode.getDocName()) == 0) return i; // found } return -1; // not found } /** *

Find the index of the term in the term list. Returns -1 * if the term has not been in the list yet.

* * @param term The term to be searched * @return The location of the term in the term list, -1 if not existing. */ public int getTermIndex(String term) { for (int i = 0; i < this.termList.size(); i ++) { String head = this.termList.get(i).term; if (term.compareToIgnoreCase(head) == 0) return i; // found } return -1; // not found } /** *

Print the entire term list, along with each postig list.

*/ public String toString() { String retString = ""; // for each term for (int t = 0; t < this.termList.size(); t ++) { PostingHeader ph = this.termList.get(t); retString += "Term : " + ph.term + "\n"; retString += " Doc Frequency : " + ph.docFrequency + "\n"; retString += " Doc List : "; ArrayList pl = ph.postList; // for each node on the posting list of the term for (int d = 0; d < pl.size(); d ++) { retString += " | " + pl.get(d).docName + ", " + pl.get(d).tf + " | "; } retString += "\n"; } return retString; } public static void main(String args[]) { // some simple test cases PostingList plist = new PostingList(); plist.addTerm("hello", "d1.txt"); plist.addTerm("world", "d2.txt"); plist.addTerm("how", "d1.txt"); plist.addTerm("are", "d1.txt"); plist.addTerm("you", "d2.txt"); int i = plist.getTermIndex("hello"); if (i != -1) { System.out.println("found 'hello' at " + i + " updating ..."); plist.updateTerm(i, "d1.txt"); plist.updateTerm(i, "d2.txt"); plist.updateTerm(i, "d3.txt"); } i = plist.getTermIndex("you"); if (i != -1) { System.out.println("found 'you' at " + i + " updating ..."); plist.updateTerm(i, "d2.txt"); plist.updateTerm(i, "d2.txt"); plist.updateTerm(i, "d2.txt"); plist.updateTerm(i, "d1.txt"); plist.updateTerm(i, "d4.txt"); plist.updateTerm(i, "d5.txt"); } System.out.println(plist); } }