import _root_.scala.collection.mutable.Map

println("running")                                            // printed first so the user knows the sketch is working and just loading
size(1000, 200)

val lightestColor = 175                                            // maximum lightness for words
var words: Map[String, Int] = Map[String, Int]()              // all the words on the screen, mapped to the number of occurrences
var tweets: Array[Tweet] = Array[Tweet]()                     // all the tweets loaded at the beginning
var visibleTweets: Array[Tweet] = Array[Tweet]()              // all the tweets currently on the screen
var counter: Int = 0                                          // how many tweets have been displayed so far
  
val fontHeight = 10                                           // font display variables
val textHeight = fontHeight + 6                               // vertical distance between two successive tweet lines
var tFont: PFont = createFont( "Courier", fontHeight, true);
colorMode(PConstants.RGB,255,255,255,100)
smooth()
val initX = 5                                                 // x position at which every tweet line starts
val numRecent = (height / textHeight)                         // how many tweets fit on the screen at once

frameRate(0.5f)                                                // how long between tweets - currently, one tweet every two seconds
      
println("loading")
getTweetsThreaded                                              // downloads all the tweets
println("there are " + tweets.length + " tweets")
tweets = Array(new Tweet("", " All " + tweets.length + " available tweets have been used - there are no more tweets in array")) ++ tweets
                                                               // one extra message tweet is prepended; updateVisibleTweets reads the array from its end backwards,
                                                               // so this one is displayed last and tells the user the end was reached, not that something bad happened

// Called over and over, at the rate set by frameRate above; each call puts one more tweet on the screen
def draw(): Unit = {
  val tweetsRemain = counter < tweets.length                  // once every tweet has been shown, leave the screen untouched
  if (tweetsRemain) {
    background(255)
    updateVisibleTweets()                                     // first bring the data structures up to date
    drawVisibleTweets()                                       // then render what is now visible
    counter = counter + 1
  }
}

// Sequential (non-threaded) download of the tweets from search.twitter.com, going through a php proxy script
def getTweets(): Unit = {
  // the URL of the proxy php script; the page number is appended to it
  val proxyUrl = "http://lehrblogger.com/nyu/classes/fall08/icm/hw5/twitterProxy.php?pagenum="
  // without the php script the URL would be:
  //   "http://search.twitter.com/search.atom?max_id=950939157&page=" + page.toString + "&q=%23debate08"

  // pages 1 through 20 are requested (original note: Twitter limits results to 1500 per search, at 15 per page)
  (1 to 20).foreach { page =>
    val feedText = _root_.scala.io.Source.fromURL(proxyUrl + page.toString).getLines.mkString("")   // the page's XML as one string
    val feed = _root_.scala.xml.XML.loadString(feedText)                                           // parsed into an XML object

    (feed \\ "entry").foreach { entry =>                       // each entry tag is one individual tweet
      val author = (entry \\ "name").text
      val title = (entry \\ "title").text
      tweets = Array(new Tweet(author, title)) ++ tweets      // new tweets are prepended to the master array
    }
  }
}

// Threaded version of getTweets, written by Jorge Ortiz (a friend and Scala programmer extraordinaire) to make the loading process faster.
// It requests every page through a future and then adds the resulting tweets to the master array exactly as getTweets does.
def getTweetsThreaded() {
  val numPages = 20                                             // pages 1 through numPages are requested
  val url = "http://lehrblogger.com/nyu/classes/fall08/icm/hw5/twitterProxy.php?pagenum="   // proxy script URL; the page number is appended
  // wraps the download and parse of one page in a scala.actors future and returns that future
  def fetch(page: Int) = _root_.scala.actors.Futures.future {
    val xmlstr = _root_.scala.io.Source.fromURL(url + page).getLines.mkString("")   // the page's XML as one string
    _root_.scala.xml.XML.loadString(xmlstr)                                         // parsed XML is the future's value
  }
  // first map: one future per page; second map: apply each future to obtain its parsed XML.
  // NOTE(review): the toList presumably forces all futures to be created before the first result is awaited
  // (Range.map may be lazy in this Scala version) - confirm before restructuring this expression
  val xmls =
    (1 to numPages).toList.map(fetch).map(_.apply)

  for(xml <- xmls) {
    for (entry <- xml \\ "entry") {                             // each entry tag is one individual tweet
          tweets = Array(new Tweet((entry \\ "name").text, (entry \\ "title").text)) ++ tweets   // prepend it to the master array
    }
  }
}

// Brings the next tweet onto the screen, keeping the visible-tweet array and the word counts in sync
def updateVisibleTweets(): Unit = {
  val incoming: Tweet = tweets(tweets.length - 1 - counter)     // the master list is consumed from its end towards its front

  visibleTweets = Array(incoming) ++ visibleTweets              // the newest tweet always goes first

  val screenOverflows = visibleTweets.length > numRecent        // true when one tweet no longer fits on the screen
  if (screenOverflows) {
    for (token <- visibleTweets(numRecent).tokens) {            // forget the words of the tweet that fell off the bottom
      updateOccurrence(token, false)
    }
    visibleTweets = visibleTweets.slice(0, visibleTweets.length - 1)   // and drop that tweet from the visible array
  }

  for (token <- incoming.tokens) {                              // finally count every word of the new tweet
    updateOccurrence(token, true)
  }
}

// Updates the occurrence map for one token.
//   tokenArg - the raw token as it appears in a tweet; it is stripped of leading/trailing punctuation and lowercased before use
//   adding   - true to record one more occurrence, false to take one occurrence away
// Removing the last occurrence deletes the entry altogether (to keep the map size manageable).
// Removing a token that is not in the map is a no-op, so counts can never become zero or negative.
def updateOccurrence(tokenArg: String, adding: Boolean): Unit = {
  val token = stripToken(tokenArg).toLowerCase              // we only want lowercase tokens without leading or trailing punctuation in the map

  val count: Int = words.getOrElse(token, 0)                // current count for this word; a lookup only, nothing is inserted here

  if (adding) {
    words(token) = count + 1                                // one more occurrence
  } else if (count <= 1) {
    words -= token                                          // this was the only occurrence (or the token was never counted): no entry is kept
  } else {
    words(token) = count - 1                                // one occurrence fewer
  }
}


// Renders every visible tweet, one line per tweet, from the top of the screen downwards
def drawVisibleTweets(): Unit = {
  for (row <- 0 until visibleTweets.length) {                // the row number decides how far down the screen the tweet goes
    drawTweet(visibleTweets(row), row * textHeight)
  }
}

// Renders a single tweet on one line: the padded author name in black, then each token in its own shade.
//   t - the tweet to render
//   y - vertical offset of the line; the text baseline is placed textHeight below it
def drawTweet(t: Tweet, y: Float): Unit = {
  val baseline: Float = textHeight + y                      // every piece of this tweet shares one baseline
  val authorLabel = t.formatAuthor
  var penX: Float = initX                                   // each line starts at the same place on the left

  textFont(tFont)                                           // same font for everything (set before any width is measured)

  fill(0)                                                   // the author's name comes first, in black
  text(authorLabel, penX, baseline)
  penX += textWidth(authorLabel)                            // advance to where the first word goes

  for (token <- t.tokens) {
    val piece = " " + token                                 // each token is drawn preceded by a space
    fill(getTokenColor(token))                              // shade depends on how often the token occurs on screen
    text(piece, penX, baseline)
    penX += textWidth(piece)                                // advance to where the next word goes
  }
}

// Determines what gray level a token should be drawn in, based on the number of times it occurs on the screen.
//   tokenArg - the raw token; it is stripped and lowercased before being looked up in the occurrence map
// Returns lightestColor for empty tokens, #hashtags and common words; otherwise a value that gets darker
// (closer to 0) the more often the token occurs, with 0 for the most frequent ranked word.
def getTokenColor(tokenArg: String): Float = {
  val token = stripToken(tokenArg).toLowerCase              // the map only holds lowercase tokens without leading or trailing punctuation

  // A word takes part in the ranking only if it is nonempty (checked first, so charAt is safe),
  // is not a #hashtag (the search term, initially '#debate08', is in every tweet and would always be darkest)
  // and is not a common word.
  def isRanked(word: String): Boolean =
    word.length > 0 && word.charAt(0) != '#' && !isCommon(word)

  var maxOccurrences = 1                                    // every word in the map occurs at least once; starting at 1 also prevents dividing by zero
  for ((word, count) <- words) {                            // the darkest shade belongs to the most frequent ranked word
    if (count > maxOccurrences && isRanked(word)) maxOccurrences = count
  }

  if (!isRanked(token)) {
    lightestColor.toFloat                                   // unranked tokens always use the lightest color
  } else {
    // Floating-point division on purpose: integer division truncated the step and made it 0
    // as soon as maxOccurrences exceeded lightestColor, which drew every ranked word black.
    val diff: Float = lightestColor.toFloat / maxOccurrences
    (maxOccurrences - words.getOrElse(token, 0)) * diff     // most frequent word -> 0 (darkest); rarer words -> lighter
  }
}

// Removes trailing and then leading punctuation from a token.
//   tokenArg - the raw token
// Returns the token without trailing characters that are not letters or digits, and without leading
// characters that are not letters, digits, '#' or '@'. Stripping never goes below one character, so a
// token made only of punctuation keeps a single character and an empty token stays empty.
def stripToken(tokenArg: String): String = {
  // leading '#' and '@' are kept so that #hashtags and @names stay recognizable
  def strippableLeading(c: Char): Boolean =
    !Character.isLetterOrDigit(c) && c != '#' && c != '@'

  var start = 0                                             // index of the first character that is kept
  var end = tokenArg.length                                 // index just past the last character that is kept

  // trailing punctuation first: drop the last character while it is not a letter or a digit
  while (end - start > 1 && !Character.isLetterOrDigit(tokenArg.charAt(end - 1))) {
    end -= 1
  }

  // then leading punctuation; the length guard keeps all-punctuation tokens from shrinking to ""
  while (end - start > 1 && strippableLeading(tokenArg.charAt(start))) {
    start += 1
  }

  tokenArg.substring(start, end)
}

// The commonness of many words distorts the relevance of the visualization, so they are identified here.
//   word - the word to check; the comparison is exact, so callers pass stripped, lowercase words
// Returns true when the word is in the list of common words below.
def isCommon(word: String): Boolean = {
  val commonWords: Array[String] = Array[String](
    "the",
    "of",
    "and",
    "a",
    "to",
    "in",
    "is",
    "you",
    "that",
    "it",
    "he",
    "was",
    "for",
    "on",
    "are",
    "as",
    "with",
    "his",
    "they",
    "i",
    "at",
    "be",
    "this",
    "have",
    "from",
    "or",
    "one",
    "had",
    "by",
    "word",
    "some",
    "who",
    "my",
    "has",
    "how",
    "-",
    "we",
    "but")
    //http://www.duboislc.org/EducationWatch/First100Words.html

  commonWords.exists(_ == word)
}


/* A very simple class for keeping track of the author and text of a single tweet.
 *   author - the author string; only its first space-separated word is kept
 *            (the rest, such as a name in parentheses, is dropped)
 *   text   - the tweet text
 */
class Tweet(
    var author: String,
    val text: String
  ) {

  private val authorParts: Array[String] = author.split(" ")   // split the name on single spaces
  if (authorParts.length > 0) author = authorParts(0)          // and keep only the first part, when there is one

  var tokens: Array[String] = text.split(" ")                  // the tokens of the tweet, split by a single space

  // Left-pads the author's name with spaces to 16 characters and appends ":", so that all
  // tweets start in the same horizontal location. Names of 16 or more characters get no padding.
  def formatAuthor: String = {
    val padWidth = 16 - author.length
    val padding = if (padWidth > 0) " " * padWidth else ""
    padding + author + ":"
  }
}