Various Mastodon Bots

parser.py 1.7KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. #!/usr/bin/python3
  2. import feedparser
  3. import os
  4. from util import getAbsolutePath
  5. #feedparser docs: https://pythonhosted.org/feedparser/
  6. LATEST_TITLE_DIR= getAbsolutePath(__file__, 'rssTitles/')
  7. class Parser:
  8. def __init__(self, feedURLList):
  9. self.feedURLList = feedURLList
  10. def getNewPosts(self):
  11. newEntries = []
  12. for feedURL in self.feedURLList:
  13. feed = feedparser.parse(feedURL)
  14. latestTitleFile = generateFileName(feedURL)
  15. if len(feed.entries) > 0:
  16. newestTitle = feed.entries[0].title
  17. else:
  18. continue
  19. try:
  20. latestTitle = findLatestTitle(latestTitleFile)
  21. except:
  22. latestTitle = newestTitle
  23. for entry in feed.entries:
  24. if latestTitle == entry.title:
  25. break
  26. newEntries.append(entry)
  27. writeLatestTitle(latestTitleFile, newestTitle)
  28. return newEntries
  29. #check file for latest title accessed
  30. def findLatestTitle(fileName):
  31. with open(fileName) as file:
  32. title = file.read()
  33. return title
  34. #write latest title to file
  35. def writeLatestTitle(fileName, title):
  36. with open(fileName, "w") as file:
  37. file.write(title)
  38. def generateFileName(url):
  39. base = ''.join(e for e in url if e.isalnum())
  40. #base = feed.feed.title
  41. #base = base.replace(" ","_")
  42. fileName = LATEST_TITLE_DIR + base + '.txt'
  43. return fileName
  44. if __name__ == '__main__':
  45. feedURLList = [ "https://www.ribbonfarm.com/feed/" ]
  46. parser = Parser(feedURLList)
  47. entries = parser.getNewPosts()
  48. for entry in entries:
  49. print(entry.title)