Various Mastodon Bots

format.py 2.6KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. #!/usr/bin/python3
  2. from re import findall, match, finditer, sub
  3. import logging
  4. import html2text
  5. def formatStatus(rssEntry, extra):
  6. status = rssEntry.summary
  7. status = htmlToMastodonFormat(status)
  8. status = concatAndInsertLink(status, rssEntry, extra)
  9. return status
  10. #concats to make space for link and whatever else we want to append
  11. def concatAndInsertLink(status, rssEntry, extra):
  12. maxStatusLen = 500
  13. statusLen = len(status)
  14. headerLen = len(rssEntry.title)
  15. extraLen = len(extra)
  16. #mastodon only counts the first 30 chars of a link towards its status limit,
  17. #the other 2 are for newlines before the link
  18. totalLen = statusLen+headerLen+32+extraLen
  19. if totalLen <= maxStatusLen:
  20. status += '\n\n'
  21. else:
  22. shortenedStatusLen = statusLen - (totalLen - maxStatusLen) - 3
  23. status = status[: shortenedStatusLen]
  24. status += '...\n\n'
  25. status += rssEntry.link
  26. status += extra
  27. return status
  28. def htmlToMastodonFormat(status):
  29. status = html2text.html2text(status)
  30. return status
  31. def extractExternalLinks(text):
  32. linkRegEx = r'href="https?://(.*?)"'
  33. externalLinkRegEx = r'refactorcamp.org'
  34. links = findall(linkRegEx, text)
  35. externalLinks = []
  36. for link in links:
  37. if not match(externalLinkRegEx, link):
  38. externalLinks.append(link)
  39. return externalLinks
  40. #grabs all text after the first <p> and before the first <a tag
  41. def extractText(html):
  42. # logging.basicConfig(level=logging.DEBUG)
  43. extractTextRegEx = r'<p>(.*?)<a'
  44. extractTagsRegEx = r'<.+?><.+?>'
  45. textMatch = match(extractTextRegEx, html)
  46. if textMatch:
  47. text = textMatch.group(1)
  48. text = sub(extractTagsRegEx, ' ', text)
  49. else:
  50. text = ''
  51. return text
  52. def formatBlogLog(title, author, link):
  53. blogLog = (title +
  54. ' by <a href="https://refactorcamp.org/' +
  55. author +
  56. '">' +
  57. author +
  58. '</a>. <a href="' +
  59. link +
  60. '">Link</a>')
  61. return blogLog
  62. def formatLinkedTootLog(text, author):
  63. externalLinks = extractExternalLinks(text)
  64. title = extractText(text)
  65. formatString = title + '.'
  66. for link in externalLinks:
  67. formatString += '<a href="http://' + link + '">Link</a>. '
  68. formatString += ('ht <a href="https://refactorcamp.org/@' +
  69. author + '">@' + author + '</a>')
  70. return formatString
  71. def formatLocalTootLog(text, author):
  72. title = extractText(text)
  73. formatString = (title + ' -- <a href="https://refactorcamp.org/@' +
  74. author + '">@' + author + '</a>')
  75. return formatString