"""
Analyze Twitter data for profanity of presidential candidates.

Depends on the Algorithmia stop word and profanity detection algorithms, and on twitter_pull.py.
"""
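# Example invocation (a sketch; the script filename is an assumption, and the
# query argument must match a CSV created by twitter_pull.py at data/<query>.csv):
#     python candidate_profanity.py "candidate name"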

import os
import re
import csv
import sys
import Algorithmia as alg


# Add in your Algorithmia API key
client = alg.client('your_algorithmia_api_key')
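# As an alternative (an assumption, not part of the original script), the key
# could be read from an environment variable instead of being hard-coded:
#     client = alg.client(os.environ.get('ALGORITHMIA_API_KEY'))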


def read_data():
    """Create the list of tweets from your query."""
    try:
        # The query term doubles as the CSV filename, with spaces replaced by hyphens
        filename = sys.argv[1].replace(' ', '-')
        with open('data/{0}.csv'.format(filename)) as data_file:
            data_object = csv.DictReader(data_file, delimiter=',')
            text_data = [tweet['text'] for tweet in data_object]
            return text_data
    except IndexError as ie:
        print(
            "Input error - did you remember to pass in your system argument?",
            ie)
        sys.exit(1)
    except FileNotFoundError as fe:
        print("File not found - check your directory and filename", fe)
        sys.exit(1)


def process_text():
    """Remove mentions, URLs, punctuation, emoticons, etc. and return a list of cleaned tweets."""
    stripped_text = [
        re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)|^rt|http.+?|" +
               re.escape(sys.argv[1].lower()), '',
               tweet.lower()).strip() for tweet in read_data()
    ]
    return stripped_text


def remove_stop_words():
    """Remove stop words from the tweets."""
    try:
        algo = client.algo('nlp/RetrieveStopWords/0.1.1')
        # The algorithm takes an empty list as input and returns the stop word list
        stop_word_list = algo.pipe([])
        # Keep a word only if it is not in the stop word list
        clean_text = ' '.join([word for sentence in process_text()
                               for word in sentence.split(' ')
                               if word not in stop_word_list.result])
        return clean_text
    except Exception as e:
        print(e)
        sys.exit(1)


def profanity():
    """Return a dictionary of swear words and their frequency."""
    try:
        algo = client.algo('nlp/ProfanityDetection/0.1.2')
        # Pass in the clean list of tweets combined into a single corpus
        result = algo.pipe([remove_stop_words()]).result
        # Total profanity in the corpus
        total = sum(result.values())
        print('Resulting swear words and counts: ', result)
        print('Total swear words: ', total)
        return {'profanity_counts': result, 'profanity_sum': total}
    except Exception as e:
        print(e)
        sys.exit(1)

if __name__ == '__main__':
    profanity()