.gitattributes
.gitignore
.travis.yml
CHANGELOG.md
DOCKER.md
Dockerfile
LICENSE
MANIFEST.in
README.md
bashrc
requirements.txt
run_docker.sh
run_publish.sh
setup.py
start.sh
.github/ISSUE_TEMPLATE.md
.github/PULL_REQUEST_TEMPLATE.md
scraper/.DS_Store
scraper/__init__.py
scraper/article.py
scraper/cli.py
scraper/configuration.py
scraper/content_extractor.py
scraper/document_cleaner.py
scraper/image_extractor.py
scraper/mthreading.py
scraper/named_entity_recognition.py
scraper/network.py
scraper/output_formatter.py
scraper/parser.py
scraper/patterns.py
scraper/settings.py
scraper/text.py
scraper/urls.py
scraper/utils.py
scraper/version.py
scraper/video_extractor.py
scraper/resources/useragents.txt
scraper/restful/.DS_Store
scraper/restful/__init__.py
scraper/restful/constants.py
scraper/restful/endpoints.py
scraper/restful/utils.py
scraper/restful/models/__init__.py
scraper/restful/models/article_pool.py
scraper/restful/models/article_progress.py
scraper/restful/models/countries.py
scraper/restful/models/exporting_thread.py
scraper/restful/models/file_types.py
scraper/restful/models/hello_world.py
scraper/restful/models/languages.py
scraper/restful/models/search.py
scraper/restful/models/share.py
stimson_web_scraper.egg-info/PKG-INFO
stimson_web_scraper.egg-info/SOURCES.txt
stimson_web_scraper.egg-info/dependency_links.txt
stimson_web_scraper.egg-info/entry_points.txt
stimson_web_scraper.egg-info/not-zip-safe
stimson_web_scraper.egg-info/requires.txt
stimson_web_scraper.egg-info/top_level.txt
tests/__init__.py
tests/conftest.py
tests/fixtures/fulltext_domain_list.txt
tests/fixtures/html/247wallst.com1.html
tests/fixtures/html/247wallst.com2.html
tests/fixtures/html/Oxford_University_Press.html
tests/fixtures/html/about.com1.html
tests/fixtures/html/about.com2.html
tests/fixtures/html/adoption.com1.html
tests/fixtures/html/al.com1.html
tests/fixtures/html/al.com2.html
tests/fixtures/html/ap_meta_refresh.html
tests/fixtures/html/apartmenttherapy.com1.html
tests/fixtures/html/apartmenttherapy.com2.html
tests/fixtures/html/arabic_article.html
tests/fixtures/html/architecturaldigest.com1.html
tests/fixtures/html/architecturaldigest.com2.html
tests/fixtures/html/avclub.com1.html
tests/fixtures/html/avclub.com2.html
tests/fixtures/html/backstage.com1.html
tests/fixtures/html/backstage.com2.html
tests/fixtures/html/bhg.com1.html
tests/fixtures/html/bhg.com2.html
tests/fixtures/html/bloomberg.com1.html
tests/fixtures/html/books.google.com.html
tests/fixtures/html/bostonherald.com1.html
tests/fixtures/html/bostonherald.com2.html
tests/fixtures/html/businessinsider.com1.html
tests/fixtures/html/businessinsider.com2.html
tests/fixtures/html/businessweek.com1.html
tests/fixtures/html/businessweek.com2.html
tests/fixtures/html/chinese_article.html
tests/fixtures/html/cleveland.com1.html
tests/fixtures/html/cleveland.com2.html
tests/fixtures/html/cnn_article.html
tests/fixtures/html/cnn_main_site.html
tests/fixtures/html/cntraveler.com1.html
tests/fixtures/html/cntraveler.com2.html
tests/fixtures/html/coolhunting.com1.html
tests/fixtures/html/coolhunting.com2.html
tests/fixtures/html/cricket.com.au1.html
tests/fixtures/html/cricket.com.au2.html
tests/fixtures/html/dailycaller.com1.html
tests/fixtures/html/dailycaller.com2.html
tests/fixtures/html/dailystar.co.uk1.html
tests/fixtures/html/dailystar.co.uk2.html
tests/fixtures/html/dallasnews.com1.html
tests/fixtures/html/dallasnews.com2.html
tests/fixtures/html/details.com1.html
tests/fixtures/html/details.com2.html
tests/fixtures/html/elle.com1.html
tests/fixtures/html/elle.com2.html
tests/fixtures/html/flavorwire.com1.html
tests/fixtures/html/flavorwire.com2.html
tests/fixtures/html/fool.com1.html
tests/fixtures/html/fool.com2.html
tests/fixtures/html/foxbusiness.com1.html
tests/fixtures/html/foxbusiness.com2.html
tests/fixtures/html/foxnews.com1.html
tests/fixtures/html/foxnews.com2.html
tests/fixtures/html/glamour.com1.html
tests/fixtures/html/glamour.com2.html
tests/fixtures/html/globalnews.ca1.html
tests/fixtures/html/globalnews.ca2.html
tests/fixtures/html/google_meta_refresh.html
tests/fixtures/html/gq.com1.html
tests/fixtures/html/gq.com2.html
tests/fixtures/html/graziadaily.co.uk1.html
tests/fixtures/html/graziadaily.co.uk2.html
tests/fixtures/html/gulflive.com1.html
tests/fixtures/html/gulflive.com2.html
tests/fixtures/html/huffingtonpost.com1.html
tests/fixtures/html/japanese_article.html
tests/fixtures/html/japanese_article2.html
tests/fixtures/html/lifebuzz.com1.html
tests/fixtures/html/lifebuzz.com2.html
tests/fixtures/html/livescience.com1.html
tests/fixtures/html/livescience.com2.html
tests/fixtures/html/mashable.com1.html
tests/fixtures/html/mashable.com2.html
tests/fixtures/html/mlive.com1.html
tests/fixtures/html/mlive.com2.html
tests/fixtures/html/newyorker.com1.html
tests/fixtures/html/nj.com1.html
tests/fixtures/html/nola.com1.html
tests/fixtures/html/nydailynews.com1.html
tests/fixtures/html/nypost.com1.html
tests/fixtures/html/nypost.com2.html
tests/fixtures/html/ok.co.uk1.html
tests/fixtures/html/ok.co.uk2.html
tests/fixtures/html/oregonlive.com1.html
tests/fixtures/html/oregonlive.com2.html
tests/fixtures/html/parsely.com1.html
tests/fixtures/html/parsely.com2.html
tests/fixtures/html/pe.com1.html
tests/fixtures/html/pewresearch.org1.html
tests/fixtures/html/pewresearch.org2.html
tests/fixtures/html/pixable.com1.html
tests/fixtures/html/pixable.com2.html
tests/fixtures/html/pixelmonkey.org1.html
tests/fixtures/html/pixelmonkey.org2.html
tests/fixtures/html/readwrite.com1.html
tests/fixtures/html/recipe.com1.html
tests/fixtures/html/recipe.com2.html
tests/fixtures/html/reuters.com1.html
tests/fixtures/html/reuters.com2.html
tests/fixtures/html/reuters.com3.html
tests/fixtures/html/reuters.com4.html
tests/fixtures/html/self.com1.html
tests/fixtures/html/self.com2.html
tests/fixtures/html/sitepoint.com1.html
tests/fixtures/html/sitepoint.com2.html
tests/fixtures/html/slate.com1.html
tests/fixtures/html/slate.com2.html
tests/fixtures/html/space.com1.html
tests/fixtures/html/space.com2.html
tests/fixtures/html/spanish_article.html
tests/fixtures/html/syracuse.com1.html
tests/fixtures/html/syracuse.com2.html
tests/fixtures/html/talkingpointsmemo.com1.html
tests/fixtures/html/technologyreview.com1.html
tests/fixtures/html/technologyreview.com2.html
tests/fixtures/html/teenvogue.com1.html
tests/fixtures/html/teenvogue.com2.html
tests/fixtures/html/telegraph.co.uk1.html
tests/fixtures/html/telegraph.co.uk2.html
tests/fixtures/html/thai_article.html
tests/fixtures/html/theatlantic.com1.html
tests/fixtures/html/theatlantic.com2.html
tests/fixtures/html/theatlanticcities.com1.html
tests/fixtures/html/theatlanticcities.com2.html
tests/fixtures/html/thedailybeast.com1.html
tests/fixtures/html/thedailybeast.com2.html
tests/fixtures/html/thedebrief.co.uk1.html
tests/fixtures/html/thedebrief.co.uk2.html
tests/fixtures/html/theglobeandmail.com1.html
tests/fixtures/html/theglobeandmail.com2.html
tests/fixtures/html/thekitchn.com1.html
tests/fixtures/html/thekitchn.com2.html
tests/fixtures/html/thenextweb.com1.html
tests/fixtures/html/theonion.com1.html
tests/fixtures/html/theroot.com1.html
tests/fixtures/html/tnr.com1.html
tests/fixtures/html/tnr.com2.html
tests/fixtures/html/uproxx.com1.html
tests/fixtures/html/uproxx.com2.html
tests/fixtures/html/upworthy.com1.html
tests/fixtures/html/upworthy.com2.html
tests/fixtures/html/usnews.com1.html
tests/fixtures/html/usnews.com2.html
tests/fixtures/html/vanityfair.com1.html
tests/fixtures/html/vogue.de1.html
tests/fixtures/html/vogue.de2.html
tests/fixtures/html/wetpaint.com1.html
tests/fixtures/html/wetpaint.com2.html
tests/fixtures/html/wired.com1.html
tests/fixtures/html/wired.com2.html
tests/fixtures/html/wnet.org1.html
tests/fixtures/html/wnet.org2.html
tests/fixtures/html/yahoo_main_site.html
tests/fixtures/html/youbeauty.com1.html
tests/fixtures/html/youbeauty.com2.html
tests/fixtures/pdf/20170914-tpch-egm201701-enc02-th.pdf
tests/fixtures/pdf/www.econstor.eu.pdf
tests/fixtures/text/247wallst.com1.txt
tests/fixtures/text/247wallst.com2.txt
tests/fixtures/text/about.com1.txt
tests/fixtures/text/about.com2.txt
tests/fixtures/text/adoption.com1.txt
tests/fixtures/text/al.com1.txt
tests/fixtures/text/al.com2.txt
tests/fixtures/text/apartmenttherapy.com1.txt
tests/fixtures/text/apartmenttherapy.com2.txt
tests/fixtures/text/arabic.txt
tests/fixtures/text/architecturaldigest.com1.txt
tests/fixtures/text/architecturaldigest.com2.txt
tests/fixtures/text/avclub.com1.txt
tests/fixtures/text/avclub.com2.txt
tests/fixtures/text/backstage.com1.txt
tests/fixtures/text/backstage.com2.txt
tests/fixtures/text/bhg.com1.txt
tests/fixtures/text/bhg.com2.txt
tests/fixtures/text/bloomberg.com1.txt
tests/fixtures/text/bostonherald.com1.txt
tests/fixtures/text/bostonherald.com2.txt
tests/fixtures/text/businessinsider.com1.txt
tests/fixtures/text/businessinsider.com2.txt
tests/fixtures/text/businessweek.com1.txt
tests/fixtures/text/businessweek.com2.txt
tests/fixtures/text/chinese.txt
tests/fixtures/text/cleveland.com1.txt
tests/fixtures/text/cleveland.com2.txt
tests/fixtures/text/cnn.txt
tests/fixtures/text/cnn_summary.txt
tests/fixtures/text/cntraveler.com1.txt
tests/fixtures/text/cntraveler.com2.txt
tests/fixtures/text/coolhunting.com1.txt
tests/fixtures/text/cricket.com.au1.txt
tests/fixtures/text/cricket.com.au2.txt
tests/fixtures/text/dailycaller.com1.txt
tests/fixtures/text/dailycaller.com2.txt
tests/fixtures/text/dailystar.co.uk1.txt
tests/fixtures/text/dailystar.co.uk2.txt
tests/fixtures/text/dallasnews.com1.txt
tests/fixtures/text/dallasnews.com2.txt
tests/fixtures/text/details.com1.txt
tests/fixtures/text/details.com2.txt
tests/fixtures/text/elle.com1.txt
tests/fixtures/text/elle.com2.txt
tests/fixtures/text/flavorwire.com1.txt
tests/fixtures/text/flavorwire.com2.txt
tests/fixtures/text/fool.com1.txt
tests/fixtures/text/fool.com2.txt
tests/fixtures/text/foxbusiness.com1.txt
tests/fixtures/text/foxbusiness.com2.txt
tests/fixtures/text/foxnews.com1.txt
tests/fixtures/text/foxnews.com2.txt
tests/fixtures/text/foxnews.com3.txt
tests/fixtures/text/foxnews.com4.txt
tests/fixtures/text/glamour.com1.txt
tests/fixtures/text/glamour.com2.txt
tests/fixtures/text/globalnews.ca1.txt
tests/fixtures/text/globalnews.ca2.txt
tests/fixtures/text/gq.com1.txt
tests/fixtures/text/gq.com2.txt
tests/fixtures/text/graziadaily.co.uk1.txt
tests/fixtures/text/graziadaily.co.uk2.txt
tests/fixtures/text/gulflive.com1.txt
tests/fixtures/text/gulflive.com2.txt
tests/fixtures/text/huffingtonpost.com1.txt
tests/fixtures/text/japanese.txt
tests/fixtures/text/japanese2.txt
tests/fixtures/text/lifebuzz.com1.txt
tests/fixtures/text/lifebuzz.com2.txt
tests/fixtures/text/livescience.com1.txt
tests/fixtures/text/livescience.com2.txt
tests/fixtures/text/mashable.com1.txt
tests/fixtures/text/mashable.com2.txt
tests/fixtures/text/mlive.com1.txt
tests/fixtures/text/mlive.com2.txt
tests/fixtures/text/newyorker.com1.txt
tests/fixtures/text/nj.com1.txt
tests/fixtures/text/nola.com1.txt
tests/fixtures/text/nydailynews.com1.txt
tests/fixtures/text/nypost.com1.txt
tests/fixtures/text/nypost.com2.txt
tests/fixtures/text/ok.co.uk1.txt
tests/fixtures/text/ok.co.uk2.txt
tests/fixtures/text/oregonlive.com1.txt
tests/fixtures/text/oregonlive.com2.txt
tests/fixtures/text/parsely.com1.txt
tests/fixtures/text/parsely.com2.txt
tests/fixtures/text/pe.com1.txt
tests/fixtures/text/pewresearch.org1.txt
tests/fixtures/text/pewresearch.org2.txt
tests/fixtures/text/pixable.com1.txt
tests/fixtures/text/pixable.com2.txt
tests/fixtures/text/pixelmonkey.org1.txt
tests/fixtures/text/pixelmonkey.org2.txt
tests/fixtures/text/readwrite.com1.txt
tests/fixtures/text/recipe.com1.txt
tests/fixtures/text/recipe.com2.txt
tests/fixtures/text/reuters.com1.txt
tests/fixtures/text/reuters.com2.txt
tests/fixtures/text/reuters.com3.txt
tests/fixtures/text/reuters.com4.txt
tests/fixtures/text/reuters.com5.txt
tests/fixtures/text/reuters.com6.txt
tests/fixtures/text/self.com1.txt
tests/fixtures/text/self.com2.txt
tests/fixtures/text/sitepoint.com1.txt
tests/fixtures/text/sitepoint.com2.txt
tests/fixtures/text/slate.com1.txt
tests/fixtures/text/slate.com2.txt
tests/fixtures/text/space.com1.txt
tests/fixtures/text/space.com2.txt
tests/fixtures/text/spanish.txt
tests/fixtures/text/syracuse.com1.txt
tests/fixtures/text/syracuse.com2.txt
tests/fixtures/text/talkingpointsmemo.com1.txt
tests/fixtures/text/technologyreview.com1.txt
tests/fixtures/text/technologyreview.com2.txt
tests/fixtures/text/teenvogue.com1.txt
tests/fixtures/text/teenvogue.com2.txt
tests/fixtures/text/telegraph.co.uk1.txt
tests/fixtures/text/telegraph.co.uk2.txt
tests/fixtures/text/thai.txt
tests/fixtures/text/theatlantic.com1.txt
tests/fixtures/text/theatlantic.com2.txt
tests/fixtures/text/theatlanticcities.com1.txt
tests/fixtures/text/theatlanticcities.com2.txt
tests/fixtures/text/thedailybeast.com1.txt
tests/fixtures/text/thedailybeast.com2.txt
tests/fixtures/text/thedebrief.co.uk1.txt
tests/fixtures/text/thedebrief.co.uk2.txt
tests/fixtures/text/theglobeandmail.com1.txt
tests/fixtures/text/theglobeandmail.com2.txt
tests/fixtures/text/thekitchn.com1.txt
tests/fixtures/text/thekitchn.com2.txt
tests/fixtures/text/thenextweb.com1.txt
tests/fixtures/text/theonion.com1.txt
tests/fixtures/text/theroot.com1.txt
tests/fixtures/text/tnr.com1.txt
tests/fixtures/text/tnr.com2.txt
tests/fixtures/text/uproxx.com1.txt
tests/fixtures/text/uproxx.com2.txt
tests/fixtures/text/upworthy.com1.txt
tests/fixtures/text/upworthy.com2.txt
tests/fixtures/text/usnews.com1.txt
tests/fixtures/text/usnews.com2.txt
tests/fixtures/text/vanityfair.com1.txt
tests/fixtures/text/vogue.de1.txt
tests/fixtures/text/vogue.de2.txt
tests/fixtures/text/wetpaint.com1.txt
tests/fixtures/text/wetpaint.com2.txt
tests/fixtures/text/wired.com1.txt
tests/fixtures/text/wired.com2.txt
tests/fixtures/text/wnet.org1.txt
tests/fixtures/text/wnet.org2.txt
tests/fixtures/text/youbeauty.com1.txt
tests/fixtures/text/youbeauty.com2.txt
tests/fixtures/url/20170914-tpch-egm201701-enc02-th.txt
tests/fixtures/url/Oxford_University_Press.txt
tests/fixtures/url/books_google_com_list.txt
tests/fixtures/url/energy-investment-mekong-delta-Thailand-urls.csv
tests/fixtures/url/fulltext_list.txt
tests/fixtures/url/illegal-unreported-and-unregulated-fishing-urls.csv
tests/fixtures/url/industrial-spaces-urls.csv
tests/fixtures/url/test_list.txt
tests/fixtures/url/test_prepare.txt
tests/fixtures/url/test_pubdate.txt
tests/fixtures/url/www_econstor_eu.txt
tests/restful/__init__.py
tests/restful/countries_test.py
tests/restful/file_types_test.py
tests/restful/hello_test.py
tests/restful/languages_test.py
tests/restful/search_test.py
tests/system/__init__.py
tests/system/test_stimson_urls.py
tests/units/__init__.py
tests/units/test_article.py
tests/units/test_configuration.py
tests/units/test_content_extractor.py
tests/units/test_exhaustive.py
tests/units/test_false_positives.py
tests/units/test_multilanguage.py
tests/units/test_named_entity_recognition.py
tests/units/test_parser.py
tests/units/test_patterns.py
tests/units/test_pdf.py
tests/units/test_requires_beautifulsoap.py
tests/units/test_url.py
tests/units/test_utils.py