chardet
filetype
python-magic
lxml
nltk
tabulate
requests
beautifulsoup4
emoji
dataclasses-json
python-iso639
langdetect
numpy
rapidfuzz
backoff
typing-extensions
wrapt

[airtable]
pyairtable

[all-docs]
xlrd
unstructured-inference==0.7.15
pypandoc
python-pptx<=0.6.23
onnx
pypdf
pdf2image
pdfminer.six
unstructured.pytesseract>=0.3.12
markdown
msg_parser
pandas
python-docx>=1.1.0
networkx
openpyxl
pikepdf

[azure]
adlfs
fsspec==2023.9.1

[azure-cognitive-search]
azure-search-documents

[bedrock]
boto3
langchain

[biomed]
bs4

[box]
boxfs
fsspec==2023.9.1

[confluence]
atlassian-python-api

[csv]
pandas

[delta-table]
deltalake
fsspec==2023.9.1

[discord]
discord-py

[doc]
python-docx>=1.1.0

[docx]
python-docx>=1.1.0

[dropbox]
dropboxdrivefs
fsspec==2023.9.1

[elasticsearch]
elasticsearch

[embed-huggingface]
huggingface
langchain
sentence_transformers

[epub]
pypandoc

[gcs]
gcsfs
fsspec==2023.9.1
bs4

[github]
pygithub>1.58.0

[gitlab]
python-gitlab

[google-drive]
google-api-python-client

[hubspot]
hubspot-api-client
urllib3>=1.26.17

[huggingface]
langdetect
sacremoses
sentencepiece
torch
transformers

[image]
onnx
pdf2image
pdfminer.six
pikepdf
pypdf
unstructured-inference==0.7.15
unstructured.pytesseract>=0.3.12

[jira]
atlassian-python-api

[local-inference]
xlrd
unstructured-inference==0.7.15
pypandoc
python-pptx<=0.6.23
onnx
pypdf
pdf2image
pdfminer.six
unstructured.pytesseract>=0.3.12
markdown
msg_parser
pandas
python-docx>=1.1.0
networkx
openpyxl
pikepdf

[md]
markdown

[mongodb]
pymongo

[msg]
msg_parser

[notion]
notion-client
htmlBuilder

[odt]
python-docx>=1.1.0
pypandoc

[onedrive]
msal
Office365-REST-Python-Client<2.4.3
bs4

[openai]
langchain
tiktoken
openai

[org]
pypandoc

[outlook]
msal
Office365-REST-Python-Client<2.4.3

[paddleocr]
unstructured.paddleocr==2.6.1.3

[pdf]
onnx
pdf2image
pdfminer.six
pikepdf
pypdf
unstructured-inference==0.7.15
unstructured.pytesseract>=0.3.12

[ppt]
python-pptx<=0.6.23

[pptx]
python-pptx<=0.6.23

[reddit]
praw

[rst]
pypandoc

[rtf]
pypandoc

[s3]
s3fs
fsspec==2023.9.1

[salesforce]
simple-salesforce

[sharepoint]
msal
Office365-REST-Python-Client<2.4.3

[slack]
slack_sdk

[tsv]
pandas

[wikipedia]
wikipedia

[xlsx]
openpyxl
pandas
xlrd
networkx
