Skip to content
6 changes: 5 additions & 1 deletion atlassian/confluence/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@

def __init__(self, url, *args, **kwargs):
# Detect which implementation to use
if ("atlassian.net" in url or "jira.com" in url) and ("/wiki" not in url):
# Priority: explicit cloud= kwarg > URL-based heuristic
is_cloud = kwargs.get("cloud")
if is_cloud is None:
is_cloud = "atlassian.net" in url or "jira.com" in url or "api.atlassian.com" in url
Comment thread Fixed
Comment thread Fixed
Comment thread Fixed
if is_cloud:
impl = ConfluenceCloud(url, *args, **kwargs)
else:
impl = ConfluenceServer(url, *args, **kwargs)
Expand Down
52 changes: 32 additions & 20 deletions atlassian/confluence/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,27 +134,39 @@ def _get_paged(

yield from response.get("results", [])

if self.cloud:
url = response.get("_links", {}).get("next", {}).get("href")
if url is None:
break
# From now on we have absolute URLs with parameters
absolute = True
# Params are now provided by the url
params = {}
# Trailing should not be added as it is already part of the url
trailing = False
next_link = response.get("_links", {}).get("next")
if next_link is None:
break
if isinstance(next_link, str):
url = next_link
else:
if response.get("_links", {}).get("next") is None:
break
# For server, we need to extract the next page URL from the _links.next.href
next_url = response.get("_links", {}).get("next", {}).get("href")
if next_url is None:
break
url = next_url
absolute = True
params = {}
trailing = False
url = next_link.get("href")
if url is None:
break

if url.startswith("/"):
# Prepend base URL from self.url, stripping the API root suffix to preserve path prefix
# Example: self.url = "https://api.atlassian.com/ex/confluence/abc/wiki/rest/api"
# api_root = "wiki/rest/api"
# base = "https://api.atlassian.com/ex/confluence/abc"
# relative = "/rest/api/content?cursor=1"
# result = "https://api.atlassian.com/ex/confluence/abc/rest/api/content?cursor=1"
api_root_suffix = f"/{self.api_root}"
if self.url.endswith(api_root_suffix):
base = self.url[:-len(api_root_suffix)]
else:
# Fallback: extract scheme+netloc if api_root suffix not found
from urllib.parse import urlparse
parsed = urlparse(self.url)
base = f"{parsed.scheme}://{parsed.netloc}"
url = base + url

# From now on we have absolute URLs with parameters
absolute = True
# Params are now provided by the url
params = {}
# Trailing should not be added as it is already part of the url
trailing = False

return

Expand Down
14 changes: 11 additions & 3 deletions atlassian/confluence/cloud/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ def __init__(self, url="https://api.atlassian.com/", *args, **kwargs):
if "cloud" not in kwargs:
kwargs["cloud"] = True
if "api_version" not in kwargs:
kwargs["api_version"] = "2"
kwargs["api_version"] = "latest"
if "api_root" not in kwargs:
kwargs["api_root"] = "wiki/api/v2"
url = url.strip("/")
kwargs["api_root"] = "wiki/rest/api"
url = url.strip("/") + f"/{kwargs['api_root']}"
super(Cloud, self).__init__(url, *args, **kwargs)

# Content Management
Expand All @@ -28,6 +28,14 @@ def get_content_by_type(self, content_type, **kwargs):
"""Get content by type (page, blogpost, etc.)."""
return self.get("content", params={"type": content_type, **kwargs})

def get_all_pages_from_space(self, space_key, **kwargs):
    """
    Iterate over the pages of a space.

    :param space_key: string: Key of the space, sent as ``spaceKey``
    :param kwargs: Extra query parameters; they are merged last, so they
                   take precedence over ``spaceKey`` and ``type``

    :return: Whatever ``self._get_paged`` returns for the ``content`` resource
    """
    query = {"spaceKey": space_key, "type": "page"}
    query.update(kwargs)
    return self._get_paged("content", params=query)

def get_all_blog_posts_from_space(self, space_key, **kwargs):
    """
    Iterate over the blog posts of a space.

    :param space_key: string: Key of the space, sent as ``spaceKey``
    :param kwargs: Extra query parameters; they are merged last, so they
                   take precedence over ``spaceKey`` and ``type``

    :return: Whatever ``self._get_paged`` returns for the ``content`` resource
    """
    query = {"spaceKey": space_key, "type": "blogpost"}
    query.update(kwargs)
    return self._get_paged("content", params=query)

def create_content(self, data, **kwargs):
    """
    Create new content.

    :param data: Payload forwarded unchanged as the ``data`` argument of ``self.post``
    :param kwargs: Additional keyword arguments forwarded to ``self.post``

    :return: The result of ``self.post`` on the ``content`` resource
    """
    resource = "content"
    return self.post(resource, data=data, **kwargs)
Expand Down
49 changes: 0 additions & 49 deletions atlassian/confluence/cloud/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,53 +24,4 @@ def __init__(self, url, *args, **kwargs):
"""
super(ConfluenceCloudBase, self).__init__(url, *args, **kwargs)

def _get_paged(
self,
url,
params=None,
data=None,
flags=None,
trailing=None,
absolute=False,
):
"""
Used to get the paged data for Confluence Cloud

:param url: string: The url to retrieve
:param params: dict (default is None): The parameter's
:param data: dict (default is None): The data
:param flags: string[] (default is None): The flags
:param trailing: bool (default is None): If True, a trailing slash is added to the url
:param absolute: bool (default is False): If True, the url is used absolute and not relative to the root

:return: A generator object for the data elements
"""
if params is None:
params = {}

while True:
response = self.get(
url,
trailing=trailing,
params=params,
data=data,
flags=flags,
absolute=absolute,
)
if "results" not in response:
return

yield from response.get("results", [])

# Confluence Cloud uses _links.next.href for pagination
url = response.get("_links", {}).get("next", {}).get("href")
if url is None:
break
# From now on we have absolute URLs with parameters
absolute = True
# Params are now provided by the url
params = {}
# Trailing should not be added as it is already part of the url
trailing = False

return
18 changes: 9 additions & 9 deletions atlassian/confluence/server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ def get_content_by_id(self, content_id, **kwargs):

def get_all_pages_from_space(self, space_key, **kwargs):
"""Get all pages from space; ``kwargs`` are merged into the query parameters after ``spaceKey`` and ``type``."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"spaceKey": space_key, "type": "page", **kwargs})
return self._get_paged("content", params={"spaceKey": space_key, "type": "page", **kwargs})

def get_all_blog_posts_from_space(self, space_key, **kwargs):
"""Get all blog posts from space; ``kwargs`` are merged into the query parameters after ``spaceKey`` and ``type``."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"spaceKey": space_key, "type": "blogpost", **kwargs})
return self._get_paged("content", params={"spaceKey": space_key, "type": "blogpost", **kwargs})

def get_page_by_title(self, space_key, title, **kwargs):
"""Get page by title and space key."""
Expand Down Expand Up @@ -195,11 +195,11 @@ def remove_content_label(self, content_id, label_name, **kwargs):

def get_all_pages_by_label(self, label, **kwargs):
"""Get all pages by label; ``kwargs`` are merged into the query parameters after ``label`` and ``type``."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"label": label, "type": "page", **kwargs})
return self._get_paged("content", params={"label": label, "type": "page", **kwargs})

def get_all_blog_posts_by_label(self, label, **kwargs):
"""Get all blog posts by label; ``kwargs`` are merged into the query parameters after ``label`` and ``type``."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"label": label, "type": "blogpost", **kwargs})
return self._get_paged("content", params={"label": label, "type": "blogpost", **kwargs})

# Attachment Management
def get_attachments(self, content_id, **kwargs):
Expand Down Expand Up @@ -293,24 +293,24 @@ def get_draft_content(self, content_id, **kwargs):

def get_all_draft_pages_from_space(self, space_key, **kwargs):
"""Get all draft pages from space (query uses ``type="page"`` and ``status="draft"``, then ``kwargs``)."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"spaceKey": space_key, "type": "page", "status": "draft", **kwargs})
return self._get_paged("content", params={"spaceKey": space_key, "type": "page", "status": "draft", **kwargs})

def get_all_draft_blog_posts_from_space(self, space_key, **kwargs):
"""Get all draft blog posts from space (query uses ``type="blogpost"`` and ``status="draft"``, then ``kwargs``)."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"spaceKey": space_key, "type": "blogpost", "status": "draft", **kwargs})
return self._get_paged("content", params={"spaceKey": space_key, "type": "blogpost", "status": "draft", **kwargs})

# Trash Management
def get_trash_content(self, space_key, **kwargs):
"""Get trash content of a space (query uses ``status="trashed"``, then ``kwargs``)."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"spaceKey": space_key, "status": "trashed", **kwargs})
return self._get_paged("content", params={"spaceKey": space_key, "status": "trashed", **kwargs})

def get_all_pages_from_space_trash(self, space_key, **kwargs):
"""Get all pages from space trash (query uses ``type="page"`` and ``status="trashed"``, then ``kwargs``)."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"spaceKey": space_key, "type": "page", "status": "trashed", **kwargs})
return self._get_paged("content", params={"spaceKey": space_key, "type": "page", "status": "trashed", **kwargs})

def get_all_blog_posts_from_space_trash(self, space_key, **kwargs):
"""Get all blog posts from space trash (query uses ``type="blogpost"`` and ``status="trashed"``, then ``kwargs``)."""
# NOTE(review): two consecutive returns — as written only the first executes and the
# second is unreachable. This looks like a before/after pair from a diff rendering;
# confirm which call (``self.get`` or ``self._get_paged``) is intended.
return self.get("content", params={"spaceKey": space_key, "type": "blogpost", "status": "trashed", **kwargs})
return self._get_paged("content", params={"spaceKey": space_key, "type": "blogpost", "status": "trashed", **kwargs})

# Export
def export_content(self, content_id, **kwargs):
Expand Down
50 changes: 0 additions & 50 deletions atlassian/confluence/server/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,54 +24,4 @@ def __init__(self, url, *args, **kwargs):
"""
super(ConfluenceServerBase, self).__init__(url, *args, **kwargs)

def _get_paged(
self,
url,
params=None,
data=None,
flags=None,
trailing=False,
absolute=False,
):
"""
Used to get the paged data for Confluence Server

:param url: string: The url to retrieve
:param params: dict (default is None): The parameter's
:param data: dict (default is None): The data
:param flags: string[] (default is None): The flags
:param trailing: bool (default is None): If True, a trailing slash is added to the url
:param absolute: bool (default is False): If True, the url is used absolute and not relative to the root

:return: A generator object for the data elements
"""
if params is None:
params = {}

while True:
response = self.get(
url,
trailing=trailing,
params=params,
data=data,
flags=flags,
absolute=absolute,
)
if "results" not in response:
return

yield from response.get("results", [])

# Confluence Server uses _links.next.href for pagination
if response.get("_links", {}).get("next") is None:
break
# For server, we need to extract the next page URL from the _links.next.href
next_url = response.get("_links", {}).get("next", {}).get("href")
if next_url is None:
break
url = next_url
absolute = True
params = {}
trailing = False

return
Loading
Loading