This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add support for no_proxy and case-insensitive env variables #9372

Merged: 9 commits, Feb 26, 2021
Changes from 1 commit
13 changes: 4 additions & 9 deletions synapse/http/client.py
@@ -289,9 +289,7 @@ def __init__(
treq_args: Dict[str, Any] = {},
ip_whitelist: Optional[IPSet] = None,
ip_blacklist: Optional[IPSet] = None,
-        http_proxy: Optional[bytes] = None,
-        https_proxy: Optional[bytes] = None,
-        no_proxy: Optional[bytes] = None,
+        use_proxy: bool = False,
):
"""
Args:
@@ -301,9 +299,8 @@ def __init__(
we may not request.
ip_whitelist: The whitelisted IP addresses, that we can
request if it were otherwise caught in a blacklist.
-            http_proxy: proxy server to use for http connections. host[:port]
-            https_proxy: proxy server to use for https connections. host[:port]
-            no_proxy: locations that should explicitly not use a proxy.
+            use_proxy: Whether proxy settings should be discovered and used
+                from conventional environment variables. Defaults to false.
"""
self.hs = hs

@@ -347,9 +344,7 @@ def __init__(
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
-            http_proxy=http_proxy,
-            https_proxy=https_proxy,
-            no_proxy=no_proxy,
+            use_proxy=use_proxy,
)

if self._ip_blacklist:
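Taken together, the client.py change replaces three explicit proxy parameters with a single opt-in flag. A hedged before/after sketch of a call site (the proxy values are hypothetical; `hs` stands for the usual HomeServer instance):

```python
# Before this PR: callers read proxy settings themselves and passed them in.
client = SimpleHttpClient(
    hs,
    http_proxy=b"proxy.example.com:8888",   # hypothetical proxy location
    https_proxy=b"proxy.example.com:8888",
    no_proxy=b"internal.example.com",
)

# After this PR: callers opt in, and http_proxy / HTTPS_PROXY / no_proxy
# (in any case combination) are discovered from the environment instead.
client = SimpleHttpClient(hs, use_proxy=True)
```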
31 changes: 17 additions & 14 deletions synapse/http/proxyagent.py
@@ -14,7 +14,7 @@
# limitations under the License.
import logging
import re
-from urllib.request import proxy_bypass_environment
+from urllib.request import getproxies_environment, proxy_bypass_environment

from zope.interface import implementer

@@ -60,11 +60,8 @@ class ProxyAgent(_AgentBase):
pool (HTTPConnectionPool|None): connection pool to be used. If None, a
non-persistent pool instance will be created.

-        http_proxy (bytes): Proxy server to use for http connections. host[:port]
-
-        https_proxy (bytes): Proxy server to use for https connections. host[:port]
-
-        no_proxy (bytes): Locations that should explicitly not use a proxy.
+        use_proxy (bool): Whether proxy settings should be discovered and used
+            from conventional environment variables. Defaults to false.
Contributor Author:
Is there any point in using ProxyAgent without use_proxy=True? I kept this default here to match the same behaviour as before, when neither http_proxy nor https_proxy was specified, but it seems like it may be redundant now that this is combined into a single parameter.

Member:
I think you're right. Outside of proxying (and a single debug log) ProxyAgent doesn't provide much on top of twisted's default Agent (as it shouldn't!).

I think we could try just using an Agent if use_proxy=False is passed to SimpleHttpClient.

Similarly in tests.

Contributor Author:
I think I'd rather leave this out of this PR for now, to avoid making more significant changes to the code, but it sounds like it could be worth a follow-up!

Member:
Sounds sensible, OK 🙂
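For reference, a rough sketch of the follow-up being discussed here, which is not part of this PR: SimpleHttpClient could build twisted's plain Agent when proxying is disabled and only construct a ProxyAgent when asked to. The parameter names mirror the surrounding __init__, but the branch itself is hypothetical:

```python
from twisted.web.client import Agent

# Hypothetical follow-up, not the merged code: skip ProxyAgent entirely
# when environment-based proxy discovery is disabled.
if use_proxy:
    self.agent = ProxyAgent(
        self.reactor,
        connectTimeout=15,
        contextFactory=self.hs.get_http_client_context_factory(),
        pool=pool,
        use_proxy=True,
    )
else:
    self.agent = Agent(
        self.reactor,
        connectTimeout=15,
        contextFactory=self.hs.get_http_client_context_factory(),
        pool=pool,
    )
```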

"""

def __init__(
@@ -75,9 +72,7 @@ def __init__(
connectTimeout=None,
bindAddress=None,
pool=None,
-        http_proxy=None,
-        https_proxy=None,
-        no_proxy=None,
+        use_proxy=False,
):
_AgentBase.__init__(self, reactor, pool)

@@ -92,6 +87,15 @@ def __init__(
if bindAddress is not None:
self._endpoint_kwargs["bindAddress"] = bindAddress

+        http_proxy = None
+        https_proxy = None
+        no_proxy = None
+        if use_proxy:
+            proxies = getproxies_environment()
+            http_proxy = proxies["http"].encode() if "http" in proxies else None
+            https_proxy = proxies["https"].encode() if "https" in proxies else None
+            no_proxy = proxies["no"].encode() if "no" in proxies else None

self.http_proxy_endpoint = _http_proxy_endpoint(
http_proxy, self.proxy_reactor, **self._endpoint_kwargs
)
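The case-insensitivity in the PR title falls out of getproxies_environment, which scans os.environ for every variable ending in _proxy in either case and returns a dict keyed by lower-cased scheme. A standalone sketch of the stdlib behaviour the block above relies on (not part of the diff):

```python
import os
from urllib.request import getproxies_environment

# Mixed-case variables, as a user might plausibly set them.
os.environ["HTTPS_PROXY"] = "proxy.com"  # upper-case variant
os.environ["no_proxy"] = "test.com"      # lower-case variant

# Both variants are picked up and normalised to lower-case keys, which is
# exactly what the ProxyAgent.__init__ block above consumes.
print(getproxies_environment())
# -> {'https': 'proxy.com', 'no': 'test.com'}
```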
@@ -148,13 +152,12 @@ def request(self, method, uri, headers=None, bodyProducer=None):
parsed_uri = URI.fromBytes(uri)
pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port)
request_path = parsed_uri.originForm
-        should_skip_proxy = (
-            proxy_bypass_environment(
+
+        should_skip_proxy = False
+        if self.no_proxy is not None:
+            should_skip_proxy = proxy_bypass_environment(
                 parsed_uri.host.decode(), proxies={"no": self.no_proxy.decode()},
             )
-            if self.no_proxy is not None
-            else False
-        )

if (
parsed_uri.scheme == b"http"
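The bypass decision is likewise delegated to the stdlib: proxy_bypass_environment matches a host against the comma-separated no_proxy list, with "*" matching every host. A standalone sketch of that behaviour (the hostnames are illustrative):

```python
from urllib.request import proxy_bypass_environment

no_proxy = "test.com,unused.com"

# Returns truthy (1) when the host is covered by the no_proxy list.
print(proxy_bypass_environment("test.com", proxies={"no": no_proxy}))   # 1
# Returns falsy (0) for hosts the list does not cover.
print(proxy_bypass_environment("other.com", proxies={"no": no_proxy}))  # 0
# A bare "*" bypasses the proxy for every host.
print(proxy_bypass_environment("other.com", proxies={"no": "*"}))       # 1
```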
6 changes: 1 addition & 5 deletions synapse/rest/media/v1/preview_url_resource.py
@@ -25,7 +25,6 @@
import traceback
from typing import TYPE_CHECKING, Any, Dict, Generator, Iterable, Optional, Union
from urllib import parse as urlparse
-from urllib.request import getproxies

import attr

@@ -145,15 +144,12 @@ def __init__(
self.max_spider_size = hs.config.max_spider_size
self.server_name = hs.hostname
self.store = hs.get_datastore()
-        proxies = getproxies()
         self.client = SimpleHttpClient(
             hs,
             treq_args={"browser_like_redirects": True},
             ip_whitelist=hs.config.url_preview_ip_range_whitelist,
             ip_blacklist=hs.config.url_preview_ip_range_blacklist,
-            http_proxy=proxies["http"].encode() if "http" in proxies else None,
-            https_proxy=proxies["https"].encode() if "https" in proxies else None,
-            no_proxy=proxies["no"].encode() if "no" in proxies else None,
+            use_proxy=True,
         )
self.media_repo = media_repo
self.primary_base_path = media_repo.primary_base_path
14 changes: 2 additions & 12 deletions synapse/server.py
@@ -35,7 +35,6 @@
Union,
cast,
)
-from urllib.request import getproxies

import twisted.internet.base
import twisted.internet.tcp
@@ -370,28 +369,19 @@ def get_proxied_http_client(self) -> SimpleHttpClient:
"""
An HTTP client that uses configured HTTP(S) proxies.
"""
-        proxies = getproxies()
-        return SimpleHttpClient(
-            self,
-            http_proxy=proxies["http"].encode() if "http" in proxies else None,
-            https_proxy=proxies["https"].encode() if "https" in proxies else None,
-            no_proxy=proxies["no"].encode() if "no" in proxies else None,
-        )
+        return SimpleHttpClient(self, use_proxy=True)

@cache_in_self
def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient:
"""
An HTTP client that uses configured HTTP(S) proxies and blacklists IPs
based on the IP range blacklist/whitelist.
"""
-        proxies = getproxies()
         return SimpleHttpClient(
             self,
             ip_whitelist=self.config.ip_range_whitelist,
             ip_blacklist=self.config.ip_range_blacklist,
-            http_proxy=proxies["http"].encode() if "http" in proxies else None,
-            https_proxy=proxies["https"].encode() if "https" in proxies else None,
-            no_proxy=proxies["no"].encode() if "no" in proxies else None,
+            use_proxy=True,
         )

@cache_in_self
55 changes: 33 additions & 22 deletions tests/http/test_proxyagent.py
@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
+import os
+from unittest.mock import patch

import treq
from netaddr import IPSet
@@ -148,38 +150,47 @@ def test_https_request(self):
agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy())
self._test_request_no_proxy(agent, b"https", b"test.com", b"abc")

+    @patch.dict(os.environ, {})
+    def test_http_request_use_proxy_no_environment(self):
+        agent = ProxyAgent(self.reactor, use_proxy=True)
+        self._test_request_no_proxy(agent, b"http", b"test.com", b"")
+
+    @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "NO_PROXY": "test.com"})
+    def test_http_request_via_uppercase_no_proxy(self):
+        agent = ProxyAgent(self.reactor, use_proxy=True)
+        self._test_request_no_proxy(agent, b"http", b"test.com", b"")

+    @patch.dict(
+        os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "test.com,unused.com"}
+    )
def test_http_request_via_no_proxy(self):
-        agent = ProxyAgent(
-            self.reactor, http_proxy=b"proxy.com:8888", no_proxy=b"test.com,unused.com"
-        )
+        agent = ProxyAgent(self.reactor, use_proxy=True)
self._test_request_no_proxy(agent, b"http", b"test.com", b"")

+    @patch.dict(
+        os.environ, {"https_proxy": "proxy.com", "no_proxy": "test.com,unused.com"}
+    )
def test_https_request_via_no_proxy(self):
         agent = ProxyAgent(
-            self.reactor,
-            contextFactory=get_test_https_policy(),
-            https_proxy=b"proxy.com",
-            no_proxy=b"test.com,unused.com",
+            self.reactor, contextFactory=get_test_https_policy(), use_proxy=True,
         )
self._test_request_no_proxy(agent, b"https", b"test.com", b"abc")

@patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "*"})
def test_http_request_via_no_proxy_star(self):
-        agent = ProxyAgent(self.reactor, http_proxy=b"proxy.com:8888", no_proxy=b"*")
+        agent = ProxyAgent(self.reactor, use_proxy=True)
self._test_request_no_proxy(agent, b"http", b"test.com", b"")

@patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "*"})
def test_https_request_via_no_proxy_star(self):
         agent = ProxyAgent(
-            self.reactor,
-            contextFactory=get_test_https_policy(),
-            https_proxy=b"proxy.com",
-            no_proxy=b"*",
+            self.reactor, contextFactory=get_test_https_policy(), use_proxy=True,
         )
self._test_request_no_proxy(agent, b"https", b"test.com", b"abc")

@patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"})
def test_http_request_via_proxy(self):
-        agent = ProxyAgent(
-            self.reactor, http_proxy=b"proxy.com:8888", no_proxy=b"unused.com"
-        )
+        agent = ProxyAgent(self.reactor, use_proxy=True)

self.reactor.lookups["proxy.com"] = "1.2.3.5"
d = agent.request(b"GET", b"http://test.com")
@@ -215,12 +226,10 @@ def test_http_request_via_proxy(self):
body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result")

@patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"})
def test_https_request_via_proxy(self):
         agent = ProxyAgent(
-            self.reactor,
-            contextFactory=get_test_https_policy(),
-            https_proxy=b"proxy.com",
-            no_proxy=b"unused.com",
+            self.reactor, contextFactory=get_test_https_policy(), use_proxy=True,
         )

self.reactor.lookups["proxy.com"] = "1.2.3.5"
@@ -296,14 +305,15 @@ def test_https_request_via_proxy(self):
body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result")

@patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
def test_http_request_via_proxy_with_blacklist(self):
# The blacklist includes the configured proxy IP.
         agent = ProxyAgent(
             BlacklistingReactorWrapper(
                 self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"])
             ),
             self.reactor,
-            http_proxy=b"proxy.com:8888",
+            use_proxy=True,
         )

self.reactor.lookups["proxy.com"] = "1.2.3.5"
@@ -340,15 +350,16 @@ def test_http_request_via_proxy_with_blacklist(self):
body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result")

-    def test_https_request_via_proxy_with_blacklist(self):
+    @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"})
+    def test_https_request_via_uppercase_proxy_with_blacklist(self):
# The blacklist includes the configured proxy IP.
         agent = ProxyAgent(
             BlacklistingReactorWrapper(
                 self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"])
             ),
             self.reactor,
             contextFactory=get_test_https_policy(),
-            https_proxy=b"proxy.com",
+            use_proxy=True,
         )

self.reactor.lookups["proxy.com"] = "1.2.3.5"